@inproceedings{nikolova-stoupak-etal-2024-llm,
title = "{LLM}-Generated Contexts to Practice Specialised Vocabulary: Corpus Presentation and Comparison",
author = "Nikolova-Stoupak, Iglika and
Bibauw, Serge and
Dumont, Amandine and
Stas, Fran{\c{c}}oise and
Watrin, Patrick and
Fran{\c{c}}ois, Thomas",
editor = "Balaguer, Mathieu and
Bendahman, Nihed and
Ho-dac, Lydia-Mai and
Mauclair, Julie and
G Moreno, Jose and
Pinquier, Julien",
booktitle = "Actes de la 31{\`e}me Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles, volume 1 : articles longs et prises de position",
month = "7",
year = "2024",
address = "Toulouse, France",
publisher = "ATALA and AFPC",
url = "https://aclanthology.org/2024.jeptalnrecital-taln.33",
pages = "472--498",
abstract = "This project evaluates the potential of LLM and dynamic corpora to generate contexts ai- med at the practice and acquisition of specialised English vocabulary. We compared reference contexts{---}handpicked by expert teachers{---}for a specialised vocabulary list to contexts generated by three recent large language models (LLM) of different sizes (Mistral-7B-Instruct, Vicuna-13B, and Gemini 1.0 Pro) and to contexts extracted from articles web-crawled from specialised websites. The comparison uses a representative set of length-based, morphosyntactic, semantic, and discourse- related textual characteristics. We conclude that the LLM-based corpora can be combined effectively with a web-crawled one to form an academic corpus characterised by appropriate complexity and textual variety.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nikolova-stoupak-etal-2024-llm">
<titleInfo>
<title>LLM-Generated Contexts to Practice Specialised Vocabulary: Corpus Presentation and Comparison</title>
</titleInfo>
<name type="personal">
<namePart type="given">Iglika</namePart>
<namePart type="family">Nikolova-Stoupak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Bibauw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amandine</namePart>
<namePart type="family">Dumont</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Françoise</namePart>
<namePart type="family">Stas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Watrin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thomas</namePart>
<namePart type="family">François</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Actes de la 31ème Conférence sur le Traitement Automatique des Langues Naturelles, volume 1 : articles longs et prises de position</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mathieu</namePart>
<namePart type="family">Balaguer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nihed</namePart>
<namePart type="family">Bendahman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lydia-Mai</namePart>
<namePart type="family">Ho-dac</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julie</namePart>
<namePart type="family">Mauclair</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">G Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julien</namePart>
<namePart type="family">Pinquier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ATALA and AFPC</publisher>
<place>
<placeTerm type="text">Toulouse, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This project evaluates the potential of LLM and dynamic corpora to generate contexts ai- med at the practice and acquisition of specialised English vocabulary. We compared reference contexts—handpicked by expert teachers—for a specialised vocabulary list to contexts generated by three recent large language models (LLM) of different sizes (Mistral-7B-Instruct, Vicuna-13B, and Gemini 1.0 Pro) and to contexts extracted from articles web-crawled from specialised websites. The comparison uses a representative set of length-based, morphosyntactic, semantic, and discourse- related textual characteristics. We conclude that the LLM-based corpora can be combined effectively with a web-crawled one to form an academic corpus characterised by appropriate complexity and textual variety.</abstract>
<identifier type="citekey">nikolova-stoupak-etal-2024-llm</identifier>
<location>
<url>https://aclanthology.org/2024.jeptalnrecital-taln.33</url>
</location>
<part>
<date>2024-7</date>
<extent unit="page">
<start>472</start>
<end>498</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LLM-Generated Contexts to Practice Specialised Vocabulary: Corpus Presentation and Comparison
%A Nikolova-Stoupak, Iglika
%A Bibauw, Serge
%A Dumont, Amandine
%A Stas, Françoise
%A Watrin, Patrick
%A François, Thomas
%Y Balaguer, Mathieu
%Y Bendahman, Nihed
%Y Ho-dac, Lydia-Mai
%Y Mauclair, Julie
%Y G Moreno, Jose
%Y Pinquier, Julien
%S Actes de la 31ème Conférence sur le Traitement Automatique des Langues Naturelles, volume 1 : articles longs et prises de position
%D 2024
%8 July
%I ATALA and AFPC
%C Toulouse, France
%F nikolova-stoupak-etal-2024-llm
%X This project evaluates the potential of LLM and dynamic corpora to generate contexts ai- med at the practice and acquisition of specialised English vocabulary. We compared reference contexts—handpicked by expert teachers—for a specialised vocabulary list to contexts generated by three recent large language models (LLM) of different sizes (Mistral-7B-Instruct, Vicuna-13B, and Gemini 1.0 Pro) and to contexts extracted from articles web-crawled from specialised websites. The comparison uses a representative set of length-based, morphosyntactic, semantic, and discourse- related textual characteristics. We conclude that the LLM-based corpora can be combined effectively with a web-crawled one to form an academic corpus characterised by appropriate complexity and textual variety.
%U https://aclanthology.org/2024.jeptalnrecital-taln.33
%P 472-498
Markdown (Informal)
[LLM-Generated Contexts to Practice Specialised Vocabulary: Corpus Presentation and Comparison](https://aclanthology.org/2024.jeptalnrecital-taln.33) (Nikolova-Stoupak et al., JEP/TALN/RECITAL 2024)
ACL