@inproceedings{guzzi-etal-2023-annotation,
title = "Annotation of lexical bundles with discourse functions in a {S}panish academic corpus",
author = "Guzzi, Eleonora and
Alonso-Ramos, Margarita and
Garcia, Marcos and
Garc{\'\i}a Salido, Marcos",
editor = "Bhatia, Archna and
Evang, Kilian and
Garcia, Marcos and
Giouli, Voula and
Han, Lifeng and
Taslimipoor, Shiva",
booktitle = "Proceedings of the 19th Workshop on Multiword Expressions (MWE 2023)",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.mwe-1.14",
doi = "10.18653/v1/2023.mwe-1.14",
pages = "99--105",
abstract = "This paper describes the process of annotation of 996 lexical bundles (LB) assigned to 39 different discourse functions in a Spanish academic corpus. The purpose of the annotation is to obtain a new Spanish gold-standard corpus of 1,800,000 words useful for training and evaluating computational models that are capable of identifying automatically LBs for each context in new corpora, as well as for linguistic analysis about the role of LBs in academic discourse. The annotation process revealed that correspondence between LBs and discourse functions is not biunivocal and that the degree of ambiguity is high, so linguists{'} contribution has been essential for improving the automatic assignation of tags.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="guzzi-etal-2023-annotation">
<titleInfo>
<title>Annotation of lexical bundles with discourse functions in a Spanish academic corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eleonora</namePart>
<namePart type="family">Guzzi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margarita</namePart>
<namePart type="family">Alonso-Ramos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">García Salido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 19th Workshop on Multiword Expressions (MWE 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Archna</namePart>
<namePart type="family">Bhatia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kilian</namePart>
<namePart type="family">Evang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Garcia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Voula</namePart>
<namePart type="family">Giouli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lifeng</namePart>
<namePart type="family">Han</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shiva</namePart>
<namePart type="family">Taslimipoor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper describes the process of annotation of 996 lexical bundles (LB) assigned to 39 different discourse functions in a Spanish academic corpus. The purpose of the annotation is to obtain a new Spanish gold-standard corpus of 1,800,000 words useful for training and evaluating computational models that are capable of identifying automatically LBs for each context in new corpora, as well as for linguistic analysis about the role of LBs in academic discourse. The annotation process revealed that correspondence between LBs and discourse functions is not biunivocal and that the degree of ambiguity is high, so linguists’ contribution has been essential for improving the automatic assignation of tags.</abstract>
<identifier type="citekey">guzzi-etal-2023-annotation</identifier>
<identifier type="doi">10.18653/v1/2023.mwe-1.14</identifier>
<location>
<url>https://aclanthology.org/2023.mwe-1.14</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>99</start>
<end>105</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Annotation of lexical bundles with discourse functions in a Spanish academic corpus
%A Guzzi, Eleonora
%A Alonso-Ramos, Margarita
%A Garcia, Marcos
%A García Salido, Marcos
%Y Bhatia, Archna
%Y Evang, Kilian
%Y Garcia, Marcos
%Y Giouli, Voula
%Y Han, Lifeng
%Y Taslimipoor, Shiva
%S Proceedings of the 19th Workshop on Multiword Expressions (MWE 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F guzzi-etal-2023-annotation
%X This paper describes the process of annotation of 996 lexical bundles (LB) assigned to 39 different discourse functions in a Spanish academic corpus. The purpose of the annotation is to obtain a new Spanish gold-standard corpus of 1,800,000 words useful for training and evaluating computational models that are capable of identifying automatically LBs for each context in new corpora, as well as for linguistic analysis about the role of LBs in academic discourse. The annotation process revealed that correspondence between LBs and discourse functions is not biunivocal and that the degree of ambiguity is high, so linguists’ contribution has been essential for improving the automatic assignation of tags.
%R 10.18653/v1/2023.mwe-1.14
%U https://aclanthology.org/2023.mwe-1.14
%U https://doi.org/10.18653/v1/2023.mwe-1.14
%P 99-105
Markdown (Informal)
[Annotation of lexical bundles with discourse functions in a Spanish academic corpus](https://aclanthology.org/2023.mwe-1.14) (Guzzi et al., MWE 2023)
ACL