@inproceedings{marchal-etal-2021-semi,
title = "Semi-automatic discourse annotation in a low-resource language: Developing a connective lexicon for {N}igerian {P}idgin",
author = "Marchal, Marian and
Scholman, Merel and
Demberg, Vera",
editor = "Braud, Chlo{\'e} and
Hardmeier, Christian and
Li, Junyi Jessy and
Louis, Annie and
Strube, Michael and
Zeldes, Amir",
booktitle = "Proceedings of the 2nd Workshop on Computational Approaches to Discourse",
month = nov,
year = "2021",
address = "Punta Cana, Dominican Republic and Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.codi-main.8",
doi = "10.18653/v1/2021.codi-main.8",
pages = "84--94",
abstract = "Cross-linguistic research on discourse structure and coherence marking requires discourse-annotated corpora and connective lexicons in a large number of languages. However, the availability of such resources is limited, especially for languages for which linguistic resources are scarce in general, such as Nigerian Pidgin. In this study, we demonstrate how a semi-automatic approach can be used to source connectives and their relation senses and develop a discourse-annotated corpus in a low-resource language. Connectives and their relation senses were extracted from a parallel corpus combining automatic (PDTB end-to-end parser) and manual annotations. This resulted in Naija-Lex, a lexicon of discourse connectives in Nigerian Pidgin with English translations. The lexicon shows that the majority of Nigerian Pidgin connectives are borrowed from its English lexifier, but that there are also some connectives that are unique to Nigerian Pidgin.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="marchal-etal-2021-semi">
<titleInfo>
<title>Semi-automatic discourse annotation in a low-resource language: Developing a connective lexicon for Nigerian Pidgin</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marian</namePart>
<namePart type="family">Marchal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Merel</namePart>
<namePart type="family">Scholman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Workshop on Computational Approaches to Discourse</title>
</titleInfo>
<name type="personal">
<namePart type="given">Chloé</namePart>
<namePart type="family">Braud</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christian</namePart>
<namePart type="family">Hardmeier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junyi</namePart>
<namePart type="given">Jessy</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annie</namePart>
<namePart type="family">Louis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Strube</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amir</namePart>
<namePart type="family">Zeldes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Punta Cana, Dominican Republic and Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Cross-linguistic research on discourse structure and coherence marking requires discourse-annotated corpora and connective lexicons in a large number of languages. However, the availability of such resources is limited, especially for languages for which linguistic resources are scarce in general, such as Nigerian Pidgin. In this study, we demonstrate how a semi-automatic approach can be used to source connectives and their relation senses and develop a discourse-annotated corpus in a low-resource language. Connectives and their relation senses were extracted from a parallel corpus combining automatic (PDTB end-to-end parser) and manual annotations. This resulted in Naija-Lex, a lexicon of discourse connectives in Nigerian Pidgin with English translations. The lexicon shows that the majority of Nigerian Pidgin connectives are borrowed from its English lexifier, but that there are also some connectives that are unique to Nigerian Pidgin.</abstract>
<identifier type="citekey">marchal-etal-2021-semi</identifier>
<identifier type="doi">10.18653/v1/2021.codi-main.8</identifier>
<location>
<url>https://aclanthology.org/2021.codi-main.8</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>84</start>
<end>94</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Semi-automatic discourse annotation in a low-resource language: Developing a connective lexicon for Nigerian Pidgin
%A Marchal, Marian
%A Scholman, Merel
%A Demberg, Vera
%Y Braud, Chloé
%Y Hardmeier, Christian
%Y Li, Junyi Jessy
%Y Louis, Annie
%Y Strube, Michael
%Y Zeldes, Amir
%S Proceedings of the 2nd Workshop on Computational Approaches to Discourse
%D 2021
%8 November
%I Association for Computational Linguistics
%C Punta Cana, Dominican Republic and Online
%F marchal-etal-2021-semi
%X Cross-linguistic research on discourse structure and coherence marking requires discourse-annotated corpora and connective lexicons in a large number of languages. However, the availability of such resources is limited, especially for languages for which linguistic resources are scarce in general, such as Nigerian Pidgin. In this study, we demonstrate how a semi-automatic approach can be used to source connectives and their relation senses and develop a discourse-annotated corpus in a low-resource language. Connectives and their relation senses were extracted from a parallel corpus combining automatic (PDTB end-to-end parser) and manual annotations. This resulted in Naija-Lex, a lexicon of discourse connectives in Nigerian Pidgin with English translations. The lexicon shows that the majority of Nigerian Pidgin connectives are borrowed from its English lexifier, but that there are also some connectives that are unique to Nigerian Pidgin.
%R 10.18653/v1/2021.codi-main.8
%U https://aclanthology.org/2021.codi-main.8
%U https://doi.org/10.18653/v1/2021.codi-main.8
%P 84-94
Markdown (Informal)
[Semi-automatic discourse annotation in a low-resource language: Developing a connective lexicon for Nigerian Pidgin](https://aclanthology.org/2021.codi-main.8) (Marchal et al., CODI 2021)
ACL