@inproceedings{yung-etal-2024-discogem,
title = "{D}isco{G}e{M} 2.0: A Parallel Corpus of {E}nglish, {G}erman, {F}rench and {C}zech Implicit Discourse Relations",
author = "Yung, Frances and
Scholman, Merel and
Zikanova, Sarka and
Demberg, Vera",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.443",
pages = "4940--4956",
abstract = "We present DiscoGeM 2.0, a crowdsourced, parallel corpus of 12,834 implicit discourse relations, with English, German, French and Czech data. We propose and validate a new single-step crowdsourcing annotation method and apply it to collect new annotations in German, French and Czech. The corpus was constructed by having crowdsourced annotators choose a suitable discourse connective for each relation from a set of unambiguous candidates. Every instance was annotated by 10 workers. Our corpus hence represents the first multi-lingual resource that contains distributions of discourse interpretations for implicit relations. The results show that the connective insertion method of discourse annotation can be reliably extended to other languages. The resulting multi-lingual annotations also reveal that implicit relations inferred in one language may differ from those inferred in the translation, meaning the annotations are not always directly transferable. DiscoGem 2.0 promotes the investigation of cross-linguistic differences in discourse marking and could improve automatic discourse parsing applications. It is openly downloadable here: https://github.com/merelscholman/DiscoGeM.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yung-etal-2024-discogem">
<titleInfo>
<title>DiscoGeM 2.0: A Parallel Corpus of English, German, French and Czech Implicit Discourse Relations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Frances</namePart>
<namePart type="family">Yung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Merel</namePart>
<namePart type="family">Scholman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarka</namePart>
<namePart type="family">Zikanova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vera</namePart>
<namePart type="family">Demberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present DiscoGeM 2.0, a crowdsourced, parallel corpus of 12,834 implicit discourse relations, with English, German, French and Czech data. We propose and validate a new single-step crowdsourcing annotation method and apply it to collect new annotations in German, French and Czech. The corpus was constructed by having crowdsourced annotators choose a suitable discourse connective for each relation from a set of unambiguous candidates. Every instance was annotated by 10 workers. Our corpus hence represents the first multi-lingual resource that contains distributions of discourse interpretations for implicit relations. The results show that the connective insertion method of discourse annotation can be reliably extended to other languages. The resulting multi-lingual annotations also reveal that implicit relations inferred in one language may differ from those inferred in the translation, meaning the annotations are not always directly transferable. DiscoGem 2.0 promotes the investigation of cross-linguistic differences in discourse marking and could improve automatic discourse parsing applications. It is openly downloadable here: https://github.com/merelscholman/DiscoGeM.</abstract>
<identifier type="citekey">yung-etal-2024-discogem</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.443</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>4940</start>
<end>4956</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DiscoGeM 2.0: A Parallel Corpus of English, German, French and Czech Implicit Discourse Relations
%A Yung, Frances
%A Scholman, Merel
%A Zikanova, Sarka
%A Demberg, Vera
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F yung-etal-2024-discogem
%X We present DiscoGeM 2.0, a crowdsourced, parallel corpus of 12,834 implicit discourse relations, with English, German, French and Czech data. We propose and validate a new single-step crowdsourcing annotation method and apply it to collect new annotations in German, French and Czech. The corpus was constructed by having crowdsourced annotators choose a suitable discourse connective for each relation from a set of unambiguous candidates. Every instance was annotated by 10 workers. Our corpus hence represents the first multi-lingual resource that contains distributions of discourse interpretations for implicit relations. The results show that the connective insertion method of discourse annotation can be reliably extended to other languages. The resulting multi-lingual annotations also reveal that implicit relations inferred in one language may differ from those inferred in the translation, meaning the annotations are not always directly transferable. DiscoGem 2.0 promotes the investigation of cross-linguistic differences in discourse marking and could improve automatic discourse parsing applications. It is openly downloadable here: https://github.com/merelscholman/DiscoGeM.
%U https://aclanthology.org/2024.lrec-main.443
%P 4940-4956
Markdown (Informal)
[DiscoGeM 2.0: A Parallel Corpus of English, German, French and Czech Implicit Discourse Relations](https://aclanthology.org/2024.lrec-main.443) (Yung et al., LREC-COLING 2024)
ACL