@inproceedings{stosic-etal-2024-parcolab-parallel,
title = "The {P}ar{C}o{L}ab Parallel Corpus and Its Extension to Four Regional Languages of {F}rance",
author = "Stosic, Dejan and
Marjanovi{\'c}, Sa{\v{s}}a and
Bernhard, Delphine and
Bach, Xavier and
Bras, Myriam and
Kevers, Laurent and
Retali-Medori, Stella and
Vergez-Couret, Marianne and
Werner, Carole",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.1392",
pages = "16014--16023",
abstract = "Parallel corpora are still scarce for most of the world{'}s language pairs. The situation is by no means different for regional languages of France. In addition, adequate web interfaces facilitate and encourage the use of parallel corpora by target users, such as language learners and teachers, as well as linguists. In this paper, we describe ParCoLab, a parallel corpus and a web platform for querying the corpus. From its onset, ParCoLab has been geared towards lower-resource languages, with an initial corpus in Serbian, along with French and English (later Spanish). We focus here on the extension of ParCoLab with a parallel corpus for four regional languages of France: Alsatian, Corsican, Occitan and Poitevin-Saintongeais. In particular, we detail criteria for choosing texts and issues related to their collection. The new parallel corpus contains more than 20k tokens per regional language.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stosic-etal-2024-parcolab-parallel">
<titleInfo>
<title>The ParCoLab Parallel Corpus and Its Extension to Four Regional Languages of France</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dejan</namePart>
<namePart type="family">Stosic</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saša</namePart>
<namePart type="family">Marjanović</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Delphine</namePart>
<namePart type="family">Bernhard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xavier</namePart>
<namePart type="family">Bach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Myriam</namePart>
<namePart type="family">Bras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laurent</namePart>
<namePart type="family">Kevers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stella</namePart>
<namePart type="family">Retali-Medori</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marianne</namePart>
<namePart type="family">Vergez-Couret</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carole</namePart>
<namePart type="family">Werner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Parallel corpora are still scarce for most of the world’s language pairs. The situation is by no means different for regional languages of France. In addition, adequate web interfaces facilitate and encourage the use of parallel corpora by target users, such as language learners and teachers, as well as linguists. In this paper, we describe ParCoLab, a parallel corpus and a web platform for querying the corpus. From its onset, ParCoLab has been geared towards lower-resource languages, with an initial corpus in Serbian, along with French and English (later Spanish). We focus here on the extension of ParCoLab with a parallel corpus for four regional languages of France: Alsatian, Corsican, Occitan and Poitevin-Saintongeais. In particular, we detail criteria for choosing texts and issues related to their collection. The new parallel corpus contains more than 20k tokens per regional language.</abstract>
<identifier type="citekey">stosic-etal-2024-parcolab-parallel</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.1392</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>16014</start>
<end>16023</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The ParCoLab Parallel Corpus and Its Extension to Four Regional Languages of France
%A Stosic, Dejan
%A Marjanović, Saša
%A Bernhard, Delphine
%A Bach, Xavier
%A Bras, Myriam
%A Kevers, Laurent
%A Retali-Medori, Stella
%A Vergez-Couret, Marianne
%A Werner, Carole
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F stosic-etal-2024-parcolab-parallel
%X Parallel corpora are still scarce for most of the world’s language pairs. The situation is by no means different for regional languages of France. In addition, adequate web interfaces facilitate and encourage the use of parallel corpora by target users, such as language learners and teachers, as well as linguists. In this paper, we describe ParCoLab, a parallel corpus and a web platform for querying the corpus. From its onset, ParCoLab has been geared towards lower-resource languages, with an initial corpus in Serbian, along with French and English (later Spanish). We focus here on the extension of ParCoLab with a parallel corpus for four regional languages of France: Alsatian, Corsican, Occitan and Poitevin-Saintongeais. In particular, we detail criteria for choosing texts and issues related to their collection. The new parallel corpus contains more than 20k tokens per regional language.
%U https://aclanthology.org/2024.lrec-main.1392
%P 16014-16023
Markdown (Informal)
[The ParCoLab Parallel Corpus and Its Extension to Four Regional Languages of France](https://aclanthology.org/2024.lrec-main.1392) (Stosic et al., LREC-COLING 2024)
ACL
- Dejan Stosic, Saša Marjanović, Delphine Bernhard, Xavier Bach, Myriam Bras, Laurent Kevers, Stella Retali-Medori, Marianne Vergez-Couret, and Carole Werner. 2024. The ParCoLab Parallel Corpus and Its Extension to Four Regional Languages of France. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 16014–16023, Torino, Italia. ELRA and ICCL.