@inproceedings{gongora-etal-2021-experiments,
title = "Experiments on a {G}uarani Corpus of News and Social Media",
author = "G{\'o}ngora, Santiago and
Giossa, Nicol{\'a}s and
Chiruzzo, Luis",
editor = "Mager, Manuel and
Oncevay, Arturo and
Rios, Annette and
Meza Ruiz, Ivan Vladimir and
Palmer, Alexis and
Neubig, Graham and
Kann, Katharina",
booktitle = "Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.americasnlp-1.16",
doi = "10.18653/v1/2021.americasnlp-1.16",
pages = "153--158",
abstract = "While Guarani is widely spoken in South America, obtaining a large amount of Guarani text from the web is hard. We present the building process of a Guarani corpus composed of a parallel Guarani-Spanish set of news articles, and a monolingual set of tweets. We perform some word embeddings experiments aiming at evaluating the quality of the Guarani split of the corpus, finding encouraging results but noticing that more diversity in text domains might be needed for further improvements.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="gongora-etal-2021-experiments">
<titleInfo>
<title>Experiments on a Guarani Corpus of News and Social Media</title>
</titleInfo>
<name type="personal">
<namePart type="given">Santiago</namePart>
<namePart type="family">Góngora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nicolás</namePart>
<namePart type="family">Giossa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="family">Chiruzzo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas</title>
</titleInfo>
<name type="personal">
<namePart type="given">Manuel</namePart>
<namePart type="family">Mager</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Oncevay</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annette</namePart>
<namePart type="family">Rios</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivan</namePart>
<namePart type="given">Vladimir</namePart>
<namePart type="family">Meza Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexis</namePart>
<namePart type="family">Palmer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katharina</namePart>
<namePart type="family">Kann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>While Guarani is widely spoken in South America, obtaining a large amount of Guarani text from the web is hard. We present the building process of a Guarani corpus composed of a parallel Guarani-Spanish set of news articles, and a monolingual set of tweets. We perform some word embeddings experiments aiming at evaluating the quality of the Guarani split of the corpus, finding encouraging results but noticing that more diversity in text domains might be needed for further improvements.</abstract>
<identifier type="citekey">gongora-etal-2021-experiments</identifier>
<identifier type="doi">10.18653/v1/2021.americasnlp-1.16</identifier>
<location>
<url>https://aclanthology.org/2021.americasnlp-1.16</url>
</location>
<part>
<date>2021-06</date>
<extent unit="page">
<start>153</start>
<end>158</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Experiments on a Guarani Corpus of News and Social Media
%A Góngora, Santiago
%A Giossa, Nicolás
%A Chiruzzo, Luis
%Y Mager, Manuel
%Y Oncevay, Arturo
%Y Rios, Annette
%Y Meza Ruiz, Ivan Vladimir
%Y Palmer, Alexis
%Y Neubig, Graham
%Y Kann, Katharina
%S Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas
%D 2021
%8 June
%I Association for Computational Linguistics
%C Online
%F gongora-etal-2021-experiments
%X While Guarani is widely spoken in South America, obtaining a large amount of Guarani text from the web is hard. We present the building process of a Guarani corpus composed of a parallel Guarani-Spanish set of news articles, and a monolingual set of tweets. We perform some word embeddings experiments aiming at evaluating the quality of the Guarani split of the corpus, finding encouraging results but noticing that more diversity in text domains might be needed for further improvements.
%R 10.18653/v1/2021.americasnlp-1.16
%U https://aclanthology.org/2021.americasnlp-1.16
%U https://doi.org/10.18653/v1/2021.americasnlp-1.16
%P 153-158
Markdown (Informal)
[Experiments on a Guarani Corpus of News and Social Media](https://aclanthology.org/2021.americasnlp-1.16) (Góngora et al., AmericasNLP 2021)
ACL
- Santiago Góngora, Nicolás Giossa, and Luis Chiruzzo. 2021. Experiments on a Guarani Corpus of News and Social Media. In Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas, pages 153–158, Online. Association for Computational Linguistics.