% Conference paper from the LREC'16 proceedings (pp. 3261--3264).
% Braced words in the title are proper nouns that must keep their capitals
% when a bibliography style applies sentence casing.
% NOTE(review): `address` appears to hold the conference venue rather than the
% publisher's city -- confirm against the convention this database follows.
@inproceedings{prys-etal-2016-cysill,
    title     = {Cysill {Ar-lein}: A Corpus of Written Contemporary {Welsh} Compiled from an On-line Spelling and Grammar Checker},
    author    = {Prys, Delyth and
                 Prys, Gruffudd and
                 Jones, Dewi Bryn},
    editor    = {Calzolari, Nicoletta and
                 Choukri, Khalid and
                 Declerck, Thierry and
                 Goggi, Sara and
                 Grobelnik, Marko and
                 Maegaard, Bente and
                 Mariani, Joseph and
                 Mazo, Helene and
                 Moreno, Asuncion and
                 Odijk, Jan and
                 Piperidis, Stelios},
    booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)},
    month     = may,
    year      = {2016},
    address   = {Portoro{\v{z}}, Slovenia},
    publisher = {European Language Resources Association (ELRA)},
    url       = {https://aclanthology.org/L16-1519},
    pages     = {3261--3264},
    abstract  = {This paper describes the use of a free, on-line language spelling and grammar checking aid as a vehicle for the collection of a significant (31 million words and rising) corpus of text for academic research in the context of less resourced languages where such data in sufficient quantities are often unavailable. It describes two versions of the corpus: the texts as submitted, prior to the correction process, and the texts following the user{'}s incorporation of any suggested changes. An overview of the corpus{'} contents is given and an analysis of use including usage statistics is also provided. Issues surrounding privacy and the anonymization of data are explored as is the data{'}s potential use for linguistic analysis, lexical research and language modelling. The method used for gathering this corpus is believed to be unique, and is a valuable addition to corpus studies in a minority language.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS collection holding a single record, citekey prys-etal-2016-cysill. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="prys-etal-2016-cysill">
    <titleInfo>
      <title>Cysill Ar-lein: A Corpus of Written Contemporary Welsh Compiled from an On-line Spelling and Grammar Checker</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Delyth</namePart>
      <namePart type="family">Prys</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Gruffudd</namePart>
      <namePart type="family">Prys</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Dewi</namePart>
      <namePart type="given">Bryn</namePart>
      <namePart type="family">Jones</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2016-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors and publication details. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Nicoletta</namePart>
        <namePart type="family">Calzolari</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Khalid</namePart>
        <namePart type="family">Choukri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Thierry</namePart>
        <namePart type="family">Declerck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sara</namePart>
        <namePart type="family">Goggi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marko</namePart>
        <namePart type="family">Grobelnik</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Bente</namePart>
        <namePart type="family">Maegaard</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joseph</namePart>
        <namePart type="family">Mariani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helene</namePart>
        <namePart type="family">Mazo</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Asuncion</namePart>
        <namePart type="family">Moreno</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jan</namePart>
        <namePart type="family">Odijk</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stelios</namePart>
        <namePart type="family">Piperidis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Portorož, Slovenia</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper describes the use of a free, on-line language spelling and grammar checking aid as a vehicle for the collection of a significant (31 million words and rising) corpus of text for academic research in the context of less resourced languages where such data in sufficient quantities are often unavailable. It describes two versions of the corpus: the texts as submitted, prior to the correction process, and the texts following the user’s incorporation of any suggested changes. An overview of the corpus’ contents is given and an analysis of use including usage statistics is also provided. Issues surrounding privacy and the anonymization of data are explored as is the data’s potential use for linguistic analysis, lexical research and language modelling. The method used for gathering this corpus is believed to be unique, and is a valuable addition to corpus studies in a minority language.</abstract>
    <identifier type="citekey">prys-etal-2016-cysill</identifier>
    <location>
      <url>https://aclanthology.org/L16-1519</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume. -->
    <part>
      <date>2016-05</date>
      <extent unit="page">
        <start>3261</start>
        <end>3264</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Cysill Ar-lein: A Corpus of Written Contemporary Welsh Compiled from an On-line Spelling and Grammar Checker
%A Prys, Delyth
%A Prys, Gruffudd
%A Jones, Dewi Bryn
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F prys-etal-2016-cysill
%X This paper describes the use of a free, on-line language spelling and grammar checking aid as a vehicle for the collection of a significant (31 million words and rising) corpus of text for academic research in the context of less resourced languages where such data in sufficient quantities are often unavailable. It describes two versions of the corpus: the texts as submitted, prior to the correction process, and the texts following the user’s incorporation of any suggested changes. An overview of the corpus’ contents is given and an analysis of use including usage statistics is also provided. Issues surrounding privacy and the anonymization of data are explored as is the data’s potential use for linguistic analysis, lexical research and language modelling. The method used for gathering this corpus is believed to be unique, and is a valuable addition to corpus studies in a minority language.
%U https://aclanthology.org/L16-1519
%P 3261-3264
Markdown (Informal)
[Cysill Ar-lein: A Corpus of Written Contemporary Welsh Compiled from an On-line Spelling and Grammar Checker](https://aclanthology.org/L16-1519) (Prys et al., LREC 2016)
ACL