@inproceedings{aluisio-etal-2004-lacio,
title = "The L{\'a}cio-Web: Corpora and Tools to Advance {B}razilian {P}ortuguese Language Investigations and Computational Linguistic Tools",
author = "Aluisio, Sandra and
Pinheiro, Gisele Montilha and
Manfrin, Aline M. P. and
de Oliveira, Leandro H. M. and
Genoves, Jr., Luiz C. and
Tagnin, Stella E. O.",
editor = "Lino, Maria Teresa and
Xavier, Maria Francisca and
Ferreira, F{\'a}tima and
Costa, Rute and
Silva, Raquel",
booktitle = "Proceedings of the Fourth International Conference on Language Resources and Evaluation ({LREC}{'}04)",
month = may,
year = "2004",
address = "Lisbon, Portugal",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2004/pdf/410.pdf",
abstract = "In this paper we discuss the five requirements for building large publicly available corpora which geared the construction of the L{\'a}cio-Web corpora and their environments: 1) a comprehensive text typology; 2) text copyright clearance, compilation and annotation scheme; 3) a friendly and didactic interface; 4) the need to serve as support for several types of research; 5) the need to offer an array of associated tools. Also, we present the features that make L{\'a}cio-Web corpora interesting and novel as well as the limitations of this project, such as corpora size and balance, and the non-inclusion of spoken texts in the project{'}s reference corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="aluisio-etal-2004-lacio">
<titleInfo>
<title>The Lácio-Web: Corpora and Tools to Advance Brazilian Portuguese Language Investigations and Computational Linguistic Tools</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="family">Aluisio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gisele</namePart>
<namePart type="given">Montilha</namePart>
<namePart type="family">Pinheiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aline</namePart>
<namePart type="given">M</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Manfrin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leandro</namePart>
<namePart type="given">H</namePart>
<namePart type="given">M</namePart>
<namePart type="family">de Oliveira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luiz</namePart>
<namePart type="given">C</namePart>
<namePart type="family">Genoves</namePart>
<namePart type="suffix">Jr.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stella</namePart>
<namePart type="given">E</namePart>
<namePart type="given">O</namePart>
<namePart type="family">Tagnin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2004-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Teresa</namePart>
<namePart type="family">Lino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Francisca</namePart>
<namePart type="family">Xavier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fátima</namePart>
<namePart type="family">Ferreira</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rute</namePart>
<namePart type="family">Costa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Raquel</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Lisbon, Portugal</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we discuss the five requirements for building large publicly available corpora which geared the construction of the Lácio-Web corpora and their environments: 1) a comprehensive text typology; 2) text copyright clearance, compilation and annotation scheme; 3) a friendly and didactic interface; 4) the need to serve as support for several types of research; 5) the need to offer an array of associated tools. Also, we present the features that make Lácio-Web corpora interesting and novel as well as the limitations of this project, such as corpora size and balance, and the non-inclusion of spoken texts in the project’s reference corpus.</abstract>
<identifier type="citekey">aluisio-etal-2004-lacio</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/410.pdf</url>
</location>
<part>
<date>2004-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Lácio-Web: Corpora and Tools to Advance Brazilian Portuguese Language Investigations and Computational Linguistic Tools
%A Aluisio, Sandra
%A Pinheiro, Gisele Montilha
%A Manfrin, Aline M. P.
%A de Oliveira, Leandro H. M.
%A Genoves Jr., Luiz C.
%A Tagnin, Stella E. O.
%Y Lino, Maria Teresa
%Y Xavier, Maria Francisca
%Y Ferreira, Fátima
%Y Costa, Rute
%Y Silva, Raquel
%S Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)
%D 2004
%8 May
%I European Language Resources Association (ELRA)
%C Lisbon, Portugal
%F aluisio-etal-2004-lacio
%X In this paper we discuss the five requirements for building large publicly available corpora which geared the construction of the Lácio-Web corpora and their environments: 1) a comprehensive text typology; 2) text copyright clearance, compilation and annotation scheme; 3) a friendly and didactic interface; 4) the need to serve as support for several types of research; 5) the need to offer an array of associated tools. Also, we present the features that make Lácio-Web corpora interesting and novel as well as the limitations of this project, such as corpora size and balance, and the non-inclusion of spoken texts in the project’s reference corpus.
%U http://www.lrec-conf.org/proceedings/lrec2004/pdf/410.pdf
Markdown (Informal)
[The Lácio-Web: Corpora and Tools to Advance Brazilian Portuguese Language Investigations and Computational Linguistic Tools](http://www.lrec-conf.org/proceedings/lrec2004/pdf/410.pdf) (Aluisio et al., LREC 2004)
ACL