@inproceedings{novais-etal-2012-portuguese,
title = "{P}ortuguese Text Generation from Large Corpora",
author = "Novais, Eder and
Paraboni, Ivandr{\'e} and
Silva, Douglas",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Do{\u{g}}an, Mehmet U{\u{g}}ur and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12)",
month = may,
year = "2012",
address = "Istanbul, Turkey",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2012/pdf/153_Paper.pdf",
pages = "4010--4014",
abstract = "In the implementation of a surface realisation engine, many of the computational techniques seen in other AI fields have been widely applied. Among these, the use of statistical methods has been particularly successful, as in the so-called 'generate-and-select', or 2-stages architectures. Systems of this kind produce output strings from possibly underspecified input data by over-generating a large number of alternative realisations (often including ungrammatical candidate sentences.) These are subsequently ranked with the aid of a statistical language model, and the most likely candidate is selected as the output string. Statistical approaches may however face a number of difficulties. Among these, there is the issue of data sparseness, a problem that is particularly evident in cases such as our target language - Brazilian Portuguese - which is not only morphologically-rich, but relatively poor in NLP resources such as large, publicly available corpora. In this work we describe a first implementation of a shallow surface realisation system for this language that deals with the issue of data sparseness by making use of factored language models built from a (relatively) large corpus of Brazilian newspapers articles.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="novais-etal-2012-portuguese">
<titleInfo>
<title>Portuguese Text Generation from Large Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eder</namePart>
<namePart type="family">Novais</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivandré</namePart>
<namePart type="family">Paraboni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Douglas</namePart>
<namePart type="family">Silva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mehmet</namePart>
<namePart type="given">Uğur</namePart>
<namePart type="family">Doğan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Istanbul, Turkey</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the implementation of a surface realisation engine, many of the computational techniques seen in other AI fields have been widely applied. Among these, the use of statistical methods has been particularly successful, as in the so-called ’generate-and-select’, or 2-stages architectures. Systems of this kind produce output strings from possibly underspecified input data by over-generating a large number of alternative realisations (often including ungrammatical candidate sentences.) These are subsequently ranked with the aid of a statistical language model, and the most likely candidate is selected as the output string. Statistical approaches may however face a number of difficulties. Among these, there is the issue of data sparseness, a problem that is particularly evident in cases such as our target language - Brazilian Portuguese - which is not only morphologically-rich, but relatively poor in NLP resources such as large, publicly available corpora. In this work we describe a first implementation of a shallow surface realisation system for this language that deals with the issue of data sparseness by making use of factored language models built from a (relatively) large corpus of Brazilian newspapers articles.</abstract>
<identifier type="citekey">novais-etal-2012-portuguese</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2012/pdf/153_Paper.pdf</url>
</location>
<part>
<date>2012-05</date>
<extent unit="page">
<start>4010</start>
<end>4014</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Portuguese Text Generation from Large Corpora
%A Novais, Eder
%A Paraboni, Ivandré
%A Silva, Douglas
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Doğan, Mehmet Uğur
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC’12)
%D 2012
%8 May
%I European Language Resources Association (ELRA)
%C Istanbul, Turkey
%F novais-etal-2012-portuguese
%X In the implementation of a surface realisation engine, many of the computational techniques seen in other AI fields have been widely applied. Among these, the use of statistical methods has been particularly successful, as in the so-called ’generate-and-select’, or 2-stages architectures. Systems of this kind produce output strings from possibly underspecified input data by over-generating a large number of alternative realisations (often including ungrammatical candidate sentences.) These are subsequently ranked with the aid of a statistical language model, and the most likely candidate is selected as the output string. Statistical approaches may however face a number of difficulties. Among these, there is the issue of data sparseness, a problem that is particularly evident in cases such as our target language - Brazilian Portuguese - which is not only morphologically-rich, but relatively poor in NLP resources such as large, publicly available corpora. In this work we describe a first implementation of a shallow surface realisation system for this language that deals with the issue of data sparseness by making use of factored language models built from a (relatively) large corpus of Brazilian newspapers articles.
%U http://www.lrec-conf.org/proceedings/lrec2012/pdf/153_Paper.pdf
%P 4010-4014
Markdown (Informal)
[Portuguese Text Generation from Large Corpora](http://www.lrec-conf.org/proceedings/lrec2012/pdf/153_Paper.pdf) (Novais et al., LREC 2012)
ACL
- Eder Novais, Ivandré Paraboni, and Douglas Silva. 2012. Portuguese Text Generation from Large Corpora. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12), pages 4010–4014, Istanbul, Turkey. European Language Resources Association (ELRA).