@inproceedings{dinu-2008-classifying,
title = "On Classifying Coherent/Incoherent {R}omanian Short Texts",
author = "Dinu, Anca",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Tapias, Daniel",
booktitle = "Proceedings of the Sixth International Conference on Language Resources and Evaluation ({LREC}'08)",
month = may,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2008/pdf/732_paper.pdf",
abstract = "In this paper we present and discuss the results of a text coherence experiment performed on a small corpus of Romanian text from a number of alternative high school manuals. During the last 10 years, an abundance of alternative manuals for high school was produced and distributed in Romania. Due to the large amount of material and to the relative short time in which it was produced, the question of assessing the quality of this material emerged; this process relied mostly of subjective human personal opinion, given the lack of automatic tools for Romanian. Debates and claims of poor quality of the alternative manuals resulted in a number of examples of incomprehensible / incoherent paragraphs extracted from such manuals. Our goal was to create an automatic tool which may be used as an indication of poor quality of such texts. We created a small corpus of representative texts from Romanian alternative manuals. We manually classified the chosen paragraphs from such manuals into two categories: comprehensible/coherent text and incomprehensible/incoherent text. We then used different machine learning techniques to automatically classify them in a supervised manner. Our approach is rather simple, but the results are encouraging.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dinu-2008-classifying">
<titleInfo>
<title>On Classifying Coherent/Incoherent Romanian Short Texts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anca</namePart>
<namePart type="family">Dinu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper we present and discuss the results of a text coherence experiment performed on a small corpus of Romanian text from a number of alternative high school manuals. During the last 10 years, an abundance of alternative manuals for high school was produced and distributed in Romania. Due to the large amount of material and to the relative short time in which it was produced, the question of assessing the quality of this material emerged; this process relied mostly of subjective human personal opinion, given the lack of automatic tools for Romanian. Debates and claims of poor quality of the alternative manuals resulted in a number of examples of incomprehensible / incoherent paragraphs extracted from such manuals. Our goal was to create an automatic tool which may be used as an indication of poor quality of such texts. We created a small corpus of representative texts from Romanian alternative manuals. We manually classified the chosen paragraphs from such manuals into two categories: comprehensible/coherent text and incomprehensible/incoherent text. We then used different machine learning techniques to automatically classify them in a supervised manner. Our approach is rather simple, but the results are encouraging.</abstract>
<identifier type="citekey">dinu-2008-classifying</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2008/pdf/732_paper.pdf</url>
</location>
<part>
<date>2008-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On Classifying Coherent/Incoherent Romanian Short Texts
%A Dinu, Anca
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Tapias, Daniel
%S Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC’08)
%D 2008
%8 May
%I European Language Resources Association (ELRA)
%C Marrakech, Morocco
%F dinu-2008-classifying
%X In this paper we present and discuss the results of a text coherence experiment performed on a small corpus of Romanian text from a number of alternative high school manuals. During the last 10 years, an abundance of alternative manuals for high school was produced and distributed in Romania. Due to the large amount of material and to the relative short time in which it was produced, the question of assessing the quality of this material emerged; this process relied mostly of subjective human personal opinion, given the lack of automatic tools for Romanian. Debates and claims of poor quality of the alternative manuals resulted in a number of examples of incomprehensible / incoherent paragraphs extracted from such manuals. Our goal was to create an automatic tool which may be used as an indication of poor quality of such texts. We created a small corpus of representative texts from Romanian alternative manuals. We manually classified the chosen paragraphs from such manuals into two categories: comprehensible/coherent text and incomprehensible/incoherent text. We then used different machine learning techniques to automatically classify them in a supervised manner. Our approach is rather simple, but the results are encouraging.
%U http://www.lrec-conf.org/proceedings/lrec2008/pdf/732_paper.pdf
Markdown (Informal)
[On Classifying Coherent/Incoherent Romanian Short Texts](http://www.lrec-conf.org/proceedings/lrec2008/pdf/732_paper.pdf) (Dinu, LREC 2008)
ACL