@inproceedings{bouma-etal-2010-towards,
title = "Towards a Large Parallel Corpus of Cleft Constructions",
author = "Bouma, Gerlof and
{\O}vrelid, Lilja and
Kuhn, Jonas",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/291_Paper.pdf",
abstract = "We present our efforts to create a large-scale, semi-automatically annotated parallel corpus of cleft constructions. The corpus is intended to reduce or make more effective the manual task of finding examples of clefts in a corpus. The corpus is being developed in the context of the Collaborative Research Centre SFB 632, which is a large, interdisciplinary research initiative to study information structure, at the University of Potsdam and the Humboldt University in Berlin. The corpus is based on the Europarl corpus (version 3). We show how state-of-the-art NLP tools, like POS taggers and statistical dependency parsers, may facilitate powerful and precise searches. We argue that identifying clefts using automatically added syntactic structure annotation is ultimately to be preferred over using lower level, though more robust, extraction methods like regular expression matching. An evaluation of the extraction method for one of the languages also offers some support for this method. We end the paper by discussing the resulting corpus itself. We present some examples of interesting clefts and translational counterparts from the corpus and suggest ways of exploiting our newly created resource in the cross-linguistic study of clefts.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bouma-etal-2010-towards">
<titleInfo>
<title>Towards a Large Parallel Corpus of Cleft Constructions</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gerlof</namePart>
<namePart type="family">Bouma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lilja</namePart>
<namePart type="family">Øvrelid</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jonas</namePart>
<namePart type="family">Kuhn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present our efforts to create a large-scale, semi-automatically annotated parallel corpus of cleft constructions. The corpus is intended to reduce or make more effective the manual task of finding examples of clefts in a corpus. The corpus is being developed in the context of the Collaborative Research Centre SFB 632, which is a large, interdisciplinary research initiative to study information structure, at the University of Potsdam and the Humboldt University in Berlin. The corpus is based on the Europarl corpus (version 3). We show how state-of-the-art NLP tools, like POS taggers and statistical dependency parsers, may facilitate powerful and precise searches. We argue that identifying clefts using automatically added syntactic structure annotation is ultimately to be preferred over using lower level, though more robust, extraction methods like regular expression matching. An evaluation of the extraction method for one of the languages also offers some support for this method. We end the paper by discussing the resulting corpus itself. We present some examples of interesting clefts and translational counterparts from the corpus and suggest ways of exploiting our newly created resource in the cross-linguistic study of clefts.</abstract>
<identifier type="citekey">bouma-etal-2010-towards</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/291_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards a Large Parallel Corpus of Cleft Constructions
%A Bouma, Gerlof
%A Øvrelid, Lilja
%A Kuhn, Jonas
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F bouma-etal-2010-towards
%X We present our efforts to create a large-scale, semi-automatically annotated parallel corpus of cleft constructions. The corpus is intended to reduce or make more effective the manual task of finding examples of clefts in a corpus. The corpus is being developed in the context of the Collaborative Research Centre SFB 632, which is a large, interdisciplinary research initiative to study information structure, at the University of Potsdam and the Humboldt University in Berlin. The corpus is based on the Europarl corpus (version 3). We show how state-of-the-art NLP tools, like POS taggers and statistical dependency parsers, may facilitate powerful and precise searches. We argue that identifying clefts using automatically added syntactic structure annotation is ultimately to be preferred over using lower level, though more robust, extraction methods like regular expression matching. An evaluation of the extraction method for one of the languages also offers some support for this method. We end the paper by discussing the resulting corpus itself. We present some examples of interesting clefts and translational counterparts from the corpus and suggest ways of exploiting our newly created resource in the cross-linguistic study of clefts.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/291_Paper.pdf
Markdown (Informal)
[Towards a Large Parallel Corpus of Cleft Constructions](http://www.lrec-conf.org/proceedings/lrec2010/pdf/291_Paper.pdf) (Bouma et al., LREC 2010)
ACL
- Gerlof Bouma, Lilja Øvrelid, and Jonas Kuhn. 2010. Towards a Large Parallel Corpus of Cleft Constructions. In Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC'10), Valletta, Malta. European Language Resources Association (ELRA).