@inproceedings{de-loupy-etal-2010-french,
title = "A {F}rench Human Reference Corpus for Multi-Document Summarization and Sentence Compression",
author = "de Loupy, Claude and
Gu{\'e}gan, Marie and
Ayache, Christelle and
Seng, Somara and
Moreno, Juan-Manuel Torres",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/919_Paper.pdf",
abstract = "This paper presents two corpora produced within the RPM2 project: a multi-document summarization corpus and a sentence compression corpus. Both corpora are in French. The first one is the only one we know in this language. It contains 20 topics with 20 documents each. A first set of 10 documents per topic is summarized and then the second set is used to produce an update summarization (new information). 4 annotators were involved and produced a total of 160 abstracts. The second corpus contains all the sentences of the first one. 4 annotators were asked to compress the 8432 sentences. This is the biggest corpus of compressed sentences we know, whatever the language. The paper provides some figures in order to compare the different annotators: compression rates, number of tokens per sentence, percentage of tokens kept according to their POS, position of dropped tokens in the sentence compression phase, etc. These figures show important differences from an annotator to the other. Another point is the different strategies of compression used according to the length of the sentence.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="de-loupy-etal-2010-french">
<titleInfo>
<title>A French Human Reference Corpus for Multi-Document Summarization and Sentence Compression</title>
</titleInfo>
<name type="personal">
<namePart type="given">Claude</namePart>
<namePart type="family">de Loupy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie</namePart>
<namePart type="family">Guégan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christelle</namePart>
<namePart type="family">Ayache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Somara</namePart>
<namePart type="family">Seng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juan-Manuel</namePart>
<namePart type="given">Torres</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents two corpora produced within the RPM2 project: a multi-document summarization corpus and a sentence compression corpus. Both corpora are in French. The first one is the only one we know in this language. It contains 20 topics with 20 documents each. A first set of 10 documents per topic is summarized and then the second set is used to produce an update summarization (new information). 4 annotators were involved and produced a total of 160 abstracts. The second corpus contains all the sentences of the first one. 4 annotators were asked to compress the 8432 sentences. This is the biggest corpus of compressed sentences we know, whatever the language. The paper provides some figures in order to compare the different annotators: compression rates, number of tokens per sentence, percentage of tokens kept according to their POS, position of dropped tokens in the sentence compression phase, etc. These figures show important differences from an annotator to the other. Another point is the different strategies of compression used according to the length of the sentence.</abstract>
<identifier type="citekey">de-loupy-etal-2010-french</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/919_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A French Human Reference Corpus for Multi-Document Summarization and Sentence Compression
%A de Loupy, Claude
%A Guégan, Marie
%A Ayache, Christelle
%A Seng, Somara
%A Moreno, Juan-Manuel Torres
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F de-loupy-etal-2010-french
%X This paper presents two corpora produced within the RPM2 project: a multi-document summarization corpus and a sentence compression corpus. Both corpora are in French. The first one is the only one we know in this language. It contains 20 topics with 20 documents each. A first set of 10 documents per topic is summarized and then the second set is used to produce an update summarization (new information). 4 annotators were involved and produced a total of 160 abstracts. The second corpus contains all the sentences of the first one. 4 annotators were asked to compress the 8432 sentences. This is the biggest corpus of compressed sentences we know, whatever the language. The paper provides some figures in order to compare the different annotators: compression rates, number of tokens per sentence, percentage of tokens kept according to their POS, position of dropped tokens in the sentence compression phase, etc. These figures show important differences from an annotator to the other. Another point is the different strategies of compression used according to the length of the sentence.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/919_Paper.pdf
Markdown (Informal)
[A French Human Reference Corpus for Multi-Document Summarization and Sentence Compression](http://www.lrec-conf.org/proceedings/lrec2010/pdf/919_Paper.pdf) (de Loupy et al., LREC 2010)
ACL