@inproceedings{giannakopoulos-pittaras-2019-summary,
  title     = {The Summary Evaluation Task in the {MultiLing} - {RANLP} 2019 Workshop},
  author    = {Giannakopoulos, George and Pittaras, Nikiforos},
  editor    = {Giannakopoulos, George},
  booktitle = {Proceedings of the Workshop MultiLing 2019: Summarization Across Languages, Genres and Sources},
  month     = sep,
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd.},
  url       = {https://aclanthology.org/W19-8903/},
  doi       = {10.26615/978-954-452-058-8_003},
  pages     = {11--16},
  abstract  = {This report covers the summarization evaluation task, proposed to the summarization community via the MultiLing 2019 Workshop of the RANLP 2019 conference. The task aims to encourage the development of automatic summarization evaluation methods closely aligned with manual, human-authored summary grades and judgements. A multilingual setting is adopted, building upon a corpus of Wikinews articles across 6 languages (English, Arabic, Romanian, Greek, Spanish and Czech). The evaluation utilizes human (golden) and machine-generated (peer) summaries, which have been assigned human evaluation scores from previous MultiLing tasks. Using these resources, the original corpus is augmented with synthetic data, combining summary texts under three different strategies (reorder, merge and replace), each engineered to introduce noise in the summary in a controlled and quantifiable way. We estimate that the utilization of such data can extract and highlight useful attributes of summary quality estimation, aiding the creation of data-driven automatic methods with an increased correlation to human summary evaluations across domains and languages. This paper provides a brief description of the summary evaluation task, the data generation protocol and the resources made available by the MultiLing community, towards improving automatic summarization evaluation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="giannakopoulos-pittaras-2019-summary">
<titleInfo>
<title>The Summary Evaluation Task in the MultiLing - RANLP 2019 Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Giannakopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikiforos</namePart>
<namePart type="family">Pittaras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop MultiLing 2019: Summarization Across Languages, Genres and Sources</title>
</titleInfo>
<name type="personal">
<namePart type="given">George</namePart>
<namePart type="family">Giannakopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This report covers the summarization evaluation task, proposed to the summarization community via the MultiLing 2019 Workshop of the RANLP 2019 conference. The task aims to encourage the development of automatic summarization evaluation methods closely aligned with manual, human-authored summary grades and judgements. A multilingual setting is adopted, building upon a corpus of Wikinews articles across 6 languages (English, Arabic, Romanian, Greek, Spanish and Czech). The evaluation utilizes human (golden) and machine-generated (peer) summaries, which have been assigned human evaluation scores from previous MultiLing tasks. Using these resources, the original corpus is augmented with synthetic data, combining summary texts under three different strategies (reorder, merge and replace), each engineered to introduce noise in the summary in a controlled and quantifiable way. We estimate that the utilization of such data can extract and highlight useful attributes of summary quality estimation, aiding the creation of data-driven automatic methods with an increased correlation to human summary evaluations across domains and languages. This paper provides a brief description of the summary evaluation task, the data generation protocol and the resources made available by the MultiLing community, towards improving automatic summarization evaluation.</abstract>
<identifier type="citekey">giannakopoulos-pittaras-2019-summary</identifier>
<identifier type="doi">10.26615/978-954-452-058-8_003</identifier>
<location>
<url>https://aclanthology.org/W19-8903/</url>
</location>
<part>
<date>2019-09</date>
<extent unit="page">
<start>11</start>
<end>16</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Summary Evaluation Task in the MultiLing - RANLP 2019 Workshop
%A Giannakopoulos, George
%A Pittaras, Nikiforos
%Y Giannakopoulos, George
%S Proceedings of the Workshop MultiLing 2019: Summarization Across Languages, Genres and Sources
%D 2019
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F giannakopoulos-pittaras-2019-summary
%X This report covers the summarization evaluation task, proposed to the summarization community via the MultiLing 2019 Workshop of the RANLP 2019 conference. The task aims to encourage the development of automatic summarization evaluation methods closely aligned with manual, human-authored summary grades and judgements. A multilingual setting is adopted, building upon a corpus of Wikinews articles across 6 languages (English, Arabic, Romanian, Greek, Spanish and Czech). The evaluation utilizes human (golden) and machine-generated (peer) summaries, which have been assigned human evaluation scores from previous MultiLing tasks. Using these resources, the original corpus is augmented with synthetic data, combining summary texts under three different strategies (reorder, merge and replace), each engineered to introduce noise in the summary in a controlled and quantifiable way. We estimate that the utilization of such data can extract and highlight useful attributes of summary quality estimation, aiding the creation of data-driven automatic methods with an increased correlation to human summary evaluations across domains and languages. This paper provides a brief description of the summary evaluation task, the data generation protocol and the resources made available by the MultiLing community, towards improving automatic summarization evaluation.
%R 10.26615/978-954-452-058-8_003
%U https://aclanthology.org/W19-8903/
%U https://doi.org/10.26615/978-954-452-058-8_003
%P 11-16
Markdown (Informal)
[The Summary Evaluation Task in the MultiLing - RANLP 2019 Workshop](https://aclanthology.org/W19-8903/) (Giannakopoulos & Pittaras, RANLP 2019)
ACL