@inproceedings{jumel-etal-2020-tesa,
title = "{TESA}: A {T}ask in {E}ntity {S}emantic {A}ggregation for Abstractive Summarization",
author = "Jumel, Cl{\'e}ment and
Louis, Annie and
Cheung, Jackie Chi Kit",
editor = "Webber, Bonnie and
Cohn, Trevor and
He, Yulan and
Liu, Yang",
booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.emnlp-main.646",
doi = "10.18653/v1/2020.emnlp-main.646",
pages = "8031--8050",
abstract = "Human-written texts contain frequent generalizations and semantic aggregation of content. In a document, they may refer to a pair of named entities such as {`}London{'} and {`}Paris{'} with different expressions: {``}the major cities{''}, {``}the capital cities{''} and {``}two European cities{''}. Yet generation, especially, abstractive summarization systems have so far focused heavily on paraphrasing and simplifying the source content, to the exclusion of such semantic abstraction capabilities. In this paper, we present a new dataset and task aimed at the semantic aggregation of entities. TESA contains a dataset of 5.3K crowd-sourced entity aggregations of Person, Organization, and Location named entities. The aggregations are document-appropriate, meaning that they are produced by annotators to match the situational context of a given news article from the New York Times. We then build baseline models for generating aggregations given a tuple of entities and document context. We finetune on TESA an encoder-decoder language model and compare it with simpler classification methods based on linguistically informed features. Our quantitative and qualitative evaluations show reasonable performance in making a choice from a given list of expressions, but free-form expressions are understandably harder to generate and evaluate.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jumel-etal-2020-tesa">
<titleInfo>
<title>TESA: A Task in Entity Semantic Aggregation for Abstractive Summarization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Clément</namePart>
<namePart type="family">Jumel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annie</namePart>
<namePart type="family">Louis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jackie</namePart>
<namePart type="given">Chi</namePart>
<namePart type="given">Kit</namePart>
<namePart type="family">Cheung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bonnie</namePart>
<namePart type="family">Webber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trevor</namePart>
<namePart type="family">Cohn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Human-written texts contain frequent generalizations and semantic aggregation of content. In a document, they may refer to a pair of named entities such as ‘London’ and ‘Paris’ with different expressions: “the major cities”, “the capital cities” and “two European cities”. Yet generation, especially, abstractive summarization systems have so far focused heavily on paraphrasing and simplifying the source content, to the exclusion of such semantic abstraction capabilities. In this paper, we present a new dataset and task aimed at the semantic aggregation of entities. TESA contains a dataset of 5.3K crowd-sourced entity aggregations of Person, Organization, and Location named entities. The aggregations are document-appropriate, meaning that they are produced by annotators to match the situational context of a given news article from the New York Times. We then build baseline models for generating aggregations given a tuple of entities and document context. We finetune on TESA an encoder-decoder language model and compare it with simpler classification methods based on linguistically informed features. Our quantitative and qualitative evaluations show reasonable performance in making a choice from a given list of expressions, but free-form expressions are understandably harder to generate and evaluate.</abstract>
<identifier type="citekey">jumel-etal-2020-tesa</identifier>
<identifier type="doi">10.18653/v1/2020.emnlp-main.646</identifier>
<location>
<url>https://aclanthology.org/2020.emnlp-main.646</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>8031</start>
<end>8050</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T TESA: A Task in Entity Semantic Aggregation for Abstractive Summarization
%A Jumel, Clément
%A Louis, Annie
%A Cheung, Jackie Chi Kit
%Y Webber, Bonnie
%Y Cohn, Trevor
%Y He, Yulan
%Y Liu, Yang
%S Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F jumel-etal-2020-tesa
%X Human-written texts contain frequent generalizations and semantic aggregation of content. In a document, they may refer to a pair of named entities such as ‘London’ and ‘Paris’ with different expressions: “the major cities”, “the capital cities” and “two European cities”. Yet generation, especially, abstractive summarization systems have so far focused heavily on paraphrasing and simplifying the source content, to the exclusion of such semantic abstraction capabilities. In this paper, we present a new dataset and task aimed at the semantic aggregation of entities. TESA contains a dataset of 5.3K crowd-sourced entity aggregations of Person, Organization, and Location named entities. The aggregations are document-appropriate, meaning that they are produced by annotators to match the situational context of a given news article from the New York Times. We then build baseline models for generating aggregations given a tuple of entities and document context. We finetune on TESA an encoder-decoder language model and compare it with simpler classification methods based on linguistically informed features. Our quantitative and qualitative evaluations show reasonable performance in making a choice from a given list of expressions, but free-form expressions are understandably harder to generate and evaluate.
%R 10.18653/v1/2020.emnlp-main.646
%U https://aclanthology.org/2020.emnlp-main.646
%U https://doi.org/10.18653/v1/2020.emnlp-main.646
%P 8031-8050
Markdown (Informal)
[TESA: A Task in Entity Semantic Aggregation for Abstractive Summarization](https://aclanthology.org/2020.emnlp-main.646) (Jumel et al., EMNLP 2020)
ACL