BibTeX
@inproceedings{maclaughlin-smith-2021-content,
title = "Content-based Models of Quotation",
author = "MacLaughlin, Ansel and
Smith, David",
editor = "Merlo, Paola and
Tiedemann, J{\"o}rg and
Tsarfaty, Reut",
booktitle = "Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume",
month = apr,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.eacl-main.195",
doi = "10.18653/v1/2021.eacl-main.195",
pages = "2296--2314",
abstract = "We explore the task of quotability identification, in which, given a document, we aim to identify which of its passages are the most quotable, i.e. the most likely to be directly quoted by later derived documents. We approach quotability identification as a passage ranking problem and evaluate how well both feature-based and BERT-based (Devlin et al., 2019) models rank the passages in a given document by their predicted quotability. We explore this problem through evaluations on five datasets that span multiple languages (English, Latin) and genres of literature (e.g. poetry, plays, novels) and whose corresponding derived documents are of multiple types (news, journal articles). Our experiments confirm the relatively strong performance of BERT-based models on this task, with the best model, a RoBERTA sequential sentence tagger, achieving an average rho of 0.35 and NDCG@1, 5, 50 of 0.26, 0.31 and 0.40, respectively, across all five datasets.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="maclaughlin-smith-2021-content">
<titleInfo>
<title>Content-based Models of Quotation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ansel</namePart>
<namePart type="family">MacLaughlin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Merlo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jorg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Reut</namePart>
<namePart type="family">Tsarfaty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We explore the task of quotability identification, in which, given a document, we aim to identify which of its passages are the most quotable, i.e. the most likely to be directly quoted by later derived documents. We approach quotability identification as a passage ranking problem and evaluate how well both feature-based and BERT-based (Devlin et al., 2019) models rank the passages in a given document by their predicted quotability. We explore this problem through evaluations on five datasets that span multiple languages (English, Latin) and genres of literature (e.g. poetry, plays, novels) and whose corresponding derived documents are of multiple types (news, journal articles). Our experiments confirm the relatively strong performance of BERT-based models on this task, with the best model, a RoBERTa sequential sentence tagger, achieving an average rho of 0.35 and NDCG@1, 5, 50 of 0.26, 0.31 and 0.40, respectively, across all five datasets.</abstract>
<identifier type="citekey">maclaughlin-smith-2021-content</identifier>
<identifier type="doi">10.18653/v1/2021.eacl-main.195</identifier>
<location>
<url>https://aclanthology.org/2021.eacl-main.195</url>
</location>
<part>
<date>2021-04</date>
<extent unit="page">
<start>2296</start>
<end>2314</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Content-based Models of Quotation
%A MacLaughlin, Ansel
%A Smith, David
%Y Merlo, Paola
%Y Tiedemann, Jörg
%Y Tsarfaty, Reut
%S Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume
%D 2021
%8 April
%I Association for Computational Linguistics
%C Online
%F maclaughlin-smith-2021-content
%X We explore the task of quotability identification, in which, given a document, we aim to identify which of its passages are the most quotable, i.e. the most likely to be directly quoted by later derived documents. We approach quotability identification as a passage ranking problem and evaluate how well both feature-based and BERT-based (Devlin et al., 2019) models rank the passages in a given document by their predicted quotability. We explore this problem through evaluations on five datasets that span multiple languages (English, Latin) and genres of literature (e.g. poetry, plays, novels) and whose corresponding derived documents are of multiple types (news, journal articles). Our experiments confirm the relatively strong performance of BERT-based models on this task, with the best model, a RoBERTa sequential sentence tagger, achieving an average rho of 0.35 and NDCG@1, 5, 50 of 0.26, 0.31 and 0.40, respectively, across all five datasets.
%R 10.18653/v1/2021.eacl-main.195
%U https://aclanthology.org/2021.eacl-main.195
%U https://doi.org/10.18653/v1/2021.eacl-main.195
%P 2296-2314
Markdown (Informal)
[Content-based Models of Quotation](https://aclanthology.org/2021.eacl-main.195) (MacLaughlin & Smith, EACL 2021)
ACL
Ansel MacLaughlin and David Smith. 2021. Content-based Models of Quotation. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pages 2296–2314, Online. Association for Computational Linguistics.
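
The abstract reports Spearman's rho and NDCG@1, 5, 50 over per-document passage rankings. As an illustrative sketch only (not code from the paper), the snippet below shows one common way to compute these two metrics for a single document's passages. The gold quotation counts, the model scores, and the helper name `ndcg_at_k` are hypothetical, and the linear-gain DCG formulation is an assumption that may differ from the authors' exact setup.

```python
import math

from scipy.stats import spearmanr


def ndcg_at_k(relevances_in_predicted_order, k):
    """NDCG@k with linear gain: DCG of the predicted order over DCG of the ideal order."""
    def dcg(rels):
        return sum(rel / math.log2(rank + 2) for rank, rel in enumerate(rels))

    ideal_dcg = dcg(sorted(relevances_in_predicted_order, reverse=True)[:k])
    return dcg(relevances_in_predicted_order[:k]) / ideal_dcg if ideal_dcg > 0 else 0.0


# Hypothetical gold labels (how often each passage was later quoted) and model scores.
true_quote_counts = [0, 3, 1, 0, 5, 2]
model_scores = [0.1, 0.7, 0.4, 0.2, 0.9, 0.3]

# Rank passages by predicted quotability (highest score first), then look up gold counts.
predicted_order = sorted(range(len(model_scores)), key=lambda i: -model_scores[i])
rels_in_predicted_order = [true_quote_counts[i] for i in predicted_order]

rho, _ = spearmanr(model_scores, true_quote_counts)
print(f"Spearman rho: {rho:.2f}")
for k in (1, 5):
    print(f"NDCG@{k}: {ndcg_at_k(rels_in_predicted_order, k):.2f}")
```

Averaging such per-document rho and NDCG@k values over all documents in a collection would roughly correspond to the averaged figures quoted in the abstract.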