% Conference-paper record exported from the ACL Anthology (see the url field).
% Field values are brace-delimited so the quote marks inside the abstract need
% no special handling; month uses the predefined BibTeX macro.
@inproceedings{wiseman-etal-2021-data,
  author    = {Wiseman, Sam and Backurs, Arturs and Stratos, Karl},
  title     = {Data-to-text Generation by Splicing Together Nearest Neighbors},
  editor    = {Moens, Marie-Francine and Huang, Xuanjing and Specia, Lucia and Yih, Scott Wen-tau},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  month     = nov,
  year      = {2021},
  address   = {Online and Punta Cana, Dominican Republic},
  publisher = {Association for Computational Linguistics},
  pages     = {4283--4299},
  doi       = {10.18653/v1/2021.emnlp-main.352},
  url       = {https://aclanthology.org/2021.emnlp-main.352},
  abstract  = {We propose to tackle data-to-text generation tasks by directly splicing together retrieved segments of text from {``}neighbor{''} source-target pairs. Unlike recent work that conditions on retrieved neighbors but generates text token-by-token, left-to-right, we learn a policy that directly manipulates segments of neighbor text, by inserting or replacing them in partially constructed generations. Standard techniques for training such a policy require an oracle derivation for each generation, and we prove that finding the shortest such derivation can be reduced to parsing under a particular weighted context-free grammar. We find that policies learned in this way perform on par with strong baselines in terms of automatic and human evaluation, but allow for more interpretable and controllable generation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, citekey wiseman-etal-2021-data. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="wiseman-etal-2021-data">
    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Data-to-text Generation by Splicing Together Nearest Neighbors</title>
    </titleInfo>
    <!-- Authors: one <name> element per person, role given as a marcrelator term -->
    <name type="personal">
      <namePart type="given">Sam</namePart>
      <namePart type="family">Wiseman</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Arturs</namePart>
      <namePart type="family">Backurs</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Karl</namePart>
      <namePart type="family">Stratos</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- Issue date of the paper (year and month) -->
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher details -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Marie-Francine</namePart>
        <namePart type="family">Moens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Xuanjing</namePart>
        <namePart type="family">Huang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lucia</namePart>
        <namePart type="family">Specia</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- This editor carries two given-name parts -->
      <name type="personal">
        <namePart type="given">Scott</namePart>
        <namePart type="given">Wen-tau</namePart>
        <namePart type="family">Yih</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose to tackle data-to-text generation tasks by directly splicing together retrieved segments of text from “neighbor” source-target pairs. Unlike recent work that conditions on retrieved neighbors but generates text token-by-token, left-to-right, we learn a policy that directly manipulates segments of neighbor text, by inserting or replacing them in partially constructed generations. Standard techniques for training such a policy require an oracle derivation for each generation, and we prove that finding the shortest such derivation can be reduced to parsing under a particular weighted context-free grammar. We find that policies learned in this way perform on par with strong baselines in terms of automatic and human evaluation, but allow for more interpretable and controllable generation.</abstract>
    <!-- Identifiers and landing page -->
    <identifier type="citekey">wiseman-etal-2021-data</identifier>
    <identifier type="doi">10.18653/v1/2021.emnlp-main.352</identifier>
    <location>
      <url>https://aclanthology.org/2021.emnlp-main.352</url>
    </location>
    <!-- Position within the host volume: date and page extent -->
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>4283</start>
        <end>4299</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Data-to-text Generation by Splicing Together Nearest Neighbors
%A Wiseman, Sam
%A Backurs, Arturs
%A Stratos, Karl
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F wiseman-etal-2021-data
%X We propose to tackle data-to-text generation tasks by directly splicing together retrieved segments of text from “neighbor” source-target pairs. Unlike recent work that conditions on retrieved neighbors but generates text token-by-token, left-to-right, we learn a policy that directly manipulates segments of neighbor text, by inserting or replacing them in partially constructed generations. Standard techniques for training such a policy require an oracle derivation for each generation, and we prove that finding the shortest such derivation can be reduced to parsing under a particular weighted context-free grammar. We find that policies learned in this way perform on par with strong baselines in terms of automatic and human evaluation, but allow for more interpretable and controllable generation.
%R 10.18653/v1/2021.emnlp-main.352
%U https://aclanthology.org/2021.emnlp-main.352
%U https://doi.org/10.18653/v1/2021.emnlp-main.352
%P 4283-4299
Markdown (Informal)
[Data-to-text Generation by Splicing Together Nearest Neighbors](https://aclanthology.org/2021.emnlp-main.352) (Wiseman et al., EMNLP 2021)
ACL
- Sam Wiseman, Arturs Backurs, and Karl Stratos. 2021. Data-to-text Generation by Splicing Together Nearest Neighbors. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 4283–4299, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.