@inproceedings{teranishi-matsumoto-2022-coordination,
title = "Coordination Generation via Synchronized Text-Infilling",
author = "Teranishi, Hiroki and
Matsumoto, Yuji",
editor = "Calzolari, Nicoletta and
Huang, Chu-Ren and
Kim, Hansaem and
Pustejovsky, James and
Wanner, Leo and
Choi, Key-Sun and
Ryu, Pum-Mo and
Chen, Hsin-Hsi and
Donatelli, Lucia and
Ji, Heng and
Kurohashi, Sadao and
Paggio, Patrizia and
Xue, Nianwen and
Kim, Seokhwan and
Hahm, Younggyun and
He, Zhong and
Lee, Tony Kyungil and
Santus, Enrico and
Bond, Francis and
Na, Seung-Hoon",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.517",
pages = "5914--5924",
abstract = "Generating synthetic data for supervised learning from large-scale pre-trained language models has enhanced performances across several NLP tasks, especially in low-resource scenarios. In particular, many studies of data augmentation employ masked language models to replace words with other words in a sentence. However, most of them are evaluated on sentence classification tasks and cannot immediately be applied to tasks related to the sentence structure. In this paper, we propose a simple yet effective approach to generating sentences with a coordinate structure in which the boundaries of its conjuncts are explicitly specified. For a given span in a sentence, our method embeds a mask with a coordinating conjunction in two ways ({''}X and [mask]{''}, {''}[mask] and X{''}) and forces masked language models to fill the two blanks with an identical text. To achieve this, we introduce decoding methods for BERT and T5 models with the constraint that predictions for different masks are synchronized. Furthermore, we develop a training framework that effectively selects synthetic examples for the supervised coordination disambiguation task. We demonstrate that our method produces promising coordination instances that provide gains for the task in low-resource settings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="teranishi-matsumoto-2022-coordination">
<titleInfo>
<title>Coordination Generation via Synchronized Text-Infilling</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hiroki</namePart>
<namePart type="family">Teranishi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuji</namePart>
<namePart type="family">Matsumoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chu-Ren</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hansaem</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">James</namePart>
<namePart type="family">Pustejovsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Leo</namePart>
<namePart type="family">Wanner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Key-Sun</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pum-Mo</namePart>
<namePart type="family">Ryu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hsin-Hsi</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Donatelli</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrizia</namePart>
<namePart type="family">Paggio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seokhwan</namePart>
<namePart type="family">Kim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Younggyun</namePart>
<namePart type="family">Hahm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhong</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tony</namePart>
<namePart type="given">Kyungil</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Enrico</namePart>
<namePart type="family">Santus</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Francis</namePart>
<namePart type="family">Bond</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seung-Hoon</namePart>
<namePart type="family">Na</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Generating synthetic data for supervised learning from large-scale pre-trained language models has enhanced performances across several NLP tasks, especially in low-resource scenarios. In particular, many studies of data augmentation employ masked language models to replace words with other words in a sentence. However, most of them are evaluated on sentence classification tasks and cannot immediately be applied to tasks related to the sentence structure. In this paper, we propose a simple yet effective approach to generating sentences with a coordinate structure in which the boundaries of its conjuncts are explicitly specified. For a given span in a sentence, our method embeds a mask with a coordinating conjunction in two ways (”X and [mask]”, ”[mask] and X”) and forces masked language models to fill the two blanks with an identical text. To achieve this, we introduce decoding methods for BERT and T5 models with the constraint that predictions for different masks are synchronized. Furthermore, we develop a training framework that effectively selects synthetic examples for the supervised coordination disambiguation task. We demonstrate that our method produces promising coordination instances that provide gains for the task in low-resource settings.</abstract>
<identifier type="citekey">teranishi-matsumoto-2022-coordination</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.517</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>5914</start>
<end>5924</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Coordination Generation via Synchronized Text-Infilling
%A Teranishi, Hiroki
%A Matsumoto, Yuji
%Y Calzolari, Nicoletta
%Y Huang, Chu-Ren
%Y Kim, Hansaem
%Y Pustejovsky, James
%Y Wanner, Leo
%Y Choi, Key-Sun
%Y Ryu, Pum-Mo
%Y Chen, Hsin-Hsi
%Y Donatelli, Lucia
%Y Ji, Heng
%Y Kurohashi, Sadao
%Y Paggio, Patrizia
%Y Xue, Nianwen
%Y Kim, Seokhwan
%Y Hahm, Younggyun
%Y He, Zhong
%Y Lee, Tony Kyungil
%Y Santus, Enrico
%Y Bond, Francis
%Y Na, Seung-Hoon
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F teranishi-matsumoto-2022-coordination
%X Generating synthetic data for supervised learning from large-scale pre-trained language models has enhanced performances across several NLP tasks, especially in low-resource scenarios. In particular, many studies of data augmentation employ masked language models to replace words with other words in a sentence. However, most of them are evaluated on sentence classification tasks and cannot immediately be applied to tasks related to the sentence structure. In this paper, we propose a simple yet effective approach to generating sentences with a coordinate structure in which the boundaries of its conjuncts are explicitly specified. For a given span in a sentence, our method embeds a mask with a coordinating conjunction in two ways (”X and [mask]”, ”[mask] and X”) and forces masked language models to fill the two blanks with an identical text. To achieve this, we introduce decoding methods for BERT and T5 models with the constraint that predictions for different masks are synchronized. Furthermore, we develop a training framework that effectively selects synthetic examples for the supervised coordination disambiguation task. We demonstrate that our method produces promising coordination instances that provide gains for the task in low-resource settings.
%U https://aclanthology.org/2022.coling-1.517
%P 5914-5924
Markdown (Informal)
[Coordination Generation via Synchronized Text-Infilling](https://aclanthology.org/2022.coling-1.517) (Teranishi & Matsumoto, COLING 2022)
ACL
- Hiroki Teranishi and Yuji Matsumoto. 2022. Coordination Generation via Synchronized Text-Infilling. In Proceedings of the 29th International Conference on Computational Linguistics, pages 5914–5924, Gyeongju, Republic of Korea. International Committee on Computational Linguistics.