% Workshop paper in the TSAR-2022 proceedings (ACL Anthology ID 2022.tsar-1.2).
% Case-sensitive words in title fields are brace-protected as whole words.
% NOTE(review): address holds the conference location, not a publisher city; kept as exported.
@inproceedings{hatagaki-etal-2022-parallel,
  title     = {Parallel Corpus Filtering for {Japanese} Text Simplification},
  author    = {Hatagaki, Koki and
               Kajiwara, Tomoyuki and
               Ninomiya, Takashi},
  editor    = {{\v{S}}tajner, Sanja and
               Saggion, Horacio and
               Ferr{\'e}s, Daniel and
               Shardlow, Matthew and
               Sheang, Kim Cheng and
               North, Kai and
               Zampieri, Marcos and
               Xu, Wei},
  booktitle = {Proceedings of the Workshop on Text Simplification, Accessibility, and Readability ({TSAR}-2022)},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates (Virtual)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.tsar-1.2},
  doi       = {10.18653/v1/2022.tsar-1.2},
  pages     = {12--18},
  abstract  = {We propose a method of parallel corpus filtering for Japanese text simplification. The parallel corpus for this task contains some redundant wording. In this study, we first identify the type and size of noisy sentence pairs in the Japanese text simplification corpus. We then propose a method of parallel corpus filtering to remove each type of noisy sentence pair. Experimental results show that filtering the training parallel corpus with the proposed method improves simplification performance.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for a single paper (citekey hatagaki-etal-2022-parallel) and its host proceedings. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hatagaki-etal-2022-parallel">
    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Parallel Corpus Filtering for Japanese Text Simplification</title>
    </titleInfo>
    <!-- Authors of the paper. -->
    <name type="personal">
      <namePart type="given">Koki</namePart>
      <namePart type="family">Hatagaki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomoyuki</namePart>
      <namePart type="family">Kajiwara</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Takashi</namePart>
      <namePart type="family">Ninomiya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2022-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Workshop on Text Simplification, Accessibility, and Readability (TSAR-2022)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Sanja</namePart>
        <namePart type="family">Štajner</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Horacio</namePart>
        <namePart type="family">Saggion</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Ferrés</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Matthew</namePart>
        <namePart type="family">Shardlow</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <!-- This editor has two given-name parts. -->
      <name type="personal">
        <namePart type="given">Kim</namePart>
        <namePart type="given">Cheng</namePart>
        <namePart type="family">Sheang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kai</namePart>
        <namePart type="family">North</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Marcos</namePart>
        <namePart type="family">Zampieri</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Wei</namePart>
        <namePart type="family">Xu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Abu Dhabi, United Arab Emirates (Virtual)</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>We propose a method of parallel corpus filtering for Japanese text simplification. The parallel corpus for this task contains some redundant wording. In this study, we first identify the type and size of noisy sentence pairs in the Japanese text simplification corpus. We then propose a method of parallel corpus filtering to remove each type of noisy sentence pair. Experimental results show that filtering the training parallel corpus with the proposed method improves simplification performance.</abstract>
    <!-- Identifiers and the landing-page URL for the paper. -->
    <identifier type="citekey">hatagaki-etal-2022-parallel</identifier>
    <identifier type="doi">10.18653/v1/2022.tsar-1.2</identifier>
    <location>
      <url>https://aclanthology.org/2022.tsar-1.2</url>
    </location>
    <!-- Date and page extent of the paper within the host volume. -->
    <part>
      <date>2022-12</date>
      <extent unit="page">
        <start>12</start>
        <end>18</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Parallel Corpus Filtering for Japanese Text Simplification
%A Hatagaki, Koki
%A Kajiwara, Tomoyuki
%A Ninomiya, Takashi
%Y Štajner, Sanja
%Y Saggion, Horacio
%Y Ferrés, Daniel
%Y Shardlow, Matthew
%Y Sheang, Kim Cheng
%Y North, Kai
%Y Zampieri, Marcos
%Y Xu, Wei
%S Proceedings of the Workshop on Text Simplification, Accessibility, and Readability (TSAR-2022)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Virtual)
%F hatagaki-etal-2022-parallel
%X We propose a method of parallel corpus filtering for Japanese text simplification. The parallel corpus for this task contains some redundant wording. In this study, we first identify the type and size of noisy sentence pairs in the Japanese text simplification corpus. We then propose a method of parallel corpus filtering to remove each type of noisy sentence pair. Experimental results show that filtering the training parallel corpus with the proposed method improves simplification performance.
%R 10.18653/v1/2022.tsar-1.2
%U https://aclanthology.org/2022.tsar-1.2
%U https://doi.org/10.18653/v1/2022.tsar-1.2
%P 12-18
Markdown (Informal)
[Parallel Corpus Filtering for Japanese Text Simplification](https://aclanthology.org/2022.tsar-1.2) (Hatagaki et al., TSAR 2022)
ACL
- Koki Hatagaki, Tomoyuki Kajiwara, and Takashi Ninomiya. 2022. Parallel Corpus Filtering for Japanese Text Simplification. In Proceedings of the Workshop on Text Simplification, Accessibility, and Readability (TSAR-2022), pages 12–18, Abu Dhabi, United Arab Emirates (Virtual). Association for Computational Linguistics.