@inproceedings{cho-etal-2017-domain,
title = "Domain-independent Punctuation and Segmentation Insertion",
author = "Cho, Eunah and
Niehues, Jan and
Waibel, Alex",
editor = "Sakti, Sakriani and
Utiyama, Masao",
booktitle = "Proceedings of the 14th International Conference on Spoken Language Translation",
month = dec # " 14-15",
year = "2017",
address = "Tokyo, Japan",
publisher = "International Workshop on Spoken Language Translation",
url = "https://aclanthology.org/2017.iwslt-1.11",
pages = "74--81",
abstract = "Punctuation and segmentation is crucial in spoken language translation, as it has a strong impact to translation performance. However, the impact of rare or unknown words in the performance of punctuation and segmentation insertion has not been thoroughly studied. In this work, we simulate various degrees of domain-match in testing scenario and investigate their impact to the punctuation insertion task. We explore three rare word generalizing schemes using part-of-speech (POS) tokens. Experiments show that generalizing rare and unknown words greatly improves the punctuation insertion performance, reaching up to 8.8 points of improvement in F-score when applied to the out-of-domain test scenario. We show that this improvement in punctuation quality has a positive impact on a following machine translation (MT) performance, improving it by 2 BLEU points.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cho-etal-2017-domain">
<titleInfo>
<title>Domain-independent Punctuation and Segmentation Insertion</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eunah</namePart>
<namePart type="family">Cho</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Niehues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Waibel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued encoding="w3cdtf" point="start">2017-12-14</dateIssued>
<dateIssued encoding="w3cdtf" point="end">2017-12-15</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 14th International Conference on Spoken Language Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masao</namePart>
<namePart type="family">Utiyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Workshop on Spoken Language Translation</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Punctuation and segmentation is crucial in spoken language translation, as it has a strong impact to translation performance. However, the impact of rare or unknown words in the performance of punctuation and segmentation insertion has not been thoroughly studied. In this work, we simulate various degrees of domain-match in testing scenario and investigate their impact to the punctuation insertion task. We explore three rare word generalizing schemes using part-of-speech (POS) tokens. Experiments show that generalizing rare and unknown words greatly improves the punctuation insertion performance, reaching up to 8.8 points of improvement in F-score when applied to the out-of-domain test scenario. We show that this improvement in punctuation quality has a positive impact on a following machine translation (MT) performance, improving it by 2 BLEU points.</abstract>
<identifier type="citekey">cho-etal-2017-domain</identifier>
<location>
<url>https://aclanthology.org/2017.iwslt-1.11</url>
</location>
<part>
<date encoding="w3cdtf" point="start">2017-12-14</date>
<date encoding="w3cdtf" point="end">2017-12-15</date>
<extent unit="page">
<start>74</start>
<end>81</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Domain-independent Punctuation and Segmentation Insertion
%A Cho, Eunah
%A Niehues, Jan
%A Waibel, Alex
%Y Sakti, Sakriani
%Y Utiyama, Masao
%S Proceedings of the 14th International Conference on Spoken Language Translation
%D 2017
%8 Dec 14-15
%I International Workshop on Spoken Language Translation
%C Tokyo, Japan
%F cho-etal-2017-domain
%X Punctuation and segmentation is crucial in spoken language translation, as it has a strong impact to translation performance. However, the impact of rare or unknown words in the performance of punctuation and segmentation insertion has not been thoroughly studied. In this work, we simulate various degrees of domain-match in testing scenario and investigate their impact to the punctuation insertion task. We explore three rare word generalizing schemes using part-of-speech (POS) tokens. Experiments show that generalizing rare and unknown words greatly improves the punctuation insertion performance, reaching up to 8.8 points of improvement in F-score when applied to the out-of-domain test scenario. We show that this improvement in punctuation quality has a positive impact on a following machine translation (MT) performance, improving it by 2 BLEU points.
%U https://aclanthology.org/2017.iwslt-1.11
%P 74-81
Markdown (Informal)
[Domain-independent Punctuation and Segmentation Insertion](https://aclanthology.org/2017.iwslt-1.11) (Cho et al., IWSLT 2017)
ACL