% Workshop paper by Wang, Finch, Utiyama and Sumita (2016), pages 139 to 148 of
% the WAT2016 proceedings. Field values use brace delimiters; the month is the
% bare three-letter macro so that the bibliography style controls its rendering.
@inproceedings{wang-etal-2016-efficient,
  title     = {An Efficient and Effective Online Sentence Segmenter for Simultaneous Interpretation},
  author    = {Wang, Xiaolin and
               Finch, Andrew and
               Utiyama, Masao and
               Sumita, Eiichiro},
  editor    = {Nakazawa, Toshiaki and
               Mino, Hideya and
               Ding, Chenchen and
               Goto, Isao and
               Neubig, Graham and
               Kurohashi, Sadao and
               Riza, Ir. Hammam and
               Bhattacharyya, Pushpak},
  booktitle = {Proceedings of the 3rd Workshop on {A}sian Translation ({WAT}2016)},
  month     = dec,
  year      = {2016},
  address   = {Osaka, Japan},
  publisher = {The COLING 2016 Organizing Committee},
  url       = {https://aclanthology.org/W16-4613},
  pages     = {139--148},
  abstract  = {Simultaneous interpretation is a very challenging application of machine translation in which the input is a stream of words from a speech recognition engine. The key problem is how to segment the stream in an online manner into units suitable for translation. The segmentation process proceeds by calculating a confidence score for each word that indicates the soundness of placing a sentence boundary after it, and then heuristics are employed to determine the position of the boundaries. Multiple variants of the confidence scoring method and segmentation heuristics were studied. Experimental results show that the best performing strategy is not only efficient in terms of average latency per word, but also achieved end-to-end translation quality close to an offline baseline, and close to oracle segmentation.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, identified by the citekey wang-etal-2016-efficient. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wang-etal-2016-efficient">
<titleInfo>
<title>An Efficient and Effective Online Sentence Segmenter for Simultaneous Interpretation</title>
</titleInfo>
<!-- Paper authors: one personal name element per author, each with the role term "author". -->
<name type="personal">
<namePart type="given">Xiaolin</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andrew</namePart>
<namePart type="family">Finch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masao</namePart>
<namePart type="family">Utiyama</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eiichiro</namePart>
<namePart type="family">Sumita</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host item: the proceedings volume containing this paper, with its editors, publisher and place. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 3rd Workshop on Asian Translation (WAT2016)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Toshiaki</namePart>
<namePart type="family">Nakazawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hideya</namePart>
<namePart type="family">Mino</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenchen</namePart>
<namePart type="family">Ding</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isao</namePart>
<namePart type="family">Goto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graham</namePart>
<namePart type="family">Neubig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sadao</namePart>
<namePart type="family">Kurohashi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<!-- NOTE(review): "Ir." is stored as a separate given-name part; it may be an honorific rather than a name, confirm against the source metadata. -->
<namePart type="given">Ir.</namePart>
<namePart type="given">Hammam</namePart>
<namePart type="family">Riza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pushpak</namePart>
<namePart type="family">Bhattacharyya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>The COLING 2016 Organizing Committee</publisher>
<place>
<placeTerm type="text">Osaka, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Simultaneous interpretation is a very challenging application of machine translation in which the input is a stream of words from a speech recognition engine. The key problem is how to segment the stream in an online manner into units suitable for translation. The segmentation process proceeds by calculating a confidence score for each word that indicates the soundness of placing a sentence boundary after it, and then heuristics are employed to determine the position of the boundaries. Multiple variants of the confidence scoring method and segmentation heuristics were studied. Experimental results show that the best performing strategy is not only efficient in terms of average latency per word, but also achieved end-to-end translation quality close to an offline baseline, and close to oracle segmentation.</abstract>
<identifier type="citekey">wang-etal-2016-efficient</identifier>
<location>
<url>https://aclanthology.org/W16-4613</url>
</location>
<!-- Part details: the date and the first and last page of the paper. -->
<part>
<date>2016-12</date>
<extent unit="page">
<start>139</start>
<end>148</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Efficient and Effective Online Sentence Segmenter for Simultaneous Interpretation
%A Wang, Xiaolin
%A Finch, Andrew
%A Utiyama, Masao
%A Sumita, Eiichiro
%Y Nakazawa, Toshiaki
%Y Mino, Hideya
%Y Ding, Chenchen
%Y Goto, Isao
%Y Neubig, Graham
%Y Kurohashi, Sadao
%Y Riza, Ir. Hammam
%Y Bhattacharyya, Pushpak
%S Proceedings of the 3rd Workshop on Asian Translation (WAT2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F wang-etal-2016-efficient
%X Simultaneous interpretation is a very challenging application of machine translation in which the input is a stream of words from a speech recognition engine. The key problem is how to segment the stream in an online manner into units suitable for translation. The segmentation process proceeds by calculating a confidence score for each word that indicates the soundness of placing a sentence boundary after it, and then heuristics are employed to determine the position of the boundaries. Multiple variants of the confidence scoring method and segmentation heuristics were studied. Experimental results show that the best performing strategy is not only efficient in terms of average latency per word, but also achieved end-to-end translation quality close to an offline baseline, and close to oracle segmentation.
%U https://aclanthology.org/W16-4613
%P 139-148
Markdown (Informal)
[An Efficient and Effective Online Sentence Segmenter for Simultaneous Interpretation](https://aclanthology.org/W16-4613) (Wang et al., WAT 2016)
ACL