@inproceedings{murata-etal-2010-construction,
title = "Construction of Chunk-Aligned Bilingual Lecture Corpus for Simultaneous Machine Translation",
author = "Murata, Masaki and
Ohno, Tomohiro and
Matsubara, Shigeki and
Inagaki, Yasuyoshi",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/581_Paper.pdf",
abstract = "With the development of speech and language processing, speech translation systems have been developed. These studies target spoken dialogues, and employ consecutive interpretation, which uses a sentence as the translation unit. On the other hand, there exist a few researches about simultaneous interpreting, and recently, the language resources for promoting simultaneous interpreting research, such as the publication of an analytical large-scale corpus, has been prepared. For the future, it is necessary to make the corpora more practical toward realization of a simultaneous interpreting system. In this paper, we describe the construction of a bilingual corpus which can be used for simultaneous lecture interpreting research. Simultaneous lecture interpreting systems are required to recognize translation units in the middle of a sentence, and generate its translation at the proper timing. We constructed the bilingual lecture corpus by the following steps. First, we segmented sentences in the lecture data into semantically meaningful units for the simultaneous interpreting. And then, we assigned the translations to these units from the viewpoint of the simultaneous interpreting. In addition, we investigated the possibility of automatically detecting the simultaneous interpreting timing from our corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="murata-etal-2010-construction">
<titleInfo>
<title>Construction of Chunk-Aligned Bilingual Lecture Corpus for Simultaneous Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Masaki</namePart>
<namePart type="family">Murata</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tomohiro</namePart>
<namePart type="family">Ohno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shigeki</namePart>
<namePart type="family">Matsubara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yasuyoshi</namePart>
<namePart type="family">Inagaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>With the development of speech and language processing, speech translation systems have been developed. These studies target spoken dialogues, and employ consecutive interpretation, which uses a sentence as the translation unit. On the other hand, there exist a few researches about simultaneous interpreting, and recently, the language resources for promoting simultaneous interpreting research, such as the publication of an analytical large-scale corpus, has been prepared. For the future, it is necessary to make the corpora more practical toward realization of a simultaneous interpreting system. In this paper, we describe the construction of a bilingual corpus which can be used for simultaneous lecture interpreting research. Simultaneous lecture interpreting systems are required to recognize translation units in the middle of a sentence, and generate its translation at the proper timing. We constructed the bilingual lecture corpus by the following steps. First, we segmented sentences in the lecture data into semantically meaningful units for the simultaneous interpreting. And then, we assigned the translations to these units from the viewpoint of the simultaneous interpreting. In addition, we investigated the possibility of automatically detecting the simultaneous interpreting timing from our corpus.</abstract>
<identifier type="citekey">murata-etal-2010-construction</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/581_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Construction of Chunk-Aligned Bilingual Lecture Corpus for Simultaneous Machine Translation
%A Murata, Masaki
%A Ohno, Tomohiro
%A Matsubara, Shigeki
%A Inagaki, Yasuyoshi
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F murata-etal-2010-construction
%X With the development of speech and language processing, speech translation systems have been developed. These studies target spoken dialogues, and employ consecutive interpretation, which uses a sentence as the translation unit. On the other hand, there exist a few researches about simultaneous interpreting, and recently, the language resources for promoting simultaneous interpreting research, such as the publication of an analytical large-scale corpus, has been prepared. For the future, it is necessary to make the corpora more practical toward realization of a simultaneous interpreting system. In this paper, we describe the construction of a bilingual corpus which can be used for simultaneous lecture interpreting research. Simultaneous lecture interpreting systems are required to recognize translation units in the middle of a sentence, and generate its translation at the proper timing. We constructed the bilingual lecture corpus by the following steps. First, we segmented sentences in the lecture data into semantically meaningful units for the simultaneous interpreting. And then, we assigned the translations to these units from the viewpoint of the simultaneous interpreting. In addition, we investigated the possibility of automatically detecting the simultaneous interpreting timing from our corpus.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/581_Paper.pdf
Markdown (Informal)
[Construction of Chunk-Aligned Bilingual Lecture Corpus for Simultaneous Machine Translation](http://www.lrec-conf.org/proceedings/lrec2010/pdf/581_Paper.pdf) (Murata et al., LREC 2010)
ACL