@inproceedings{wein-etal-2024-barriers,
title = "Barriers to Effective Evaluation of Simultaneous Interpretation",
author = "Wein, Shira and
I, Te and
Cherry, Colin and
Juraska, Juraj and
Padfield, Dirk and
Macherey, Wolfgang",
editor = "Graham, Yvette and
Purver, Matthew",
booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
month = mar,
year = "2024",
address = "St. Julian{'}s, Malta",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.findings-eacl.15",
pages = "209--219",
abstract = "Simultaneous interpretation is an especially challenging form of translation because it requires converting speech from one language to another in real-time. Though prior work has relied on out-of-the-box machine translation metrics to evaluate interpretation data, we hypothesize that strategies common in high-quality human interpretations, such as summarization, may not be handled well by standard machine translation metrics. In this work, we examine both qualitatively and quantitatively four potential barriers to evaluation of interpretation: disfluency, summarization, paraphrasing, and segmentation. Our experiments reveal that, while some machine translation metrics correlate fairly well with human judgments of interpretation quality, much work is still needed to account for strategies of interpretation during evaluation. As a first step to address this, we develop a fine-tuned model for interpretation evaluation, and achieve better correlation with human judgments than the state-of-the-art machine translation metrics.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wein-etal-2024-barriers">
<titleInfo>
<title>Barriers to Effective Evaluation of Simultaneous Interpretation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shira</namePart>
<namePart type="family">Wein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Te</namePart>
<namePart type="family">I</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Colin</namePart>
<namePart type="family">Cherry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juraj</namePart>
<namePart type="family">Juraska</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Padfield</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wolfgang</namePart>
<namePart type="family">Macherey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EACL 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yvette</namePart>
<namePart type="family">Graham</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Purver</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">St. Julian’s, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Simultaneous interpretation is an especially challenging form of translation because it requires converting speech from one language to another in real-time. Though prior work has relied on out-of-the-box machine translation metrics to evaluate interpretation data, we hypothesize that strategies common in high-quality human interpretations, such as summarization, may not be handled well by standard machine translation metrics. In this work, we examine both qualitatively and quantitatively four potential barriers to evaluation of interpretation: disfluency, summarization, paraphrasing, and segmentation. Our experiments reveal that, while some machine translation metrics correlate fairly well with human judgments of interpretation quality, much work is still needed to account for strategies of interpretation during evaluation. As a first step to address this, we develop a fine-tuned model for interpretation evaluation, and achieve better correlation with human judgments than the state-of-the-art machine translation metrics.</abstract>
<identifier type="citekey">wein-etal-2024-barriers</identifier>
<location>
<url>https://aclanthology.org/2024.findings-eacl.15</url>
</location>
<part>
<date>2024-03</date>
<extent unit="page">
<start>209</start>
<end>219</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Barriers to Effective Evaluation of Simultaneous Interpretation
%A Wein, Shira
%A I, Te
%A Cherry, Colin
%A Juraska, Juraj
%A Padfield, Dirk
%A Macherey, Wolfgang
%Y Graham, Yvette
%Y Purver, Matthew
%S Findings of the Association for Computational Linguistics: EACL 2024
%D 2024
%8 March
%I Association for Computational Linguistics
%C St. Julian’s, Malta
%F wein-etal-2024-barriers
%X Simultaneous interpretation is an especially challenging form of translation because it requires converting speech from one language to another in real-time. Though prior work has relied on out-of-the-box machine translation metrics to evaluate interpretation data, we hypothesize that strategies common in high-quality human interpretations, such as summarization, may not be handled well by standard machine translation metrics. In this work, we examine both qualitatively and quantitatively four potential barriers to evaluation of interpretation: disfluency, summarization, paraphrasing, and segmentation. Our experiments reveal that, while some machine translation metrics correlate fairly well with human judgments of interpretation quality, much work is still needed to account for strategies of interpretation during evaluation. As a first step to address this, we develop a fine-tuned model for interpretation evaluation, and achieve better correlation with human judgments than the state-of-the-art machine translation metrics.
%U https://aclanthology.org/2024.findings-eacl.15
%P 209-219
Markdown (Informal)
[Barriers to Effective Evaluation of Simultaneous Interpretation](https://aclanthology.org/2024.findings-eacl.15) (Wein et al., Findings 2024)
ACL