@inproceedings{jain-etal-2022-quality,
title = "Quality Scoring of Source Words in Neural Translation Models",
author = "Jain, Priyesh and
Sarawagi, Sunita and
Tomar, Tushar",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-main.732",
doi = "10.18653/v1/2022.emnlp-main.732",
pages = "10683--10691",
abstract = "Word-level quality scores on input source sentences can provide useful feedback to an end-user when translating into an unfamiliar target language. Recent approaches either require training special word-scoring models based on synthetic data or require repeated invocation of the translation model. We propose a simple approach based on comparing the difference of probabilities from two language models. The basic premise of our method is to reason how well each source word is explained by the target sentence as against the source language model. Our approach provides up to five points higher F1 scores and is significantly faster than the state of the art methods on three language pairs. Also, our method does not require training any new model. We release a public dataset on word omissions and mistranslations on a new language pair.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="jain-etal-2022-quality">
<titleInfo>
<title>Quality Scoring of Source Words in Neural Translation Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Priyesh</namePart>
<namePart type="family">Jain</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunita</namePart>
<namePart type="family">Sarawagi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tushar</namePart>
<namePart type="family">Tomar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Word-level quality scores on input source sentences can provide useful feedback to an end-user when translating into an unfamiliar target language. Recent approaches either require training special word-scoring models based on synthetic data or require repeated invocation of the translation model. We propose a simple approach based on comparing the difference of probabilities from two language models. The basic premise of our method is to reason how well each source word is explained by the target sentence as against the source language model. Our approach provides up to five points higher F1 scores and is significantly faster than the state of the art methods on three language pairs. Also, our method does not require training any new model. We release a public dataset on word omissions and mistranslations on a new language pair.</abstract>
<identifier type="citekey">jain-etal-2022-quality</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-main.732</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-main.732</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>10683</start>
<end>10691</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Quality Scoring of Source Words in Neural Translation Models
%A Jain, Priyesh
%A Sarawagi, Sunita
%A Tomar, Tushar
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F jain-etal-2022-quality
%X Word-level quality scores on input source sentences can provide useful feedback to an end-user when translating into an unfamiliar target language. Recent approaches either require training special word-scoring models based on synthetic data or require repeated invocation of the translation model. We propose a simple approach based on comparing the difference of probabilities from two language models. The basic premise of our method is to reason how well each source word is explained by the target sentence as against the source language model. Our approach provides up to five points higher F1 scores and is significantly faster than the state of the art methods on three language pairs. Also, our method does not require training any new model. We release a public dataset on word omissions and mistranslations on a new language pair.
%R 10.18653/v1/2022.emnlp-main.732
%U https://aclanthology.org/2022.emnlp-main.732
%U https://doi.org/10.18653/v1/2022.emnlp-main.732
%P 10683-10691
Markdown (Informal)
[Quality Scoring of Source Words in Neural Translation Models](https://aclanthology.org/2022.emnlp-main.732) (Jain et al., EMNLP 2022)
ACL
- Priyesh Jain, Sunita Sarawagi, and Tushar Tomar. 2022. Quality Scoring of Source Words in Neural Translation Models. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing, pages 10683–10691, Abu Dhabi, United Arab Emirates. Association for Computational Linguistics.