@inproceedings{yanishevsky-2021-bad,
title = "Bad to the Bone: Predicting the Impact of Source on {MT}",
author = "Yanishevsky, Alex",
editor = "Campbell, Janice and
Huyck, Ben and
Larocca, Stephen and
Marciano, Jay and
Savenkov, Konstantin and
Yanishevsky, Alex",
booktitle = "Proceedings of Machine Translation Summit XVIII: Users and Providers Track",
month = aug,
year = "2021",
address = "Virtual",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2021.mtsummit-up.14",
pages = "175--199",
abstract = "It{'}s a well-known truism that poorly written source has a profound negative effect on the quality of machine translation, drastically reduces the productivity of post-editors and impacts turnaround times. But what is bad and how bad is bad? Conversely, what are the features emblematic of good content and how good is good? The impact of source on MT is crucial since a lot of content is written by non-native authors, created by technical specialists for a non-technical audience and may not adhere to brand tone and voice. AI can be employed to identify these errors and predict {`}at-risk{'} content prior to localization in a multitude of languages. The presentation will show how source files and even individual sentences within those source files can be analyzed for markers of complexity and readability and thus are more likely to cause mistranslations and omissions for machine translation and subsequent post-editing. Potential solutions will be explored such as rewriting the source to be in line with acceptable threshold criteria for each product and/or domain, re-routing to other machine translation engines better suited for the task at hand and building AI-based predictive models.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yanishevsky-2021-bad">
<titleInfo>
<title>Bad to the Bone: Predicting the Impact of Source on MT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Yanishevsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Machine Translation Summit XVIII: Users and Providers Track</title>
</titleInfo>
<name type="personal">
<namePart type="given">Janice</namePart>
<namePart type="family">Campbell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ben</namePart>
<namePart type="family">Huyck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stephen</namePart>
<namePart type="family">Larocca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jay</namePart>
<namePart type="family">Marciano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Savenkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Yanishevsky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Virtual</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>It’s a well-known truism that poorly written source has a profound negative effect on the quality of machine translation, drastically reduces the productivity of post-editors and impacts turnaround times. But what is bad and how bad is bad? Conversely, what are the features emblematic of good content and how good is good? The impact of source on MT is crucial since a lot of content is written by non-native authors, created by technical specialists for a non-technical audience and may not adhere to brand tone and voice. AI can be employed to identify these errors and predict ‘at-risk’ content prior to localization in a multitude of languages. The presentation will show how source files and even individual sentences within those source files can be analyzed for markers of complexity and readability and thus are more likely to cause mistranslations and omissions for machine translation and subsequent post-editing. Potential solutions will be explored such as rewriting the source to be in line with acceptable threshold criteria for each product and/or domain, re-routing to other machine translation engines better suited for the task at hand and building AI-based predictive models.</abstract>
<identifier type="citekey">yanishevsky-2021-bad</identifier>
<location>
<url>https://aclanthology.org/2021.mtsummit-up.14</url>
</location>
<part>
<date>2021-08</date>
<extent unit="page">
<start>175</start>
<end>199</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Bad to the Bone: Predicting the Impact of Source on MT
%A Yanishevsky, Alex
%Y Campbell, Janice
%Y Huyck, Ben
%Y Larocca, Stephen
%Y Marciano, Jay
%Y Savenkov, Konstantin
%Y Yanishevsky, Alex
%S Proceedings of Machine Translation Summit XVIII: Users and Providers Track
%D 2021
%8 August
%I Association for Machine Translation in the Americas
%C Virtual
%F yanishevsky-2021-bad
%X It’s a well-known truism that poorly written source has a profound negative effect on the quality of machine translation, drastically reduces the productivity of post-editors and impacts turnaround times. But what is bad and how bad is bad? Conversely, what are the features emblematic of good content and how good is good? The impact of source on MT is crucial since a lot of content is written by non-native authors, created by technical specialists for a non-technical audience and may not adhere to brand tone and voice. AI can be employed to identify these errors and predict ‘at-risk’ content prior to localization in a multitude of languages. The presentation will show how source files and even individual sentences within those source files can be analyzed for markers of complexity and readability and thus are more likely to cause mistranslations and omissions for machine translation and subsequent post-editing. Potential solutions will be explored such as rewriting the source to be in line with acceptable threshold criteria for each product and/or domain, re-routing to other machine translation engines better suited for the task at hand and building AI-based predictive models.
%U https://aclanthology.org/2021.mtsummit-up.14
%P 175-199
Markdown (Informal)
[Bad to the Bone: Predicting the Impact of Source on MT](https://aclanthology.org/2021.mtsummit-up.14) (Yanishevsky, MTSummit 2021)
ACL