@inproceedings{sapeta-2024-ai,
title = "Is {AI} the new {``}Human evaluator{''}?",
author = "Sapeta, Aneta",
editor = "Martindale, Marianna and
Campbell, Janice and
Savenkov, Konstantin and
Goel, Shivali",
booktitle = "Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)",
month = sep,
year = "2024",
address = "Chicago, USA",
publisher = "Association for Machine Translation in the Americas",
url = "https://aclanthology.org/2024.amta-presentations.4",
pages = "30--44",
abstract = "The AI tide has been present in the Localization industry for many years now, and even though there is a big hype around it, it is still trying to find its place in localization. Some are trying to use it as an NMT replacement for the current market models, and others as a helping tool in evaluating the NMT outputs by having less Human input in evaluating the MT quality. From our experience, we are still depending on Human evaluation for assessment, but how good of an evaluator can AI be? From our tests, evaluating the MT quality by the AI can be a challenging task (even though we have seen significant progress in recent years) as it requires the system to understand the meaning of the source, and the target, and then to be able to judge the quality by assessing the more or less visible errors, and to be unbiased in giving its assessment. In this presentation, we want to show our insights on the reliability of AI for MT and whether we can exclude humans from the evaluation circle.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sapeta-2024-ai">
<titleInfo>
<title>Is AI the new “Human evaluator”?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Aneta</namePart>
<namePart type="family">Sapeta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marianna</namePart>
<namePart type="family">Martindale</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Janice</namePart>
<namePart type="family">Campbell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Konstantin</namePart>
<namePart type="family">Savenkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shivali</namePart>
<namePart type="family">Goel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Machine Translation in the Americas</publisher>
<place>
<placeTerm type="text">Chicago, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The AI tide has been present in the Localization industry for many years now, and even though there is a big hype around it, it is still trying to find its place in localization. Some are trying to use it as an NMT replacement for the current market models, and others as a helping tool in evaluating the NMT outputs by having less Human input in evaluating the MT quality. From our experience, we are still depending on Human evaluation for assessment, but how good of an evaluator can AI be? From our tests, evaluating the MT quality by the AI can be a challenging task (even though we have seen significant progress in recent years) as it requires the system to understand the meaning of the source, and the target, and then to be able to judge the quality by assessing the more or less visible errors, and to be unbiased in giving its assessment. In this presentation, we want to show our insights on the reliability of AI for MT and whether we can exclude humans from the evaluation circle.</abstract>
<identifier type="citekey">sapeta-2024-ai</identifier>
<location>
<url>https://aclanthology.org/2024.amta-presentations.4</url>
</location>
<part>
<date>2024-09</date>
<extent unit="page">
<start>30</start>
<end>44</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Is AI the new “Human evaluator”?
%A Sapeta, Aneta
%Y Martindale, Marianna
%Y Campbell, Janice
%Y Savenkov, Konstantin
%Y Goel, Shivali
%S Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations)
%D 2024
%8 September
%I Association for Machine Translation in the Americas
%C Chicago, USA
%F sapeta-2024-ai
%X The AI tide has been present in the Localization industry for many years now, and even though there is a big hype around it, it is still trying to find its place in localization. Some are trying to use it as an NMT replacement for the current market models, and others as a helping tool in evaluating the NMT outputs by having less Human input in evaluating the MT quality. From our experience, we are still depending on Human evaluation for assessment, but how good of an evaluator can AI be? From our tests, evaluating the MT quality by the AI can be a challenging task (even though we have seen significant progress in recent years) as it requires the system to understand the meaning of the source, and the target, and then to be able to judge the quality by assessing the more or less visible errors, and to be unbiased in giving its assessment. In this presentation, we want to show our insights on the reliability of AI for MT and whether we can exclude humans from the evaluation circle.
%U https://aclanthology.org/2024.amta-presentations.4
%P 30-44
Markdown (Informal)
[Is AI the new “Human evaluator”?](https://aclanthology.org/2024.amta-presentations.4) (Sapeta, AMTA 2024)
ACL
- Aneta Sapeta. 2024. Is AI the new “Human evaluator”? In Proceedings of the 16th Conference of the Association for Machine Translation in the Americas (Volume 2: Presentations), pages 30–44, Chicago, USA. Association for Machine Translation in the Americas.