@inproceedings{apsel-etal-2025-beyond,
    title     = {Beyond Tokens and Into Minds: Future Directions for Human-Centered Evaluation in Machine Translation Post-Editing},
    author    = {Apsel, Molly and
      Kothari, Sunil and
      Mehta, Manish and
      Sundarababu, Vasudevan},
    editor    = {Akter, Mousumi and
      Chowdhury, Tahiya and
      Eger, Steffen and
      Leiter, Christoph and
      Opitz, Juri and
      {\c{C}}ano, Erion},
    booktitle = {Proceedings of the 5th Workshop on Evaluation and Comparison of {NLP} Systems},
    month     = dec,
    year      = {2025},
    address   = {Mumbai, India},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.eval4nlp-1.1/},
    pages     = {1--8},
    isbn      = {979-8-89176-305-0},
    abstract  = {Machine translation post-editing (MTPE) is central to evaluating and ensuring translation quality, particularly for low-resource languages (LRLs), where systems are more error-prone than for high-resource languages. Traditional token-based models segment text according to statistical patterns of their (primarily high-resource) training data, which can distort meaning, fragment words in morphologically rich languages, and complicate MTPE and evaluation. Current evaluation metrics also tend to emphasize surface-level similarity to reference texts, overlooking how humans actually approach translation tasks and creating issues when references are unavailable or a more abstract interpretation is needed. In this position paper, we argue that emerging architectures (Large Concept Models [LCMs] and Byte Latent Transformers [BLTs]) and insights from cognitive science open new possibilities for MTPE frameworks. LCMs represent meaning at the conceptual level, enabling evaluation of different translation approaches and the robustness of such models in MT. At the same time, BLTs operate below the token level, potentially easing post-editing across diverse language scripts. Drawing on cognitive theories of bilingualism and meaning representation, we outline hypotheses and research methods for evaluating post-editing data, translation quality, and interface design toward more robust, human-centered MT evaluation.}
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="apsel-etal-2025-beyond">
<titleInfo>
<title>Beyond Tokens and Into Minds: Future Directions for Human-Centered Evaluation in Machine Translation Post-Editing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Molly</namePart>
<namePart type="family">Apsel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunil</namePart>
<namePart type="family">Kothari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Mehta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vasudevan</namePart>
<namePart type="family">Sundarababu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mousumi</namePart>
<namePart type="family">Akter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tahiya</namePart>
<namePart type="family">Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Steffen</namePart>
<namePart type="family">Eger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christoph</namePart>
<namePart type="family">Leiter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Juri</namePart>
<namePart type="family">Opitz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Erion</namePart>
<namePart type="family">Çano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mumbai, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-305-0</identifier>
</relatedItem>
<abstract>Machine translation post-editing (MTPE) is central to evaluating and ensuring translation quality, particularly for low-resource languages (LRLs), where systems are more error-prone than for high-resource languages. Traditional token-based models segment text according to statistical patterns of their (primarily high-resource) training data, which can distort meaning, fragment words in morphologically rich languages, and complicate MTPE and evaluation. Current evaluation metrics also tend to emphasize surface-level similarity to reference texts, overlooking how humans actually approach translation tasks and creating issues when references are unavailable or a more abstract interpretation is needed. In this position paper, we argue that emerging architectures (Large Concept Models [LCMs] and Byte Latent Transformers [BLTs]) and insights from cognitive science open new possibilities for MTPE frameworks. LCMs represent meaning at the conceptual level, enabling evaluation of different translation approaches and the robustness of such models in MT. At the same time, BLTs operate below the token level, potentially easing post-editing across diverse language scripts. Drawing on cognitive theories of bilingualism and meaning representation, we outline hypotheses and research methods for evaluating post-editing data, translation quality, and interface design toward more robust, human-centered MT evaluation.</abstract>
<identifier type="citekey">apsel-etal-2025-beyond</identifier>
<location>
<url>https://aclanthology.org/2025.eval4nlp-1.1/</url>
</location>
<part>
<date>2025-12</date>
<extent unit="page">
<start>1</start>
<end>8</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Beyond Tokens and Into Minds: Future Directions for Human-Centered Evaluation in Machine Translation Post-Editing
%A Apsel, Molly
%A Kothari, Sunil
%A Mehta, Manish
%A Sundarababu, Vasudevan
%Y Akter, Mousumi
%Y Chowdhury, Tahiya
%Y Eger, Steffen
%Y Leiter, Christoph
%Y Opitz, Juri
%Y Çano, Erion
%S Proceedings of the 5th Workshop on Evaluation and Comparison of NLP Systems
%D 2025
%8 December
%I Association for Computational Linguistics
%C Mumbai, India
%@ 979-8-89176-305-0
%F apsel-etal-2025-beyond
%X Machine translation post-editing (MTPE) is central to evaluating and ensuring translation quality, particularly for low-resource languages (LRLs), where systems are more error-prone than for high-resource languages. Traditional token-based models segment text according to statistical patterns of their (primarily high-resource) training data, which can distort meaning, fragment words in morphologically rich languages, and complicate MTPE and evaluation. Current evaluation metrics also tend to emphasize surface-level similarity to reference texts, overlooking how humans actually approach translation tasks and creating issues when references are unavailable or a more abstract interpretation is needed. In this position paper, we argue that emerging architectures (Large Concept Models [LCMs] and Byte Latent Transformers [BLTs]) and insights from cognitive science open new possibilities for MTPE frameworks. LCMs represent meaning at the conceptual level, enabling evaluation of different translation approaches and the robustness of such models in MT. At the same time, BLTs operate below the token level, potentially easing post-editing across diverse language scripts. Drawing on cognitive theories of bilingualism and meaning representation, we outline hypotheses and research methods for evaluating post-editing data, translation quality, and interface design toward more robust, human-centered MT evaluation.
%U https://aclanthology.org/2025.eval4nlp-1.1/
%P 1-8
Markdown (Informal)
[Beyond Tokens and Into Minds: Future Directions for Human-Centered Evaluation in Machine Translation Post-Editing](https://aclanthology.org/2025.eval4nlp-1.1/) (Apsel et al., Eval4NLP 2025)
ACL