@inproceedings{mukherjee-etal-2024-cost,
title = "{C}o{ST} of breaking the {LLM}s",
author = "Mukherjee, Ananya and
Yadav, Saumitra and
Shrivastava, Manish",
editor = "Haddow, Barry and
Kocmi, Tom and
Koehn, Philipp and
Monz, Christof",
booktitle = "Proceedings of the Ninth Conference on Machine Translation",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.wmt-1.24",
pages = "299--306",
abstract = "This paper presents an evaluation of 16 machine translation systems submitted to the Shared Task of the 9th Conference of Machine Translation (WMT24) for the English-Hindi (en-hi) language pair using our Complex Structures Test (CoST) suite. Aligning with this year{'}s test suite sub-task theme, {``}Help us break LLMs{''}, we curated a comprehensive test suite encompassing diverse datasets across various categories, including autobiography, poetry, legal, conversation, play, narration, technical, and mixed genres. Our evaluation reveals that all the systems struggle significantly with the archaic style of text like legal and technical writings or text with creative twist like conversation and poetry datasets, highlighting their weaknesses in handling complex linguistic structures and stylistic nuances inherent in these text types. Our evaluation identifies the strengths and limitations of the submitted models, pointing to specific areas where further research and development are needed to enhance their performance. Our test suite is available at \url{https://github.com/AnanyaCoder/CoST-WMT-24-Test-Suite-Task}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mukherjee-etal-2024-cost">
<titleInfo>
<title>CoST of breaking the LLMs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ananya</namePart>
<namePart type="family">Mukherjee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Saumitra</namePart>
<namePart type="family">Yadav</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manish</namePart>
<namePart type="family">Shrivastava</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth Conference on Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Barry</namePart>
<namePart type="family">Haddow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Kocmi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philipp</namePart>
<namePart type="family">Koehn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christof</namePart>
<namePart type="family">Monz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents an evaluation of 16 machine translation systems submitted to the Shared Task of the 9th Conference of Machine Translation (WMT24) for the English-Hindi (en-hi) language pair using our Complex Structures Test (CoST) suite. Aligning with this year’s test suite sub-task theme, “Help us break LLMs”, we curated a comprehensive test suite encompassing diverse datasets across various categories, including autobiography, poetry, legal, conversation, play, narration, technical, and mixed genres. Our evaluation reveals that all the systems struggle significantly with the archaic style of text like legal and technical writings or text with creative twist like conversation and poetry datasets, highlighting their weaknesses in handling complex linguistic structures and stylistic nuances inherent in these text types. Our evaluation identifies the strengths and limitations of the submitted models, pointing to specific areas where further research and development are needed to enhance their performance. Our test suite is available at https://github.com/AnanyaCoder/CoST-WMT-24-Test-Suite-Task.</abstract>
<identifier type="citekey">mukherjee-etal-2024-cost</identifier>
<location>
<url>https://aclanthology.org/2024.wmt-1.24</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>299</start>
<end>306</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CoST of breaking the LLMs
%A Mukherjee, Ananya
%A Yadav, Saumitra
%A Shrivastava, Manish
%Y Haddow, Barry
%Y Kocmi, Tom
%Y Koehn, Philipp
%Y Monz, Christof
%S Proceedings of the Ninth Conference on Machine Translation
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F mukherjee-etal-2024-cost
%X This paper presents an evaluation of 16 machine translation systems submitted to the Shared Task of the 9th Conference of Machine Translation (WMT24) for the English-Hindi (en-hi) language pair using our Complex Structures Test (CoST) suite. Aligning with this year’s test suite sub-task theme, “Help us break LLMs”, we curated a comprehensive test suite encompassing diverse datasets across various categories, including autobiography, poetry, legal, conversation, play, narration, technical, and mixed genres. Our evaluation reveals that all the systems struggle significantly with the archaic style of text like legal and technical writings or text with creative twist like conversation and poetry datasets, highlighting their weaknesses in handling complex linguistic structures and stylistic nuances inherent in these text types. Our evaluation identifies the strengths and limitations of the submitted models, pointing to specific areas where further research and development are needed to enhance their performance. Our test suite is available at https://github.com/AnanyaCoder/CoST-WMT-24-Test-Suite-Task.
%U https://aclanthology.org/2024.wmt-1.24
%P 299-306
Markdown (Informal)
[CoST of breaking the LLMs](https://aclanthology.org/2024.wmt-1.24) (Mukherjee et al., WMT 2024)
ACL
- Ananya Mukherjee, Saumitra Yadav, and Manish Shrivastava. 2024. CoST of breaking the LLMs. In Proceedings of the Ninth Conference on Machine Translation, pages 299–306, Miami, Florida, USA. Association for Computational Linguistics.