% Conference paper in the ICNLSP-2025 proceedings; ACL Anthology record 2025.icnlsp-1.24.
% Case-sensitive model names and the venue acronym are protected with whole-word braces
% so that sentence-casing bibliography styles leave them intact.
@inproceedings{dang-etal-2025-tokenization,
  title     = {Tokenization and Morphology in Multilingual Language Models: A Comparative Analysis of {mT5} and {ByT5}},
  author    = {Dang, Thao Anh and
               Raviv, Limor and
               Galke, Lukas},
  editor    = {Abbas, Mourad and
               Yousef, Tariq and
               Galke, Lukas},
  booktitle = {Proceedings of the 8th International Conference on Natural Language and Speech Processing ({ICNLSP-2025})},
  month     = aug,
  year      = {2025},
  address   = {Southern Denmark University, Odense, Denmark},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.icnlsp-1.24/},
  pages     = {242--257},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the conference paper with citekey dang-etal-2025-tokenization. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="dang-etal-2025-tokenization">

    <!-- Title of the paper itself -->
    <titleInfo>
      <title>Tokenization and Morphology in Multilingual Language Models: A Comparative Analysis of mT5 and ByT5</title>
    </titleInfo>

    <!-- Authors, in document order -->
    <name type="personal">
      <namePart type="given">Thao</namePart>
      <namePart type="given">Anh</namePart>
      <namePart type="family">Dang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Limor</namePart>
      <namePart type="family">Raviv</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lukas</namePart>
      <namePart type="family">Galke</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year-month) -->
    <originInfo>
      <dateIssued>2025-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume that contains the paper -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 8th International Conference on Natural Language and Speech Processing (ICNLSP-2025)</title>
      </titleInfo>

      <!-- Editors of the proceedings, in document order -->
      <name type="personal">
        <namePart type="given">Mourad</namePart>
        <namePart type="family">Abbas</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tariq</namePart>
        <namePart type="family">Yousef</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Lukas</namePart>
        <namePart type="family">Galke</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>

      <!-- Publisher and place recorded for the proceedings -->
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Southern Denmark University, Odense, Denmark</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <!-- Identifier and landing page -->
    <identifier type="citekey">dang-etal-2025-tokenization</identifier>
    <location>
      <url>https://aclanthology.org/2025.icnlsp-1.24/</url>
    </location>

    <!-- Date and page extent of the paper within the host volume -->
    <part>
      <date>2025-08</date>
      <extent unit="page">
        <start>242</start>
        <end>257</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T Tokenization and Morphology in Multilingual Language Models: A Comparative Analysis of mT5 and ByT5
%A Dang, Thao Anh
%A Raviv, Limor
%A Galke, Lukas
%Y Abbas, Mourad
%Y Yousef, Tariq
%Y Galke, Lukas
%S Proceedings of the 8th International Conference on Natural Language and Speech Processing (ICNLSP-2025)
%D 2025
%8 August
%I Association for Computational Linguistics
%C Southern Denmark University, Odense, Denmark
%F dang-etal-2025-tokenization
%U https://aclanthology.org/2025.icnlsp-1.24/
%P 242-257
Markdown (Informal)
[Tokenization and Morphology in Multilingual Language Models: A Comparative Analysis of mT5 and ByT5](https://aclanthology.org/2025.icnlsp-1.24/) (Dang et al., ICNLSP 2025)
ACL