% BibTeX record for the ACL Anthology paper 2026.eacl-srw.40.
% Case-sensitive words in booktitle are protected with whole-word braces.
@inproceedings{nishida-etal-2026-probabilistic,
  title     = {Probabilistic Bilingual Subword Segmentation with Latent Subword Alignment},
  author    = {Nishida, Shoto and
               Matsui, Daiki and
               Ninomiya, Takashi and
               Goto, Isao and
               Tamura, Akihiro},
  editor    = {Baez Santamaria, Selene and
               Somayajula, Sai Ashish and
               Yamaguchi, Atsuki},
  booktitle = {Proceedings of the 19th Conference of the {European} Chapter of the {Association} for {Computational} {Linguistics} (Volume 4: Student Research Workshop)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.eacl-srw.40/},
  pages     = {528--534},
  isbn      = {979-8-89176-383-8},
  abstract  = {This study proposes a method for learning subword correspondences in parallel sentence pairs using the EM algorithm. Conventional neural machine translation typically employs subword segmentation models trained. However, since existing methods do not consider parallel relationships, inconsistencies in word segmentation between source and target languages may hinder translation model training. Our approach leverages direct modeling of subword correspondences in parallel corpora, thereby improving segmentation consistency across languages. Experiments across multiple machine translation tasks confirm that our proposed method improves translation accuracy for many tasks.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for one paper; the host relatedItem below describes the proceedings volume. -->
  <mods ID="nishida-etal-2026-probabilistic">
    <titleInfo>
      <title>Probabilistic Bilingual Subword Segmentation with Latent Subword Alignment</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Shoto</namePart>
      <namePart type="family">Nishida</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Daiki</namePart>
      <namePart type="family">Matsui</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Takashi</namePart>
      <namePart type="family">Ninomiya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Isao</namePart>
      <namePart type="family">Goto</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Akihiro</namePart>
      <namePart type="family">Tamura</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Selene</namePart>
        <namePart type="family">Baez Santamaria</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sai</namePart>
        <namePart type="given">Ashish</namePart>
        <namePart type="family">Somayajula</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Atsuki</namePart>
        <namePart type="family">Yamaguchi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-383-8</identifier>
    </relatedItem>
    <abstract>This study proposes a method for learning subword correspondences in parallel sentence pairs using the EM algorithm. Conventional neural machine translation typically employs subword segmentation models trained. However, since existing methods do not consider parallel relationships, inconsistencies in word segmentation between source and target languages may hinder translation model training. Our approach leverages direct modeling of subword correspondences in parallel corpora, thereby improving segmentation consistency across languages. Experiments across multiple machine translation tasks confirm that our proposed method improves translation accuracy for many tasks.</abstract>
    <identifier type="citekey">nishida-etal-2026-probabilistic</identifier>
    <location>
      <url>https://aclanthology.org/2026.eacl-srw.40/</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>528</start>
        <end>534</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Probabilistic Bilingual Subword Segmentation with Latent Subword Alignment
%A Nishida, Shoto
%A Matsui, Daiki
%A Ninomiya, Takashi
%A Goto, Isao
%A Tamura, Akihiro
%Y Baez Santamaria, Selene
%Y Somayajula, Sai Ashish
%Y Yamaguchi, Atsuki
%S Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-383-8
%F nishida-etal-2026-probabilistic
%X This study proposes a method for learning subword correspondences in parallel sentence pairs using the EM algorithm. Conventional neural machine translation typically employs subword segmentation models trained. However, since existing methods do not consider parallel relationships, inconsistencies in word segmentation between source and target languages may hinder translation model training. Our approach leverages direct modeling of subword correspondences in parallel corpora, thereby improving segmentation consistency across languages. Experiments across multiple machine translation tasks confirm that our proposed method improves translation accuracy for many tasks.
%U https://aclanthology.org/2026.eacl-srw.40/
%P 528-534
Markdown (Informal)
[Probabilistic Bilingual Subword Segmentation with Latent Subword Alignment](https://aclanthology.org/2026.eacl-srw.40/) (Nishida et al., EACL 2026)
ACL
- Shoto Nishida, Daiki Matsui, Takashi Ninomiya, Isao Goto, and Akihiro Tamura. 2026. Probabilistic Bilingual Subword Segmentation with Latent Subword Alignment. In Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 4: Student Research Workshop), pages 528–534, Rabat, Morocco. Association for Computational Linguistics.