@inproceedings{yantosca-cheng-2025-phonotomizer,
  author    = {Yantosca, Michael S. and
               Cheng, Albert M. K.},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  title     = {Phonotomizer: A Compact, Unsupervised, Online Training Approach to Real-Time, Multilingual Phonetic Segmentation},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  pages     = {12135--12147},
  isbn      = {979-8-89176-251-0},
  doi       = {10.18653/v1/2025.acl-long.592},
  url       = {https://aclanthology.org/2025.acl-long.592/},
  abstract  = {Phonetic transcription requires significant time and expert training. Automated, state-of-the-art text-dependent methods still involve substantial pre-training annotation labor and may not generalize to multiple languages. Hallucination of speech amid silence or non-speech noise can also plague these methods, which fall short in real-time applications due to post hoc whole-phrase evaluation. This paper introduces Phonotomizer, a compact, unsupervised, online training approach to automatic, multilingual phonetic segmentation, a critical first stage in transcription. Unlike prior approaches, Phonotomizer trains on raw sound files alone and can modulate computational exactness. Preliminary evaluations on Irish and Twi, two underrepresented languages, exhibit segmentation comparable to current forced alignment technology, reducing acoustic model size and minimizing training epochs.}
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<!-- MODS v3 bibliographic record mirroring the BibTeX entry above
     (same citekey, authors, editors, venue, pages, DOI, and URL). -->
<mods ID="yantosca-cheng-2025-phonotomizer">
<titleInfo>
<title>Phonotomizer: A Compact, Unsupervised, Online Training Approach to Real-Time, Multilingual Phonetic Segmentation</title>
</titleInfo>
<!-- Paper authors (role: author). -->
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="given">S</namePart>
<namePart type="family">Yantosca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Albert</namePart>
<namePart type="given">M</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Cheng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<!-- Host publication: the ACL 2025 Long Papers proceedings volume,
     with its editors, publisher, place, and ISBN. -->
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joyce</namePart>
<namePart type="family">Nabende</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohammad</namePart>
<namePart type="given">Taher</namePart>
<namePart type="family">Pilehvar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-251-0</identifier>
</relatedItem>
<abstract>Phonetic transcription requires significant time and expert training. Automated, state-of-the-art text-dependent methods still involve substantial pre-training annotation labor and may not generalize to multiple languages. Hallucination of speech amid silence or non-speech noise can also plague these methods, which fall short in real-time applications due to post hoc whole-phrase evaluation. This paper introduces Phonotomizer, a compact, unsupervised, online training approach to automatic, multilingual phonetic segmentation, a critical first stage in transcription. Unlike prior approaches, Phonotomizer trains on raw sound files alone and can modulate computational exactness. Preliminary evaluations on Irish and Twi, two underrepresented languages, exhibit segmentation comparable to current forced alignment technology, reducing acoustic model size and minimizing training epochs.</abstract>
<!-- Identifiers and locators for the item itself. -->
<identifier type="citekey">yantosca-cheng-2025-phonotomizer</identifier>
<identifier type="doi">10.18653/v1/2025.acl-long.592</identifier>
<location>
<url>https://aclanthology.org/2025.acl-long.592/</url>
</location>
<!-- Issue date and page extent within the host volume. -->
<part>
<date>2025-07</date>
<extent unit="page">
<start>12135</start>
<end>12147</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Phonotomizer: A Compact, Unsupervised, Online Training Approach to Real-Time, Multilingual Phonetic Segmentation
%A Yantosca, Michael S.
%A Cheng, Albert M. K.
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F yantosca-cheng-2025-phonotomizer
%X Phonetic transcription requires significant time and expert training. Automated, state-of-the-art text-dependent methods still involve substantial pre-training annotation labor and may not generalize to multiple languages. Hallucination of speech amid silence or non-speech noise can also plague these methods, which fall short in real-time applications due to post hoc whole-phrase evaluation. This paper introduces Phonotomizer, a compact, unsupervised, online training approach to automatic, multilingual phonetic segmentation, a critical first stage in transcription. Unlike prior approaches, Phonotomizer trains on raw sound files alone and can modulate computational exactness. Preliminary evaluations on Irish and Twi, two underrepresented languages, exhibit segmentation comparable to current forced alignment technology, reducing acoustic model size and minimizing training epochs.
%R 10.18653/v1/2025.acl-long.592
%U https://aclanthology.org/2025.acl-long.592/
%U https://doi.org/10.18653/v1/2025.acl-long.592
%P 12135-12147
Markdown (Informal)
[Phonotomizer: A Compact, Unsupervised, Online Training Approach to Real-Time, Multilingual Phonetic Segmentation](https://aclanthology.org/2025.acl-long.592/) (Yantosca & Cheng, ACL 2025)
ACL