@article{algayres-etal-2022-dp,
title = "{DP}-Parse: Finding Word Boundaries from Raw Speech with an Instance Lexicon",
author = "Algayres, Robin and
Ricoul, Tristan and
Karadayi, Julien and
Lauren{\c{c}}on, Hugo and
Zaiem, Salah and
Mohamed, Abdelrahman and
Sagot, Beno{\^\i}t and
Dupoux, Emmanuel",
editor = "Roark, Brian and
Nenkova, Ani",
journal = "Transactions of the Association for Computational Linguistics",
volume = "10",
year = "2022",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2022.tacl-1.61",
doi = "10.1162/tacl_a_00505",
pages = "1051--1065",
abstract = "Finding word boundaries in continuous speech is challenging as there is little or no equivalent of a {`}space{'} delimiter between words. Popular Bayesian non-parametric models for text segmentation (Goldwater et al., 2006, 2009) use a Dirichlet process to jointly segment sentences and build a lexicon of word types. We introduce DP-Parse, which uses similar principles but only relies on an instance lexicon of word tokens, avoiding the clustering errors that arise with a lexicon of word types. On the Zero Resource Speech Benchmark 2017, our model sets a new speech segmentation state-of-the-art in 5 languages. The algorithm monotonically improves with better input representations, achieving yet higher scores when fed with weakly supervised inputs. Despite lacking a type lexicon, DP-Parse can be pipelined to a language model and learn semantic and syntactic representations as assessed by a new spoken word embedding benchmark. 1",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="algayres-etal-2022-dp">
<titleInfo>
<title>DP-Parse: Finding Word Boundaries from Raw Speech with an Instance Lexicon</title>
</titleInfo>
<name type="personal">
<namePart type="given">Robin</namePart>
<namePart type="family">Algayres</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tristan</namePart>
<namePart type="family">Ricoul</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julien</namePart>
<namePart type="family">Karadayi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hugo</namePart>
<namePart type="family">Laurençon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Salah</namePart>
<namePart type="family">Zaiem</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abdelrahman</namePart>
<namePart type="family">Mohamed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benoît</namePart>
<namePart type="family">Sagot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emmanuel</namePart>
<namePart type="family">Dupoux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Finding word boundaries in continuous speech is challenging as there is little or no equivalent of a ‘space’ delimiter between words. Popular Bayesian non-parametric models for text segmentation (Goldwater et al., 2006, 2009) use a Dirichlet process to jointly segment sentences and build a lexicon of word types. We introduce DP-Parse, which uses similar principles but only relies on an instance lexicon of word tokens, avoiding the clustering errors that arise with a lexicon of word types. On the Zero Resource Speech Benchmark 2017, our model sets a new speech segmentation state-of-the-art in 5 languages. The algorithm monotonically improves with better input representations, achieving yet higher scores when fed with weakly supervised inputs. Despite lacking a type lexicon, DP-Parse can be pipelined to a language model and learn semantic and syntactic representations as assessed by a new spoken word embedding benchmark. 1</abstract>
<identifier type="citekey">algayres-etal-2022-dp</identifier>
<identifier type="doi">10.1162/tacl_a_00505</identifier>
<location>
<url>https://aclanthology.org/2022.tacl-1.61</url>
</location>
<part>
<date>2022</date>
<detail type="volume"><number>10</number></detail>
<extent unit="page">
<start>1051</start>
<end>1065</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T DP-Parse: Finding Word Boundaries from Raw Speech with an Instance Lexicon
%A Algayres, Robin
%A Ricoul, Tristan
%A Karadayi, Julien
%A Laurençon, Hugo
%A Zaiem, Salah
%A Mohamed, Abdelrahman
%A Sagot, Benoît
%A Dupoux, Emmanuel
%J Transactions of the Association for Computational Linguistics
%D 2022
%V 10
%I MIT Press
%C Cambridge, MA
%F algayres-etal-2022-dp
%X Finding word boundaries in continuous speech is challenging as there is little or no equivalent of a ‘space’ delimiter between words. Popular Bayesian non-parametric models for text segmentation (Goldwater et al., 2006, 2009) use a Dirichlet process to jointly segment sentences and build a lexicon of word types. We introduce DP-Parse, which uses similar principles but only relies on an instance lexicon of word tokens, avoiding the clustering errors that arise with a lexicon of word types. On the Zero Resource Speech Benchmark 2017, our model sets a new speech segmentation state-of-the-art in 5 languages. The algorithm monotonically improves with better input representations, achieving yet higher scores when fed with weakly supervised inputs. Despite lacking a type lexicon, DP-Parse can be pipelined to a language model and learn semantic and syntactic representations as assessed by a new spoken word embedding benchmark. 1
%R 10.1162/tacl_a_00505
%U https://aclanthology.org/2022.tacl-1.61
%U https://doi.org/10.1162/tacl_a_00505
%P 1051-1065
Markdown (Informal)
[DP-Parse: Finding Word Boundaries from Raw Speech with an Instance Lexicon](https://aclanthology.org/2022.tacl-1.61) (Algayres et al., TACL 2022)
ACL