% Conference paper (CoNLL 2021). The model name in the title is brace-protected
% so that sentence-casing bibliography styles keep its capital letters.
@inproceedings{millet-etal-2021-predicting,
  title     = {Predicting non-native speech perception using the {Perceptual Assimilation Model} and state-of-the-art acoustic models},
  author    = {Millet, Juliette and
               Chitoran, Ioana and
               Dunbar, Ewan},
  editor    = {Bisazza, Arianna and
               Abend, Omri},
  booktitle = {Proceedings of the 25th Conference on Computational Natural Language Learning},
  month     = nov,
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.conll-1.51},
  doi       = {10.18653/v1/2021.conll-1.51},
  pages     = {661--673},
  abstract  = {Our native language influences the way we perceive speech sounds, affecting our ability to discriminate non-native sounds. We compare two ideas about the influence of the native language on speech perception: the Perceptual Assimilation Model, which appeals to a mental classification of sounds into native phoneme categories, versus the idea that rich, fine-grained phonetic representations tuned to the statistics of the native language, are sufficient. We operationalise this idea using representations from two state-of-the-art speech models, a Dirichlet process Gaussian mixture model and the more recent wav2vec 2.0 model. We present a new, open dataset of French- and English-speaking participants{'} speech perception behaviour for 61 vowel sounds from six languages. We show that phoneme assimilation is a better predictor than fine-grained phonetic modelling, both for the discrimination behaviour as a whole, and for predicting differences in discriminability associated with differences in native language background. We also show that wav2vec 2.0, while not good at capturing the effects of native language on speech perception, is complementary to information about native phoneme assimilation, and provides a good model of low-level phonetic representations, supporting the idea that both categorical and fine-grained perception are used during speech perception.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- One MODS record for the paper; the nested host relatedItem describes the proceedings volume it appears in. -->
  <mods ID="millet-etal-2021-predicting">
    <titleInfo>
      <title>Predicting non-native speech perception using the Perceptual Assimilation Model and state-of-the-art acoustic models</title>
    </titleInfo>
    <!-- Authors of the paper, in byline order. -->
    <name type="personal">
      <namePart type="given">Juliette</namePart>
      <namePart type="family">Millet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ioana</namePart>
      <namePart type="family">Chitoran</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ewan</namePart>
      <namePart type="family">Dunbar</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 25th Conference on Computational Natural Language Learning</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Arianna</namePart>
        <namePart type="family">Bisazza</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Omri</namePart>
        <namePart type="family">Abend</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Our native language influences the way we perceive speech sounds, affecting our ability to discriminate non-native sounds. We compare two ideas about the influence of the native language on speech perception: the Perceptual Assimilation Model, which appeals to a mental classification of sounds into native phoneme categories, versus the idea that rich, fine-grained phonetic representations tuned to the statistics of the native language, are sufficient. We operationalise this idea using representations from two state-of-the-art speech models, a Dirichlet process Gaussian mixture model and the more recent wav2vec 2.0 model. We present a new, open dataset of French- and English-speaking participants’ speech perception behaviour for 61 vowel sounds from six languages. We show that phoneme assimilation is a better predictor than fine-grained phonetic modelling, both for the discrimination behaviour as a whole, and for predicting differences in discriminability associated with differences in native language background. We also show that wav2vec 2.0, while not good at capturing the effects of native language on speech perception, is complementary to information about native phoneme assimilation, and provides a good model of low-level phonetic representations, supporting the idea that both categorical and fine-grained perception are used during speech perception.</abstract>
    <identifier type="citekey">millet-etal-2021-predicting</identifier>
    <identifier type="doi">10.18653/v1/2021.conll-1.51</identifier>
    <location>
      <url>https://aclanthology.org/2021.conll-1.51</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2021-11</date>
      <extent unit="page">
        <start>661</start>
        <end>673</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Predicting non-native speech perception using the Perceptual Assimilation Model and state-of-the-art acoustic models
%A Millet, Juliette
%A Chitoran, Ioana
%A Dunbar, Ewan
%Y Bisazza, Arianna
%Y Abend, Omri
%S Proceedings of the 25th Conference on Computational Natural Language Learning
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online
%F millet-etal-2021-predicting
%X Our native language influences the way we perceive speech sounds, affecting our ability to discriminate non-native sounds. We compare two ideas about the influence of the native language on speech perception: the Perceptual Assimilation Model, which appeals to a mental classification of sounds into native phoneme categories, versus the idea that rich, fine-grained phonetic representations tuned to the statistics of the native language, are sufficient. We operationalise this idea using representations from two state-of-the-art speech models, a Dirichlet process Gaussian mixture model and the more recent wav2vec 2.0 model. We present a new, open dataset of French- and English-speaking participants’ speech perception behaviour for 61 vowel sounds from six languages. We show that phoneme assimilation is a better predictor than fine-grained phonetic modelling, both for the discrimination behaviour as a whole, and for predicting differences in discriminability associated with differences in native language background. We also show that wav2vec 2.0, while not good at capturing the effects of native language on speech perception, is complementary to information about native phoneme assimilation, and provides a good model of low-level phonetic representations, supporting the idea that both categorical and fine-grained perception are used during speech perception.
%R 10.18653/v1/2021.conll-1.51
%U https://aclanthology.org/2021.conll-1.51
%U https://doi.org/10.18653/v1/2021.conll-1.51
%P 661-673
Markdown (Informal)
[Predicting non-native speech perception using the Perceptual Assimilation Model and state-of-the-art acoustic models](https://aclanthology.org/2021.conll-1.51) (Millet et al., CoNLL 2021)
ACL