% Conference paper from the 19th SIGMORPHON Workshop (2022).
% Proper nouns and the workshop acronym are brace-protected so that styles
% applying sentence casing to titles keep their capital letters.
@inproceedings{hutin-allassonniere-tang-2022-investigating,
  title     = {Investigating phonological theories with crowd-sourced data: The {Inventory Size Hypothesis} in the light of {Lingua Libre}},
  author    = {Hutin, Mathilde and
               Allassonni{\`e}re-Tang, Marc},
  editor    = {Nicolai, Garrett and
               Chodroff, Eleanor},
  booktitle = {Proceedings of the 19th {SIGMORPHON} Workshop on Computational Research in Phonetics, Phonology, and Morphology},
  month     = jul,
  year      = {2022},
  address   = {Seattle, Washington},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.sigmorphon-1.3},
  doi       = {10.18653/v1/2022.sigmorphon-1.3},
  pages     = {23--28},
  abstract  = {Data-driven research in phonetics and phonology relies massively on oral resources, and access thereto. We propose to explore a question in comparative linguistics using an open-source crowd-sourced corpus, Lingua Libre, Wikimedia{'}s participatory linguistic library, to show that such corpora may offer a solution to typologists wishing to explore numerous languages at once. For the present proof of concept, we compare the realizations of Italian and Spanish vowels (sample size = 5000) to investigate whether vowel production is influenced by the size of the phonemic inventory (the Inventory Size Hypothesis), by the exact shape of the inventory (the Vowel Quality Hypothesis) or by none of the above. Results show that the size of the inventory does not seem to influence vowel production, thus supporting previous research, but also that the shape of the inventory may well be a factor determining the extent of variation in vowel production. Most of all, these results show that Lingua Libre has the potential to provide valuable data for linguistic inquiry.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hutin-allassonniere-tang-2022-investigating">

    <!-- Title of the paper itself. -->
    <titleInfo>
      <title>Investigating phonological theories with crowd-sourced data: The Inventory Size Hypothesis in the light of Lingua Libre</title>
    </titleInfo>

    <!-- Authors, in citation order. -->
    <name type="personal">
      <namePart type="given">Mathilde</namePart>
      <namePart type="family">Hutin</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marc</namePart>
      <namePart type="family">Allassonnière-Tang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>

    <!-- Issue date of the paper (year and month). -->
    <originInfo>
      <dateIssued>2022-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>

    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Garrett</namePart>
        <namePart type="family">Nicolai</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eleanor</namePart>
        <namePart type="family">Chodroff</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Seattle, Washington</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>

    <abstract>Data-driven research in phonetics and phonology relies massively on oral resources, and access thereto. We propose to explore a question in comparative linguistics using an open-source crowd-sourced corpus, Lingua Libre, Wikimedia’s participatory linguistic library, to show that such corpora may offer a solution to typologists wishing to explore numerous languages at once. For the present proof of concept, we compare the realizations of Italian and Spanish vowels (sample size = 5000) to investigate whether vowel production is influenced by the size of the phonemic inventory (the Inventory Size Hypothesis), by the exact shape of the inventory (the Vowel Quality Hypothesis) or by none of the above. Results show that the size of the inventory does not seem to influence vowel production, thus supporting previous research, but also that the shape of the inventory may well be a factor determining the extent of variation in vowel production. Most of all, these results show that Lingua Libre has the potential to provide valuable data for linguistic inquiry.</abstract>

    <!-- Identifiers and access location. -->
    <identifier type="citekey">hutin-allassonniere-tang-2022-investigating</identifier>
    <identifier type="doi">10.18653/v1/2022.sigmorphon-1.3</identifier>
    <location>
      <url>https://aclanthology.org/2022.sigmorphon-1.3</url>
    </location>

    <!-- Position of the paper within the host volume: first and last page. -->
    <part>
      <date>2022-07</date>
      <extent unit="page">
        <start>23</start>
        <end>28</end>
      </extent>
    </part>

  </mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating phonological theories with crowd-sourced data: The Inventory Size Hypothesis in the light of Lingua Libre
%A Hutin, Mathilde
%A Allassonnière-Tang, Marc
%Y Nicolai, Garrett
%Y Chodroff, Eleanor
%S Proceedings of the 19th SIGMORPHON Workshop on Computational Research in Phonetics, Phonology, and Morphology
%D 2022
%8 July
%I Association for Computational Linguistics
%C Seattle, Washington
%F hutin-allassonniere-tang-2022-investigating
%X Data-driven research in phonetics and phonology relies massively on oral resources, and access thereto. We propose to explore a question in comparative linguistics using an open-source crowd-sourced corpus, Lingua Libre, Wikimedia’s participatory linguistic library, to show that such corpora may offer a solution to typologists wishing to explore numerous languages at once. For the present proof of concept, we compare the realizations of Italian and Spanish vowels (sample size = 5000) to investigate whether vowel production is influenced by the size of the phonemic inventory (the Inventory Size Hypothesis), by the exact shape of the inventory (the Vowel Quality Hypothesis) or by none of the above. Results show that the size of the inventory does not seem to influence vowel production, thus supporting previous research, but also that the shape of the inventory may well be a factor determining the extent of variation in vowel production. Most of all, these results show that Lingua Libre has the potential to provide valuable data for linguistic inquiry.
%R 10.18653/v1/2022.sigmorphon-1.3
%U https://aclanthology.org/2022.sigmorphon-1.3
%U https://doi.org/10.18653/v1/2022.sigmorphon-1.3
%P 23-28
Markdown (Informal)
[Investigating phonological theories with crowd-sourced data: The Inventory Size Hypothesis in the light of Lingua Libre](https://aclanthology.org/2022.sigmorphon-1.3) (Hutin & Allassonnière-Tang, SIGMORPHON 2022)
ACL