% Conference paper in the TALN 2018 proceedings (volume 1).
% The month is given as the predefined BibTeX macro (unquoted) so that the
% bibliography style can render, abbreviate, or localise it.
@inproceedings{loukatou-etal-2018-modeling,
  author    = {Loukatou, Georgia-Rengina and
               Stoll, Sabine and
               Blasi, Damian and
               Cristia, Alejandrina},
  title     = {Modeling infant segmentation of two morphologically diverse languages},
  editor    = {S{\'e}billot, Pascale and
               Claveau, Vincent},
  booktitle = {Actes de la Conf{\'e}rence {TALN}. Volume 1 - Articles longs, articles courts de {TALN}},
  month     = may,
  year      = {2018},
  address   = {Rennes, France},
  publisher = {ATALA},
  pages     = {47--60},
  url       = {https://aclanthology.org/2018.jeptalnrecital-long.4},
  abstract  = {A rich literature explores unsupervised segmentation algorithms infants could use to parse their input, mainly focusing on English, an analytic language where word, morpheme, and syllable boundaries often coincide. Synthetic languages, where words are multi-morphemic, may present unique difficulties for segmentation. Our study tests corpora of two languages selected to differ in the extent of complexity of their morphological structure, Chintang and Japanese. We use three conceptually diverse word segmentation algorithms and we evaluate them on both word- and morpheme-level representations. As predicted, results for the simpler Japanese are better than those for the more complex Chintang. However, the difference is small compared to the effect of the algorithm (with the lexical algorithm outperforming sub-lexical ones) and the level (scores were lower when evaluating on words versus morphemes). There are also important interactions between language, model, and evaluation level, which ought to be considered in future work.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="loukatou-etal-2018-modeling">
<titleInfo>
<title>Modeling infant segmentation of two morphologically diverse languages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Georgia-Rengina</namePart>
<namePart type="family">Loukatou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sabine</namePart>
<namePart type="family">Stoll</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Damian</namePart>
<namePart type="family">Blasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alejandrina</namePart>
<namePart type="family">Cristia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Actes de la Conférence TALN. Volume 1 - Articles longs, articles courts de TALN</title>
</titleInfo>
<name type="personal">
<namePart type="given">Pascale</namePart>
<namePart type="family">Sébillot</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vincent</namePart>
<namePart type="family">Claveau</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ATALA</publisher>
<place>
<placeTerm type="text">Rennes, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A rich literature explores unsupervised segmentation algorithms infants could use to parse their input, mainly focusing on English, an analytic language where word, morpheme, and syllable boundaries often coincide. Synthetic languages, where words are multi-morphemic, may present unique difficulties for segmentation. Our study tests corpora of two languages selected to differ in the extent of complexity of their morphological structure, Chintang and Japanese. We use three conceptually diverse word segmentation algorithms and we evaluate them on both word- and morpheme-level representations. As predicted, results for the simpler Japanese are better than those for the more complex Chintang. However, the difference is small compared to the effect of the algorithm (with the lexical algorithm outperforming sub-lexical ones) and the level (scores were lower when evaluating on words versus morphemes). There are also important interactions between language, model, and evaluation level, which ought to be considered in future work.</abstract>
<identifier type="citekey">loukatou-etal-2018-modeling</identifier>
<location>
<url>https://aclanthology.org/2018.jeptalnrecital-long.4</url>
</location>
<part>
<date>2018-05</date>
<extent unit="page">
<start>47</start>
<end>60</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Modeling infant segmentation of two morphologically diverse languages
%A Loukatou, Georgia-Rengina
%A Stoll, Sabine
%A Blasi, Damian
%A Cristia, Alejandrina
%Y Sébillot, Pascale
%Y Claveau, Vincent
%S Actes de la Conférence TALN. Volume 1 - Articles longs, articles courts de TALN
%D 2018
%8 May
%I ATALA
%C Rennes, France
%F loukatou-etal-2018-modeling
%X A rich literature explores unsupervised segmentation algorithms infants could use to parse their input, mainly focusing on English, an analytic language where word, morpheme, and syllable boundaries often coincide. Synthetic languages, where words are multi-morphemic, may present unique difficulties for segmentation. Our study tests corpora of two languages selected to differ in the extent of complexity of their morphological structure, Chintang and Japanese. We use three conceptually diverse word segmentation algorithms and we evaluate them on both word- and morpheme-level representations. As predicted, results for the simpler Japanese are better than those for the more complex Chintang. However, the difference is small compared to the effect of the algorithm (with the lexical algorithm outperforming sub-lexical ones) and the level (scores were lower when evaluating on words versus morphemes). There are also important interactions between language, model, and evaluation level, which ought to be considered in future work.
%U https://aclanthology.org/2018.jeptalnrecital-long.4
%P 47-60
Markdown (Informal)
[Modeling infant segmentation of two morphologically diverse languages](https://aclanthology.org/2018.jeptalnrecital-long.4) (Loukatou et al., JEP/TALN/RECITAL 2018)
ACL