@inproceedings{pilan-volodina-2018-investigating,
title = "Investigating the importance of linguistic complexity features across different datasets related to language learning",
author = "Pil{\'a}n, Ildik{\'o} and
Volodina, Elena",
editor = "Becerra-Bonache, Leonor and
Jim{\'e}nez-L{\'o}pez, M. Dolores and
Mart{\'\i}n-Vide, Carlos and
Torrens-Urrutia, Adri{\`a}",
booktitle = "Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing",
month = aug,
year = "2018",
address = "Santa Fe, New-Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-4606",
pages = "49--58",
abstract = "We present the results of our investigations aiming at identifying the most informative linguistic complexity features for classifying language learning levels in three different datasets. The datasets vary across two dimensions: the size of the instances (texts vs. sentences) and the language learning skill they involve (reading comprehension texts vs. texts written by learners themselves). We present a subset of the most predictive features for each dataset, taking into consideration significant differences in their per-class mean values and show that these subsets lead not only to simpler models, but also to an improved classification performance. Furthermore, we pinpoint fourteen central features that are good predictors regardless of the size of the linguistic unit analyzed or the skills involved, which include both morpho-syntactic and lexical dimensions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="pilan-volodina-2018-investigating">
<titleInfo>
<title>Investigating the importance of linguistic complexity features across different datasets related to language learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ildikó</namePart>
<namePart type="family">Pilán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elena</namePart>
<namePart type="family">Volodina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leonor</namePart>
<namePart type="family">Becerra-Bonache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="given">Dolores</namePart>
<namePart type="family">Jiménez-López</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carlos</namePart>
<namePart type="family">Martín-Vide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adrià</namePart>
<namePart type="family">Torrens-Urrutia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New-Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present the results of our investigations aiming at identifying the most informative linguistic complexity features for classifying language learning levels in three different datasets. The datasets vary across two dimensions: the size of the instances (texts vs. sentences) and the language learning skill they involve (reading comprehension texts vs. texts written by learners themselves). We present a subset of the most predictive features for each dataset, taking into consideration significant differences in their per-class mean values and show that these subsets lead not only to simpler models, but also to an improved classification performance. Furthermore, we pinpoint fourteen central features that are good predictors regardless of the size of the linguistic unit analyzed or the skills involved, which include both morpho-syntactic and lexical dimensions.</abstract>
<identifier type="citekey">pilan-volodina-2018-investigating</identifier>
<location>
<url>https://aclanthology.org/W18-4606</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>49</start>
<end>58</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Investigating the importance of linguistic complexity features across different datasets related to language learning
%A Pilán, Ildikó
%A Volodina, Elena
%Y Becerra-Bonache, Leonor
%Y Jiménez-López, M. Dolores
%Y Martín-Vide, Carlos
%Y Torrens-Urrutia, Adrià
%S Proceedings of the Workshop on Linguistic Complexity and Natural Language Processing
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New-Mexico
%F pilan-volodina-2018-investigating
%X We present the results of our investigations aiming at identifying the most informative linguistic complexity features for classifying language learning levels in three different datasets. The datasets vary across two dimensions: the size of the instances (texts vs. sentences) and the language learning skill they involve (reading comprehension texts vs. texts written by learners themselves). We present a subset of the most predictive features for each dataset, taking into consideration significant differences in their per-class mean values and show that these subsets lead not only to simpler models, but also to an improved classification performance. Furthermore, we pinpoint fourteen central features that are good predictors regardless of the size of the linguistic unit analyzed or the skills involved, which include both morpho-syntactic and lexical dimensions.
%U https://aclanthology.org/W18-4606
%P 49-58
Markdown (Informal)
[Investigating the importance of linguistic complexity features across different datasets related to language learning](https://aclanthology.org/W18-4606) (Pilán & Volodina, 2018)
ACL