@inproceedings{kulmizev-etal-2017-power,
title = "The Power of Character N-grams in Native Language Identification",
author = "Kulmizev, Artur and
Blankers, Bo and
Bjerva, Johannes and
Nissim, Malvina and
van Noord, Gertjan and
Plank, Barbara and
Wieling, Martijn",
editor = "Tetreault, Joel and
Burstein, Jill and
Leacock, Claudia and
Yannakoudakis, Helen",
booktitle = "Proceedings of the 12th Workshop on Innovative Use of {NLP} for Building Educational Applications",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-5043/",
doi = "10.18653/v1/W17-5043",
pages = "382--389",
abstract = "In this paper, we explore the performance of a linear SVM trained on language independent character features for the NLI Shared Task 2017. Our basic system (GRONINGEN) achieves the best performance (87.56 F1-score) on the evaluation set using only 1-9 character n-grams as features. We compare this against several ensemble and meta-classifiers in order to examine how the linear system fares when combined with other, especially non-linear classifiers. Special emphasis is placed on the topic bias that exists by virtue of the assessment essay prompt distribution."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kulmizev-etal-2017-power">
<titleInfo>
<title>The Power of Character N-grams in Native Language Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Artur</namePart>
<namePart type="family">Kulmizev</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bo</namePart>
<namePart type="family">Blankers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Johannes</namePart>
<namePart type="family">Bjerva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Malvina</namePart>
<namePart type="family">Nissim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gertjan</namePart>
<namePart type="family">van Noord</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Barbara</namePart>
<namePart type="family">Plank</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martijn</namePart>
<namePart type="family">Wieling</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joel</namePart>
<namePart type="family">Tetreault</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jill</namePart>
<namePart type="family">Burstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Leacock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Yannakoudakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we explore the performance of a linear SVM trained on language independent character features for the NLI Shared Task 2017. Our basic system (GRONINGEN) achieves the best performance (87.56 F1-score) on the evaluation set using only 1-9 character n-grams as features. We compare this against several ensemble and meta-classifiers in order to examine how the linear system fares when combined with other, especially non-linear classifiers. Special emphasis is placed on the topic bias that exists by virtue of the assessment essay prompt distribution.</abstract>
<identifier type="citekey">kulmizev-etal-2017-power</identifier>
<identifier type="doi">10.18653/v1/W17-5043</identifier>
<location>
<url>https://aclanthology.org/W17-5043/</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>382</start>
<end>389</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T The Power of Character N-grams in Native Language Identification
%A Kulmizev, Artur
%A Blankers, Bo
%A Bjerva, Johannes
%A Nissim, Malvina
%A van Noord, Gertjan
%A Plank, Barbara
%A Wieling, Martijn
%Y Tetreault, Joel
%Y Burstein, Jill
%Y Leacock, Claudia
%Y Yannakoudakis, Helen
%S Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F kulmizev-etal-2017-power
%X In this paper, we explore the performance of a linear SVM trained on language independent character features for the NLI Shared Task 2017. Our basic system (GRONINGEN) achieves the best performance (87.56 F1-score) on the evaluation set using only 1-9 character n-grams as features. We compare this against several ensemble and meta-classifiers in order to examine how the linear system fares when combined with other, especially non-linear classifiers. Special emphasis is placed on the topic bias that exists by virtue of the assessment essay prompt distribution.
%R 10.18653/v1/W17-5043
%U https://aclanthology.org/W17-5043/
%U https://doi.org/10.18653/v1/W17-5043
%P 382-389
Markdown (Informal)
[The Power of Character N-grams in Native Language Identification](https://aclanthology.org/W17-5043/) (Kulmizev et al., BEA 2017)
ACL
- Artur Kulmizev, Bo Blankers, Johannes Bjerva, Malvina Nissim, Gertjan van Noord, Barbara Plank, and Martijn Wieling. 2017. The Power of Character N-grams in Native Language Identification. In Proceedings of the 12th Workshop on Innovative Use of NLP for Building Educational Applications, pages 382–389, Copenhagen, Denmark. Association for Computational Linguistics.