% Workshop paper record; ACL Anthology ID W18-0534.
% Proper nouns and acronyms in title fields are brace-protected as whole words
% so that sentence-casing styles keep their capitalisation.
@inproceedings{del-rio-gayo-etal-2018-portuguese,
  title     = {A {Portuguese} Native Language Identification Dataset},
  author    = {del R{\'\i}o Gayo, Iria and
               Zampieri, Marcos and
               Malmasi, Shervin},
  editor    = {Tetreault, Joel and
               Burstein, Jill and
               Kochmar, Ekaterina and
               Leacock, Claudia and
               Yannakoudakis, Helen},
  booktitle = {Proceedings of the Thirteenth Workshop on Innovative Use of {NLP} for Building Educational Applications},
  month     = jun,
  year      = {2018},
  address   = {New Orleans, Louisiana},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W18-0534},
  doi       = {10.18653/v1/W18-0534},
  pages     = {291--296},
  abstract  = {In this paper we present NLI-PT, the first Portuguese dataset compiled for Native Language Identification (NLI), the task of identifying an author{'}s first language based on their second language writing. The dataset includes 1,868 student essays written by learners of European Portuguese, native speakers of the following L1s: Chinese, English, Spanish, German, Russian, French, Japanese, Italian, Dutch, Tetum, Arabic, Polish, Korean, Romanian, and Swedish. NLI-PT includes the original student text and four different types of annotation: POS, fine-grained POS, constituency parses, and dependency parses. NLI-PT can be used not only in NLI but also in research on several topics in the field of Second Language Acquisition and educational NLP. We discuss possible applications of this dataset and present the results obtained for the first lexical baseline system for Portuguese NLI.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID del-rio-gayo-etal-2018-portuguese. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="del-rio-gayo-etal-2018-portuguese">
    <titleInfo>
      <title>A Portuguese Native Language Identification Dataset</title>
    </titleInfo>
    <!-- Authors, in listed order. -->
    <name type="personal">
      <namePart type="given">Iria</namePart>
      <namePart type="family">del Río Gayo</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Marcos</namePart>
      <namePart type="family">Zampieri</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shervin</namePart>
      <namePart type="family">Malmasi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-06</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Joel</namePart>
        <namePart type="family">Tetreault</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jill</namePart>
        <namePart type="family">Burstein</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Claudia</namePart>
        <namePart type="family">Leacock</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Helen</namePart>
        <namePart type="family">Yannakoudakis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">New Orleans, Louisiana</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we present NLI-PT, the first Portuguese dataset compiled for Native Language Identification (NLI), the task of identifying an author’s first language based on their second language writing. The dataset includes 1,868 student essays written by learners of European Portuguese, native speakers of the following L1s: Chinese, English, Spanish, German, Russian, French, Japanese, Italian, Dutch, Tetum, Arabic, Polish, Korean, Romanian, and Swedish. NLI-PT includes the original student text and four different types of annotation: POS, fine-grained POS, constituency parses, and dependency parses. NLI-PT can be used not only in NLI but also in research on several topics in the field of Second Language Acquisition and educational NLP. We discuss possible applications of this dataset and present the results obtained for the first lexical baseline system for Portuguese NLI.</abstract>
    <!-- Identifiers and online location. -->
    <identifier type="citekey">del-rio-gayo-etal-2018-portuguese</identifier>
    <identifier type="doi">10.18653/v1/W18-0534</identifier>
    <location>
      <url>https://aclanthology.org/W18-0534</url>
    </location>
    <!-- Page range within the host volume. -->
    <part>
      <date>2018-06</date>
      <extent unit="page">
        <start>291</start>
        <end>296</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Portuguese Native Language Identification Dataset
%A del Río Gayo, Iria
%A Zampieri, Marcos
%A Malmasi, Shervin
%Y Tetreault, Joel
%Y Burstein, Jill
%Y Kochmar, Ekaterina
%Y Leacock, Claudia
%Y Yannakoudakis, Helen
%S Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications
%D 2018
%8 June
%I Association for Computational Linguistics
%C New Orleans, Louisiana
%F del-rio-gayo-etal-2018-portuguese
%X In this paper we present NLI-PT, the first Portuguese dataset compiled for Native Language Identification (NLI), the task of identifying an author’s first language based on their second language writing. The dataset includes 1,868 student essays written by learners of European Portuguese, native speakers of the following L1s: Chinese, English, Spanish, German, Russian, French, Japanese, Italian, Dutch, Tetum, Arabic, Polish, Korean, Romanian, and Swedish. NLI-PT includes the original student text and four different types of annotation: POS, fine-grained POS, constituency parses, and dependency parses. NLI-PT can be used not only in NLI but also in research on several topics in the field of Second Language Acquisition and educational NLP. We discuss possible applications of this dataset and present the results obtained for the first lexical baseline system for Portuguese NLI.
%R 10.18653/v1/W18-0534
%U https://aclanthology.org/W18-0534
%U https://doi.org/10.18653/v1/W18-0534
%P 291-296
Markdown (Informal)
[A Portuguese Native Language Identification Dataset](https://aclanthology.org/W18-0534) (del Río Gayo et al., BEA 2018)
ACL
- Iria del Río Gayo, Marcos Zampieri, and Shervin Malmasi. 2018. A Portuguese Native Language Identification Dataset. In Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications, pages 291–296, New Orleans, Louisiana. Association for Computational Linguistics.