@inproceedings{acharya-etal-2025-tracing,
title = "Tracing {L}1 Interference in {E}nglish Learner Writing: A Longitudinal Corpus with Error Annotations",
author = "Acharya, Poorvi and
Liebl, J. Elizabeth and
Goswami, Dhiman and
North, Kai and
Zampieri, Marcos and
Anastasopoulos, Antonios",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.766/",
pages = "15157--15178",
ISBN = "979-8-89176-332-6",
abstract = "The availability of suitable learner corpora is crucial for studying second language acquisition (SLA) and language transfer. However, curating such corpora is challenging, as high-quality learner data is rarely publicly available. As a result, only a few learner corpora, such as ICLE and TOEFL-11, are accessible to the research community. To address this gap, we present Anonymous, a novel English learner corpus with longitudinal data. The corpus consists of 687 texts written by adult learners taking English as a second language courses in the USA. These learners are either preparing for university admission or enhancing their language proficiency while beginning their university studies. Unlike most learner corpora, Anonymous includes longitudinal data, allowing researchers to explore language learning trajectories over time. The corpus features contributions from speakers of 15 different L1s. We demonstrate the utility of Anonymous through two case studies at the intersection of SLA and Computational Linguistics: (1) Native Language Identification (NLI), and (2) a quantitative and qualitative analysis of linguistic features influenced by L1 using large language models."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="acharya-etal-2025-tracing">
<titleInfo>
<title>Tracing L1 Interference in English Learner Writing: A Longitudinal Corpus with Error Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Poorvi</namePart>
<namePart type="family">Acharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">J</namePart>
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Liebl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhiman</namePart>
<namePart type="family">Goswami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Anastasopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>The availability of suitable learner corpora is crucial for studying second language acquisition (SLA) and language transfer. However, curating such corpora is challenging, as high-quality learner data is rarely publicly available. As a result, only a few learner corpora, such as ICLE and TOEFL-11, are accessible to the research community. To address this gap, we present Anonymous, a novel English learner corpus with longitudinal data. The corpus consists of 687 texts written by adult learners taking English as a second language courses in the USA. These learners are either preparing for university admission or enhancing their language proficiency while beginning their university studies. Unlike most learner corpora, Anonymous includes longitudinal data, allowing researchers to explore language learning trajectories over time. The corpus features contributions from speakers of 15 different L1s. We demonstrate the utility of Anonymous through two case studies at the intersection of SLA and Computational Linguistics: (1) Native Language Identification (NLI), and (2) a quantitative and qualitative analysis of linguistic features influenced by L1 using large language models.</abstract>
<identifier type="citekey">acharya-etal-2025-tracing</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.766/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>15157</start>
<end>15178</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tracing L1 Interference in English Learner Writing: A Longitudinal Corpus with Error Annotations
%A Acharya, Poorvi
%A Liebl, J. Elizabeth
%A Goswami, Dhiman
%A North, Kai
%A Zampieri, Marcos
%A Anastasopoulos, Antonios
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F acharya-etal-2025-tracing
%X The availability of suitable learner corpora is crucial for studying second language acquisition (SLA) and language transfer. However, curating such corpora is challenging, as high-quality learner data is rarely publicly available. As a result, only a few learner corpora, such as ICLE and TOEFL-11, are accessible to the research community. To address this gap, we present Anonymous, a novel English learner corpus with longitudinal data. The corpus consists of 687 texts written by adult learners taking English as a second language courses in the USA. These learners are either preparing for university admission or enhancing their language proficiency while beginning their university studies. Unlike most learner corpora, Anonymous includes longitudinal data, allowing researchers to explore language learning trajectories over time. The corpus features contributions from speakers of 15 different L1s. We demonstrate the utility of Anonymous through two case studies at the intersection of SLA and Computational Linguistics: (1) Native Language Identification (NLI), and (2) a quantitative and qualitative analysis of linguistic features influenced by L1 using large language models.
%U https://aclanthology.org/2025.emnlp-main.766/
%P 15157-15178
Markdown (Informal)
[Tracing L1 Interference in English Learner Writing: A Longitudinal Corpus with Error Annotations](https://aclanthology.org/2025.emnlp-main.766/) (Acharya et al., EMNLP 2025)
ACL