@inproceedings{acharya-etal-2025-tracing,
title = "Tracing {L}1 Interference in {E}nglish Learner Writing: A Longitudinal Corpus with Error Annotations",
author = "Acharya, Poorvi and
Liebl, J. Elizabeth and
Goswami, Dhiman and
North, Kai and
Zampieri, Marcos and
Anastasopoulos, Antonios",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.emnlp-main.766/",
doi = "10.18653/v1/2025.emnlp-main.766",
pages = "15146--15167",
ISBN = "979-8-89176-332-6",
abstract = "Language transfer is an important topic of research in second language acquisition and computational linguistics. The availability of suitable learner corpora is paramount for the study of second language acquisition (SLA) and language transfer. However, curating learner corpora is a challenging endeavor as high quality learner data is rarely publicly available. This results in only a few such corpora available to the community. To address this important gap, in this paper we present LENS, a novel English learner corpus with longitudinal data which enables researchers to investigate language learning over time. LENS contains 687 instances written by speakers of 15 different L1s. We use LENS to perform two important tasks at the intersection of SLA and Computational Linguistics: (1) Native Language Identification (NLI); and (2) an evaluation of large language models as a tool for high-precision, semi-automated annotation of L1 interference features."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="acharya-etal-2025-tracing">
<titleInfo>
<title>Tracing L1 Interference in English Learner Writing: A Longitudinal Corpus with Error Annotations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Poorvi</namePart>
<namePart type="family">Acharya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">J</namePart>
<namePart type="given">Elizabeth</namePart>
<namePart type="family">Liebl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dhiman</namePart>
<namePart type="family">Goswami</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Antonios</namePart>
<namePart type="family">Anastasopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tanmoy</namePart>
<namePart type="family">Chakraborty</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Carolyn</namePart>
<namePart type="family">Rose</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Violet</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Suzhou, China</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-332-6</identifier>
</relatedItem>
<abstract>Language transfer is an important topic of research in second language acquisition and computational linguistics. The availability of suitable learner corpora is paramount for the study of second language acquisition (SLA) and language transfer. However, curating learner corpora is a challenging endeavor as high quality learner data is rarely publicly available. This results in only a few such corpora available to the community. To address this important gap, in this paper we present LENS, a novel English learner corpus with longitudinal data which enables researchers to investigate language learning over time. LENS contains 687 instances written by speakers of 15 different L1s. We use LENS to perform two important tasks at the intersection of SLA and Computational Linguistics: (1) Native Language Identification (NLI); and (2) an evaluation of large language models as a tool for high-precision, semi-automated annotation of L1 interference features.</abstract>
<identifier type="citekey">acharya-etal-2025-tracing</identifier>
<identifier type="doi">10.18653/v1/2025.emnlp-main.766</identifier>
<location>
<url>https://aclanthology.org/2025.emnlp-main.766/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>15146</start>
<end>15167</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Tracing L1 Interference in English Learner Writing: A Longitudinal Corpus with Error Annotations
%A Acharya, Poorvi
%A Liebl, J. Elizabeth
%A Goswami, Dhiman
%A North, Kai
%A Zampieri, Marcos
%A Anastasopoulos, Antonios
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F acharya-etal-2025-tracing
%X Language transfer is an important topic of research in second language acquisition and computational linguistics. The availability of suitable learner corpora is paramount for the study of second language acquisition (SLA) and language transfer. However, curating learner corpora is a challenging endeavor as high quality learner data is rarely publicly available. This results in only a few such corpora available to the community. To address this important gap, in this paper we present LENS, a novel English learner corpus with longitudinal data which enables researchers to investigate language learning over time. LENS contains 687 instances written by speakers of 15 different L1s. We use LENS to perform two important tasks at the intersection of SLA and Computational Linguistics: (1) Native Language Identification (NLI); and (2) an evaluation of large language models as a tool for high-precision, semi-automated annotation of L1 interference features.
%R 10.18653/v1/2025.emnlp-main.766
%U https://aclanthology.org/2025.emnlp-main.766/
%U https://doi.org/10.18653/v1/2025.emnlp-main.766
%P 15146-15167
Markdown (Informal)
[Tracing L1 Interference in English Learner Writing: A Longitudinal Corpus with Error Annotations](https://aclanthology.org/2025.emnlp-main.766/) (Acharya et al., EMNLP 2025)
ACL