% ACL Anthology record for a paper in the CDL 2026 workshop proceedings.
% `address` carries only the city so styles print a conventional location;
% the full venue string is preserved in `venue` (ignored by classic BibTeX styles).
% Acronym-like tokens in the title are braced as whole words to survive sentence-casing.
@inproceedings{barbenel-etal-2026-l1,
  title     = {{L1} Influence in {L2} Language Models: A Human-centric Approach},
  author    = {Barbenel, Laura and
               Goulder, Lily and
               O{'}Driscoll, Aoife and
               Salhan, Suchir and
               Arnett, Catherine and
               Caines, Andrew and
               Buttery, Paula},
  editor    = {Ma, Martin Ziqiao and
               Liu, Emmy and
               Liu, Jing and
               Chang, Tyler A. and
               Fourtassi, Abdellah and
               Warstadt, Alex and
               Hahn, Michael and
               Sun, Weiwei and
               Shi, Freda},
  booktitle = {Proceedings of the 1st Workshop on Computational Developmental Linguistics ({CDL})},
  month     = jul,
  year      = {2026},
  address   = {San Diego, CA},
  venue     = {Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.cdl-1.15/},
  pages     = {92--116},
  isbn      = {979-8-89176-428-6},
  abstract  = {Language learners typically exhibit first language (L1) influence in their written second language (L2) production. We investigate whether similar patterns emerge in L2 language models (L2LMs), which are typically assessed on task-based benchmarks rather than on language use. We evaluate the use of Native Language Identification (NLI) as a method for detecting whether L2LMs exhibit human-like L1 influence. Using existing learner corpora and our novel L2 English dataset, we identify the conditions that yield the highest NLI accuracy, and show that text length but not proficiency affects performance. We then apply NLI to L2LM-generated text under various instruction-tuning and prompting conditions. We find that instruction tuning on human learner essays yields high NLI accuracy ({\textasciitilde}90{\%}) and is necessary for detectable L1 influence. Whilst NLI accuracy is similar for L2LM and human essays, human evaluation shows that LM-generated L1 influence remains distinguishable from human writing.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper; the nested host relatedItem describes the proceedings volume. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="barbenel-etal-2026-l1">
    <titleInfo>
      <title>L1 Influence in L2 Language Models: A Human-centric Approach</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Laura</namePart>
      <namePart type="family">Barbenel</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lily</namePart>
      <namePart type="family">Goulder</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Aoife</namePart>
      <namePart type="family">O’Driscoll</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Suchir</namePart>
      <namePart type="family">Salhan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Catherine</namePart>
      <namePart type="family">Arnett</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrew</namePart>
      <namePart type="family">Caines</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Paula</namePart>
      <namePart type="family">Buttery</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 1st Workshop on Computational Developmental Linguistics (CDL)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Martin</namePart>
        <namePart type="given">Ziqiao</namePart>
        <namePart type="family">Ma</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Emmy</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jing</namePart>
        <namePart type="family">Liu</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tyler</namePart>
        <!-- Middle initial keeps its period, matching the other export formats of this record -->
        <namePart type="given">A.</namePart>
        <namePart type="family">Chang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Abdellah</namePart>
        <namePart type="family">Fourtassi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Alex</namePart>
        <namePart type="family">Warstadt</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Michael</namePart>
        <namePart type="family">Hahn</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Weiwei</namePart>
        <namePart type="family">Sun</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Freda</namePart>
        <namePart type="family">Shi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <!-- City only; the full venue string is kept in the note below -->
          <placeTerm type="text">San Diego, CA</placeTerm>
        </place>
      </originInfo>
      <note type="venue">Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101</note>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-428-6</identifier>
    </relatedItem>
    <abstract>Language learners typically exhibit first language (L1) influence in their written second language (L2) production. We investigate whether similar patterns emerge in L2 language models (L2LMs), which are typically assessed on task-based benchmarks rather than on language use. We evaluate the use of Native Language Identification (NLI) as a method for detecting whether L2LMs exhibit human-like L1 influence. Using existing learner corpora and our novel L2 English dataset, we identify the conditions that yield the highest NLI accuracy, and show that text length but not proficiency affects performance. We then apply NLI to L2LM-generated text under various instruction-tuning and prompting conditions. We find that instruction tuning on human learner essays yields high NLI accuracy (~90%) and is necessary for detectable L1 influence. Whilst NLI accuracy is similar for L2LM and human essays, human evaluation shows that LM-generated L1 influence remains distinguishable from human writing.</abstract>
    <identifier type="citekey">barbenel-etal-2026-l1</identifier>
    <location>
      <url>https://aclanthology.org/2026.cdl-1.15/</url>
    </location>
    <!-- Issue date and page range of the paper within the host volume -->
    <part>
      <date>2026-07</date>
      <extent unit="page">
        <start>92</start>
        <end>116</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T L1 Influence in L2 Language Models: A Human-centric Approach
%A Barbenel, Laura
%A Goulder, Lily
%A O’Driscoll, Aoife
%A Salhan, Suchir
%A Arnett, Catherine
%A Caines, Andrew
%A Buttery, Paula
%Y Ma, Martin Ziqiao
%Y Liu, Emmy
%Y Liu, Jing
%Y Chang, Tyler A.
%Y Fourtassi, Abdellah
%Y Warstadt, Alex
%Y Hahn, Michael
%Y Sun, Weiwei
%Y Shi, Freda
%S Proceedings of the 1st Workshop on Computational Developmental Linguistics (CDL)
%D 2026
%8 July
%I Association for Computational Linguistics
%C San Diego, CA
%Z Venue: Grand Hyatt Manchester San Diego, 1 Market Pl, San Diego, CA 92101
%@ 979-8-89176-428-6
%F barbenel-etal-2026-l1
%X Language learners typically exhibit first language (L1) influence in their written second language (L2) production. We investigate whether similar patterns emerge in L2 language models (L2LMs), which are typically assessed on task-based benchmarks rather than on language use. We evaluate the use of Native Language Identification (NLI) as a method for detecting whether L2LMs exhibit human-like L1 influence. Using existing learner corpora and our novel L2 English dataset, we identify the conditions that yield the highest NLI accuracy, and show that text length but not proficiency affects performance. We then apply NLI to L2LM-generated text under various instruction-tuning and prompting conditions. We find that instruction tuning on human learner essays yields high NLI accuracy (~90%) and is necessary for detectable L1 influence. Whilst NLI accuracy is similar for L2LM and human essays, human evaluation shows that LM-generated L1 influence remains distinguishable from human writing.
%U https://aclanthology.org/2026.cdl-1.15/
%P 92-116
Markdown (Informal)
[L1 Influence in L2 Language Models: A Human-centric Approach](https://aclanthology.org/2026.cdl-1.15/) (Barbenel et al., CDL 2026)
ACL
- Laura Barbenel, Lily Goulder, Aoife O’Driscoll, Suchir Salhan, Catherine Arnett, Andrew Caines, and Paula Buttery. 2026. L1 Influence in L2 Language Models: A Human-centric Approach. In Proceedings of the 1st Workshop on Computational Developmental Linguistics (CDL), pages 92–116, San Diego, CA. Association for Computational Linguistics.