@inproceedings{mccarthy-etal-2021-jump,
title = "Jump-Starting Item Parameters for Adaptive Language Tests",
author = "McCarthy, Arya D. and
Yancey, Kevin P. and
LaFlair, Geoffrey T. and
Egbert, Jesse and
Liao, Manqian and
Settles, Burr",
editor = "Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau",
booktitle = "Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2021",
address = "Online and Punta Cana, Dominican Republic",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.emnlp-main.67",
doi = "10.18653/v1/2021.emnlp-main.67",
pages = "883--899",
abstract = "A challenge in designing high-stakes language assessments is calibrating the test item difficulties, either a priori or from limited pilot test data. While prior work has addressed {`}cold start{'} estimation of item difficulties without piloting, we devise a multi-task generalized linear model with BERT features to jump-start these estimates, rapidly improving their quality with as few as 500 test-takers and a small sample of item exposures ({\mbox{$\approx$}}6 each) from a large item bank ({\mbox{$\approx$}}4,000 items). Our joint model provides a principled way to compare test-taker proficiency, item difficulty, and language proficiency frameworks like the Common European Framework of Reference (CEFR). This also enables new item difficulty estimates without piloting them first, which in turn limits item exposure and thus enhances test item security. Finally, using operational data from the Duolingo English Test, a high-stakes English proficiency test, we find that the difficulty estimates derived using this method correlate strongly with lexico-grammatical features that correlate with reading complexity.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mccarthy-etal-2021-jump">
<titleInfo>
<title>Jump-Starting Item Parameters for Adaptive Language Tests</title>
</titleInfo>
<name type="personal">
<namePart type="given">Arya</namePart>
<namePart type="given">D</namePart>
<namePart type="family">McCarthy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">P</namePart>
<namePart type="family">Yancey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geoffrey</namePart>
<namePart type="given">T</namePart>
<namePart type="family">LaFlair</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jesse</namePart>
<namePart type="family">Egbert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Manqian</namePart>
<namePart type="family">Liao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Burr</namePart>
<namePart type="family">Settles</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marie-Francine</namePart>
<namePart type="family">Moens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xuanjing</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Scott</namePart>
<namePart type="given">Wen-tau</namePart>
<namePart type="family">Yih</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online and Punta Cana, Dominican Republic</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>A challenge in designing high-stakes language assessments is calibrating the test item difficulties, either a priori or from limited pilot test data. While prior work has addressed ‘cold start’ estimation of item difficulties without piloting, we devise a multi-task generalized linear model with BERT features to jump-start these estimates, rapidly improving their quality with as few as 500 test-takers and a small sample of item exposures (≈6 each) from a large item bank (≈4,000 items). Our joint model provides a principled way to compare test-taker proficiency, item difficulty, and language proficiency frameworks like the Common European Framework of Reference (CEFR). This also enables new item difficulty estimates without piloting them first, which in turn limits item exposure and thus enhances test item security. Finally, using operational data from the Duolingo English Test, a high-stakes English proficiency test, we find that the difficulty estimates derived using this method correlate strongly with lexico-grammatical features that correlate with reading complexity.</abstract>
<identifier type="citekey">mccarthy-etal-2021-jump</identifier>
<identifier type="doi">10.18653/v1/2021.emnlp-main.67</identifier>
<location>
<url>https://aclanthology.org/2021.emnlp-main.67</url>
</location>
<part>
<date>2021-11</date>
<extent unit="page">
<start>883</start>
<end>899</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Jump-Starting Item Parameters for Adaptive Language Tests
%A McCarthy, Arya D.
%A Yancey, Kevin P.
%A LaFlair, Geoffrey T.
%A Egbert, Jesse
%A Liao, Manqian
%A Settles, Burr
%Y Moens, Marie-Francine
%Y Huang, Xuanjing
%Y Specia, Lucia
%Y Yih, Scott Wen-tau
%S Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%D 2021
%8 November
%I Association for Computational Linguistics
%C Online and Punta Cana, Dominican Republic
%F mccarthy-etal-2021-jump
%X A challenge in designing high-stakes language assessments is calibrating the test item difficulties, either a priori or from limited pilot test data. While prior work has addressed ‘cold start’ estimation of item difficulties without piloting, we devise a multi-task generalized linear model with BERT features to jump-start these estimates, rapidly improving their quality with as few as 500 test-takers and a small sample of item exposures (≈6 each) from a large item bank (≈4,000 items). Our joint model provides a principled way to compare test-taker proficiency, item difficulty, and language proficiency frameworks like the Common European Framework of Reference (CEFR). This also enables new item difficulty estimates without piloting them first, which in turn limits item exposure and thus enhances test item security. Finally, using operational data from the Duolingo English Test, a high-stakes English proficiency test, we find that the difficulty estimates derived using this method correlate strongly with lexico-grammatical features that correlate with reading complexity.
%R 10.18653/v1/2021.emnlp-main.67
%U https://aclanthology.org/2021.emnlp-main.67
%U https://doi.org/10.18653/v1/2021.emnlp-main.67
%P 883-899
Markdown (Informal)
[Jump-Starting Item Parameters for Adaptive Language Tests](https://aclanthology.org/2021.emnlp-main.67) (McCarthy et al., EMNLP 2021)
ACL
Arya D. McCarthy, Kevin P. Yancey, Geoffrey T. LaFlair, Jesse Egbert, Manqian Liao, and Burr Settles. 2021. Jump-Starting Item Parameters for Adaptive Language Tests. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, pages 883–899, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.
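
To make the modeling idea in the abstract concrete, here is a minimal sketch (not the authors' implementation) of a Rasch-style logistic model in which each item's difficulty is a linear function of text-derived features, so an unpiloted item inherits a difficulty estimate from its features alone. The BERT embeddings, multi-task structure, and CEFR alignment described in the paper are replaced by random stand-in features and illustrative names (`X`, `theta`, `w`); everything below is an assumption-laden illustration of the general technique, not the published model.

```python
# Hedged sketch: joint fit of test-taker abilities and a feature-to-difficulty map,
# so items with zero responses still receive a difficulty estimate ("jump-start").
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(0)
n_items, n_people, n_feats = 200, 500, 16

# Stand-in "BERT" features (random here) and a ground-truth linear map to difficulty.
X = rng.normal(size=(n_items, n_feats))
true_w = rng.normal(size=n_feats)
true_b = X @ true_w                      # true item difficulties
true_theta = rng.normal(size=n_people)   # true test-taker abilities

# Sparse pilot data: every test-taker answers ~6 randomly chosen items.
person_idx, item_idx, y = [], [], []
for p in range(n_people):
    for i in rng.choice(n_items, size=6, replace=False):
        prob = 1.0 / (1.0 + np.exp(-(true_theta[p] - true_b[i])))
        person_idx.append(p); item_idx.append(i); y.append(rng.random() < prob)
person_idx, item_idx = np.array(person_idx), np.array(item_idx)
y = np.array(y, dtype=float)

def unpack(params):
    return params[:n_people], params[n_people:]

def neg_log_lik(params):
    theta, w = unpack(params)
    b = X @ w                                   # difficulty = features @ weights
    z = theta[person_idx] - b[item_idx]         # Rasch-style logit per response
    nll = np.sum(np.logaddexp(0.0, -z) + (1.0 - y) * z)
    # Analytic gradient of the Bernoulli negative log-likelihood.
    p = 1.0 / (1.0 + np.exp(-z))
    dz = p - y
    g_theta = np.bincount(person_idx, weights=dz, minlength=n_people)
    g_b = np.bincount(item_idx, weights=-dz, minlength=n_items)
    g_w = X.T @ g_b
    return nll, np.concatenate([g_theta, g_w])

res = minimize(neg_log_lik, np.zeros(n_people + n_feats), jac=True, method="L-BFGS-B")
theta_hat, w_hat = unpack(res.x)

# "Cold start" behavior: any item, piloted or not, gets a difficulty from its features.
print("fitted-vs-true difficulty correlation:", np.corrcoef(X @ w_hat, true_b)[0, 1])
```

The design choice worth noting is that abilities and feature weights are fit jointly, rather than estimating each item's difficulty directly from its own (very few) responses; that is what allows difficulty estimates for items with little or no pilot exposure, which is the jump-start behavior the abstract describes.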