@inproceedings{strohmaier-etal-2025-dlu,
title = "{DLU}: Dictionary Look-Up Data and Prediction",
author = "Strohmaier, David and
Tyen, Gladys and
Gu, Hongyi and
Nicholls, Diane and
Yuan, Zheng and
Buttery, Paula",
editor = "Boleda, Gemma and
Roth, Michael",
booktitle = "Proceedings of the 29th Conference on Computational Natural Language Learning",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.conll-1.32/",
doi = "10.18653/v1/2025.conll-1.32",
pages = "481--501",
ISBN = "979-8-89176-271-8",
abstract = "Knowing which words language learners struggle with is crucial for developing personalised education technologies. In this paper, we advocate for the novel task of ``dictionary look-up prediction'' as a means for evaluating the complexity of words in reading tasks. We release the Dictionary Look-Up development dataset (DLU-dev) and the Dialogue Dictionary Look-Up dataset (D-DLU), which is based on chatbot dialogues. We demonstrate that dictionary look-up is a challenging task for LLMs (results are presented for LLaMA, Gemma, and Longformer models). We explore finetuning with the ROC* loss function as a more appropriate loss for this task than the commonly used Binary Cross Entropy (BCE). We show that a feature-based model outperforms the LLMs. Finally, we investigate the transfer between DLU and the related tasks of Complex Word Identification (CWI) and Semantic Error Prediction (SEP), establishing new state-of-the-art results for SEP."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="strohmaier-etal-2025-dlu">
<titleInfo>
<title>DLU: Dictionary Look-Up Data and Prediction</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Strohmaier</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gladys</namePart>
<namePart type="family">Tyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hongyi</namePart>
<namePart type="family">Gu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Diane</namePart>
<namePart type="family">Nicholls</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zheng</namePart>
<namePart type="family">Yuan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paula</namePart>
<namePart type="family">Buttery</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th Conference on Computational Natural Language Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Gemma</namePart>
<namePart type="family">Boleda</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Roth</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vienna, Austria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-271-8</identifier>
</relatedItem>
<abstract>Knowing which words language learners struggle with is crucial for developing personalised education technologies. In this paper, we advocate for the novel task of “dictionary look-up prediction” as a means for evaluating the complexity of words in reading tasks. We release the Dictionary Look-Up development dataset (DLU-dev) and the Dialogue Dictionary Look-Up dataset (D-DLU), which is based on chatbot dialogues. We demonstrate that dictionary look-up is a challenging task for LLMs (results are presented for LLaMA, Gemma, and Longformer models). We explore finetuning with the ROC* loss function as a more appropriate loss for this task than the commonly used Binary Cross Entropy (BCE). We show that a feature-based model outperforms the LLMs. Finally, we investigate the transfer between DLU and the related tasks of Complex Word Identification (CWI) and Semantic Error Prediction (SEP), establishing new state-of-the-art results for SEP.</abstract>
<identifier type="citekey">strohmaier-etal-2025-dlu</identifier>
<identifier type="doi">10.18653/v1/2025.conll-1.32</identifier>
<location>
<url>https://aclanthology.org/2025.conll-1.32/</url>
</location>
<part>
<date>2025-07</date>
<extent unit="page">
<start>481</start>
<end>501</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T DLU: Dictionary Look-Up Data and Prediction
%A Strohmaier, David
%A Tyen, Gladys
%A Gu, Hongyi
%A Nicholls, Diane
%A Yuan, Zheng
%A Buttery, Paula
%Y Boleda, Gemma
%Y Roth, Michael
%S Proceedings of the 29th Conference on Computational Natural Language Learning
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-271-8
%F strohmaier-etal-2025-dlu
%X Knowing which words language learners struggle with is crucial for developing personalised education technologies. In this paper, we advocate for the novel task of “dictionary look-up prediction” as a means for evaluating the complexity of words in reading tasks. We release the Dictionary Look-Up development dataset (DLU-dev) and the Dialogue Dictionary Look-Up dataset (D-DLU), which is based on chatbot dialogues. We demonstrate that dictionary look-up is a challenging task for LLMs (results are presented for LLaMA, Gemma, and Longformer models). We explore finetuning with the ROC* loss function as a more appropriate loss for this task than the commonly used Binary Cross Entropy (BCE). We show that a feature-based model outperforms the LLMs. Finally, we investigate the transfer between DLU and the related tasks of Complex Word Identification (CWI) and Semantic Error Prediction (SEP), establishing new state-of-the-art results for SEP.
%R 10.18653/v1/2025.conll-1.32
%U https://aclanthology.org/2025.conll-1.32/
%U https://doi.org/10.18653/v1/2025.conll-1.32
%P 481-501
Markdown (Informal)
[DLU: Dictionary Look-Up Data and Prediction](https://aclanthology.org/2025.conll-1.32/) (Strohmaier et al., CoNLL 2025)
ACL
- David Strohmaier, Gladys Tyen, Hongyi Gu, Diane Nicholls, Zheng Yuan, and Paula Buttery. 2025. DLU: Dictionary Look-Up Data and Prediction. In Proceedings of the 29th Conference on Computational Natural Language Learning, pages 481–501, Vienna, Austria. Association for Computational Linguistics.