@inproceedings{nohejl-etal-2024-difficult,
title = "Difficult for Whom? A Study of {J}apanese Lexical Complexity",
author = "Nohejl, Adam and
Hayakawa, Akio and
Ide, Yusuke and
Watanabe, Taro",
editor = "Shardlow, Matthew and
Saggion, Horacio and
Alva-Manchego, Fernando and
Zampieri, Marcos and
North, Kai and
{\v{S}}tajner, Sanja and
Stodden, Regina",
booktitle = "Proceedings of the Third Workshop on Text Simplification, Accessibility and Readability (TSAR 2024)",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.tsar-1.8",
pages = "69--81",
abstract = "The tasks of lexical complexity prediction (LCP) and complex word identification (CWI) commonly presuppose that difficult-to-understand words are shared by the target population. Meanwhile, personalization methods have also been proposed to adapt models to individual needs. We verify that a recent Japanese LCP dataset is representative of its target population by partially replicating the annotation. By another reannotation we show that native Chinese speakers perceive the complexity differently due to Sino-Japanese vocabulary. To explore the possibilities of personalization, we compare competitive baselines trained on the group mean ratings and individual ratings in terms of performance for an individual. We show that the model trained on a group mean performs similarly to an individual model in the CWI task, while achieving good LCP performance for an individual is difficult. We also experiment with adapting a finetuned BERT model, which results only in marginal improvements across all settings.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nohejl-etal-2024-difficult">
<titleInfo>
<title>Difficult for Whom? A Study of Japanese Lexical Complexity</title>
</titleInfo>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Nohejl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Akio</namePart>
<namePart type="family">Hayakawa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yusuke</namePart>
<namePart type="family">Ide</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Taro</namePart>
<namePart type="family">Watanabe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Text Simplification, Accessibility and Readability (TSAR 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Shardlow</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Horacio</namePart>
<namePart type="family">Saggion</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fernando</namePart>
<namePart type="family">Alva-Manchego</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marcos</namePart>
<namePart type="family">Zampieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai</namePart>
<namePart type="family">North</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Štajner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Regina</namePart>
<namePart type="family">Stodden</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The tasks of lexical complexity prediction (LCP) and complex word identification (CWI) commonly presuppose that difficult-to-understand words are shared by the target population. Meanwhile, personalization methods have also been proposed to adapt models to individual needs. We verify that a recent Japanese LCP dataset is representative of its target population by partially replicating the annotation. By another reannotation we show that native Chinese speakers perceive the complexity differently due to Sino-Japanese vocabulary. To explore the possibilities of personalization, we compare competitive baselines trained on the group mean ratings and individual ratings in terms of performance for an individual. We show that the model trained on a group mean performs similarly to an individual model in the CWI task, while achieving good LCP performance for an individual is difficult. We also experiment with adapting a finetuned BERT model, which results only in marginal improvements across all settings.</abstract>
<identifier type="citekey">nohejl-etal-2024-difficult</identifier>
<location>
<url>https://aclanthology.org/2024.tsar-1.8</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>69</start>
<end>81</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Difficult for Whom? A Study of Japanese Lexical Complexity
%A Nohejl, Adam
%A Hayakawa, Akio
%A Ide, Yusuke
%A Watanabe, Taro
%Y Shardlow, Matthew
%Y Saggion, Horacio
%Y Alva-Manchego, Fernando
%Y Zampieri, Marcos
%Y North, Kai
%Y Štajner, Sanja
%Y Stodden, Regina
%S Proceedings of the Third Workshop on Text Simplification, Accessibility and Readability (TSAR 2024)
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F nohejl-etal-2024-difficult
%X The tasks of lexical complexity prediction (LCP) and complex word identification (CWI) commonly presuppose that difficult-to-understand words are shared by the target population. Meanwhile, personalization methods have also been proposed to adapt models to individual needs. We verify that a recent Japanese LCP dataset is representative of its target population by partially replicating the annotation. By another reannotation we show that native Chinese speakers perceive the complexity differently due to Sino-Japanese vocabulary. To explore the possibilities of personalization, we compare competitive baselines trained on the group mean ratings and individual ratings in terms of performance for an individual. We show that the model trained on a group mean performs similarly to an individual model in the CWI task, while achieving good LCP performance for an individual is difficult. We also experiment with adapting a finetuned BERT model, which results only in marginal improvements across all settings.
%U https://aclanthology.org/2024.tsar-1.8
%P 69-81
Markdown (Informal)
[Difficult for Whom? A Study of Japanese Lexical Complexity](https://aclanthology.org/2024.tsar-1.8) (Nohejl et al., TSAR 2024)
ACL
- Adam Nohejl, Akio Hayakawa, Yusuke Ide, and Taro Watanabe. 2024. Difficult for Whom? A Study of Japanese Lexical Complexity. In Proceedings of the Third Workshop on Text Simplification, Accessibility and Readability (TSAR 2024), pages 69–81, Miami, Florida, USA. Association for Computational Linguistics.