@inproceedings{hou-etal-2022-promoting,
title = "Promoting Pre-trained {LM} with Linguistic Features on Automatic Readability Assessment",
author = "Hou, Shudi and
Rao, Simin and
Xia, Yu and
Li, Sujian",
editor = "He, Yulan and
Ji, Heng and
Li, Sujian and
Liu, Yang and
Chang, Chua-Hui",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = nov,
year = "2022",
address = "Online only",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.aacl-short.54",
pages = "430--436",
abstract = "Automatic readability assessment (ARA) aims at classifying the readability level of a passage automatically. In the past, manually selected linguistic features are used to classify the passages. However, as the use of deep neural network surges, there is less work focusing on these linguistic features. Recently, many works integrate linguistic features with pre-trained language model (PLM) to make up for the information that PLMs are not good at capturing. Despite their initial success, insufficient analysis of the long passage characteristic of ARA has been done before. To further investigate the promotion of linguistic features on PLMs in ARA from the perspective of passage length, with commonly used linguistic features and abundant experiments, we find that: (1) Linguistic features promote PLMs in ARA mainly on long passages. (2) The promotion of the features on PLMs becomes less significant when the dataset size exceeds 750 passages. (3) By analyzing commonly used ARA datasets, we find Newsela is actually not suitable for ARA. Our code is available at \url{https://github.com/recorderhou/linguistic-features-in-ARA}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="hou-etal-2022-promoting">
<titleInfo>
<title>Promoting Pre-trained LM with Linguistic Features on Automatic Readability Assessment</title>
</titleInfo>
<name type="personal">
<namePart type="given">Shudi</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Simin</namePart>
<namePart type="family">Rao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yu</namePart>
<namePart type="family">Xia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chua-Hui</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online only</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic readability assessment (ARA) aims at classifying the readability level of a passage automatically. In the past, manually selected linguistic features are used to classify the passages. However, as the use of deep neural network surges, there is less work focusing on these linguistic features. Recently, many works integrate linguistic features with pre-trained language model (PLM) to make up for the information that PLMs are not good at capturing. Despite their initial success, insufficient analysis of the long passage characteristic of ARA has been done before. To further investigate the promotion of linguistic features on PLMs in ARA from the perspective of passage length, with commonly used linguistic features and abundant experiments, we find that: (1) Linguistic features promote PLMs in ARA mainly on long passages. (2) The promotion of the features on PLMs becomes less significant when the dataset size exceeds 750 passages. (3) By analyzing commonly used ARA datasets, we find Newsela is actually not suitable for ARA. Our code is available at https://github.com/recorderhou/linguistic-features-in-ARA.</abstract>
<identifier type="citekey">hou-etal-2022-promoting</identifier>
<location>
<url>https://aclanthology.org/2022.aacl-short.54</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>430</start>
<end>436</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Promoting Pre-trained LM with Linguistic Features on Automatic Readability Assessment
%A Hou, Shudi
%A Rao, Simin
%A Xia, Yu
%A Li, Sujian
%Y He, Yulan
%Y Ji, Heng
%Y Li, Sujian
%Y Liu, Yang
%Y Chang, Chua-Hui
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F hou-etal-2022-promoting
%X Automatic readability assessment (ARA) aims at classifying the readability level of a passage automatically. In the past, manually selected linguistic features are used to classify the passages. However, as the use of deep neural network surges, there is less work focusing on these linguistic features. Recently, many works integrate linguistic features with pre-trained language model (PLM) to make up for the information that PLMs are not good at capturing. Despite their initial success, insufficient analysis of the long passage characteristic of ARA has been done before. To further investigate the promotion of linguistic features on PLMs in ARA from the perspective of passage length, with commonly used linguistic features and abundant experiments, we find that: (1) Linguistic features promote PLMs in ARA mainly on long passages. (2) The promotion of the features on PLMs becomes less significant when the dataset size exceeds 750 passages. (3) By analyzing commonly used ARA datasets, we find Newsela is actually not suitable for ARA. Our code is available at https://github.com/recorderhou/linguistic-features-in-ARA.
%U https://aclanthology.org/2022.aacl-short.54
%P 430-436
Markdown (Informal)
[Promoting Pre-trained LM with Linguistic Features on Automatic Readability Assessment](https://aclanthology.org/2022.aacl-short.54) (Hou et al., AACL-IJCNLP 2022)
ACL