@inproceedings{ortiz-zambrano-montejo-raez-2021-clexis2,
title = "{CL}ex{IS}2: A New Corpus for Complex Word Identification Research in Computing Studies",
author = "Ortiz Zambrano, Jenny A. and
Montejo-R{\'a}ez, Arturo",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)",
month = sep,
year = "2021",
address = "Held Online",
publisher = "INCOMA Ltd.",
url = "https://aclanthology.org/2021.ranlp-1.121",
pages = "1075--1083",
abstract = "Reading is a complex process not only because of the words or sections that are difficult for the reader to understand. Complex word identification (CWI) is the task of detecting in the content of documents the words that are difficult or complex to understand by the people of a certain group. Annotated corpora for English learners are widely available, while they are less common for the Spanish language. In this article, we present CLexIS$^2$, a new corpus in Spanish to contribute to the advancement of research in the area of Lexical Simplification, specifically in the identification and prediction of complex words in computing studies. Several metrics used to evaluate the complexity of texts in Spanish were applied, such as LC, LDI, ILFW, SSR, SCI, ASL, CS. Furthermore, as a baseline of the primer, two experiments have been performed to predict the complexity of words: one using a supervised learning approach and the other using an unsupervised solution based on the frequency of words on a general corpus.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ortiz-zambrano-montejo-raez-2021-clexis2">
<titleInfo>
<title>CLexIS2: A New Corpus for Complex Word Identification Research in Computing Studies</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jenny</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Ortiz Zambrano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Arturo</namePart>
<namePart type="family">Montejo-Ráez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2021-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Held Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Reading is a complex process not only because of the words or sections that are difficult for the reader to understand. Complex word identification (CWI) is the task of detecting in the content of documents the words that are difficult or complex to understand by the people of a certain group. Annotated corpora for English learners are widely available, while they are less common for the Spanish language. In this article, we present CLexIS², a new corpus in Spanish to contribute to the advancement of research in the area of Lexical Simplification, specifically in the identification and prediction of complex words in computing studies. Several metrics used to evaluate the complexity of texts in Spanish were applied, such as LC, LDI, ILFW, SSR, SCI, ASL, CS. Furthermore, as a baseline of the primer, two experiments have been performed to predict the complexity of words: one using a supervised learning approach and the other using an unsupervised solution based on the frequency of words on a general corpus.</abstract>
<identifier type="citekey">ortiz-zambrano-montejo-raez-2021-clexis2</identifier>
<location>
<url>https://aclanthology.org/2021.ranlp-1.121</url>
</location>
<part>
<date>2021-09</date>
<extent unit="page">
<start>1075</start>
<end>1083</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CLexIS2: A New Corpus for Complex Word Identification Research in Computing Studies
%A Ortiz Zambrano, Jenny A.
%A Montejo-Ráez, Arturo
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2021)
%D 2021
%8 September
%I INCOMA Ltd.
%C Held Online
%F ortiz-zambrano-montejo-raez-2021-clexis2
%X Reading is a complex process not only because of the words or sections that are difficult for the reader to understand. Complex word identification (CWI) is the task of detecting in the content of documents the words that are difficult or complex to understand by the people of a certain group. Annotated corpora for English learners are widely available, while they are less common for the Spanish language. In this article, we present CLexIS², a new corpus in Spanish to contribute to the advancement of research in the area of Lexical Simplification, specifically in the identification and prediction of complex words in computing studies. Several metrics used to evaluate the complexity of texts in Spanish were applied, such as LC, LDI, ILFW, SSR, SCI, ASL, CS. Furthermore, as a baseline of the primer, two experiments have been performed to predict the complexity of words: one using a supervised learning approach and the other using an unsupervised solution based on the frequency of words on a general corpus.
%U https://aclanthology.org/2021.ranlp-1.121
%P 1075-1083
Markdown (Informal)
[CLexIS2: A New Corpus for Complex Word Identification Research in Computing Studies](https://aclanthology.org/2021.ranlp-1.121) (Ortiz Zambrano & Montejo-Ráez, RANLP 2021)
ACL