@inproceedings{yimam-etal-2017-multilingual,
title = "Multilingual and Cross-Lingual Complex Word Identification",
author = "Yimam, Seid Muhie and
{\v{S}}tajner, Sanja and
Riedl, Martin and
Biemann, Chris",
editor = "Mitkov, Ruslan and
Angelova, Galia",
booktitle = "Proceedings of the International Conference Recent Advances in Natural Language Processing, {RANLP} 2017",
month = sep,
year = "2017",
address = "Varna, Bulgaria",
publisher = "INCOMA Ltd.",
url = "https://doi.org/10.26615/978-954-452-049-6_104",
doi = "10.26615/978-954-452-049-6_104",
pages = "813--822",
abstract = "Complex Word Identification (CWI) is an important task in lexical simplification and text accessibility. Due to the lack of CWI datasets, previous works largely depend on Simple English Wikipedia and edit histories for obtaining {`}gold standard{'} annotations, which are of doubtable quality, and limited only to English. We collect complex words/phrases (CP) for English, German and Spanish, annotated by both native and non-native speakers, and propose language independent features that can be used to train multilingual and cross-lingual CWI models. We show that the performance of cross-lingual CWI systems (using a model trained on one language and applying it on the other languages) is comparable to the performance of monolingual CWI systems.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yimam-etal-2017-multilingual">
<titleInfo>
<title>Multilingual and Cross-Lingual Complex Word Identification</title>
</titleInfo>
<name type="personal">
<namePart type="given">Seid</namePart>
<namePart type="given">Muhie</namePart>
<namePart type="family">Yimam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Štajner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Riedl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chris</namePart>
<namePart type="family">Biemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ruslan</namePart>
<namePart type="family">Mitkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>INCOMA Ltd.</publisher>
<place>
<placeTerm type="text">Varna, Bulgaria</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Complex Word Identification (CWI) is an important task in lexical simplification and text accessibility. Due to the lack of CWI datasets, previous works largely depend on Simple English Wikipedia and edit histories for obtaining ‘gold standard’ annotations, which are of doubtable quality, and limited only to English. We collect complex words/phrases (CP) for English, German and Spanish, annotated by both native and non-native speakers, and propose language independent features that can be used to train multilingual and cross-lingual CWI models. We show that the performance of cross-lingual CWI systems (using a model trained on one language and applying it on the other languages) is comparable to the performance of monolingual CWI systems.</abstract>
<identifier type="citekey">yimam-etal-2017-multilingual</identifier>
<identifier type="doi">10.26615/978-954-452-049-6_104</identifier>
<part>
<date>2017-09</date>
<extent unit="page">
<start>813</start>
<end>822</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multilingual and Cross-Lingual Complex Word Identification
%A Yimam, Seid Muhie
%A Štajner, Sanja
%A Riedl, Martin
%A Biemann, Chris
%Y Mitkov, Ruslan
%Y Angelova, Galia
%S Proceedings of the International Conference Recent Advances in Natural Language Processing, RANLP 2017
%D 2017
%8 September
%I INCOMA Ltd.
%C Varna, Bulgaria
%F yimam-etal-2017-multilingual
%X Complex Word Identification (CWI) is an important task in lexical simplification and text accessibility. Due to the lack of CWI datasets, previous works largely depend on Simple English Wikipedia and edit histories for obtaining ‘gold standard’ annotations, which are of doubtable quality, and limited only to English. We collect complex words/phrases (CP) for English, German and Spanish, annotated by both native and non-native speakers, and propose language independent features that can be used to train multilingual and cross-lingual CWI models. We show that the performance of cross-lingual CWI systems (using a model trained on one language and applying it on the other languages) is comparable to the performance of monolingual CWI systems.
%R 10.26615/978-954-452-049-6_104
%U https://doi.org/10.26615/978-954-452-049-6_104
%P 813-822
Markdown (Informal)
[Multilingual and Cross-Lingual Complex Word Identification](https://doi.org/10.26615/978-954-452-049-6_104) (Yimam et al., RANLP 2017)
ACL