@inproceedings{younes-weeds-2020-embed,
title = "Embed More Ignore Less ({EMIL}): Exploiting Enriched Representations for {A}rabic {NLP}",
author = "Younes, Ahmed and
Weeds, Julie",
editor = "Zitouni, Imed and
Abdul-Mageed, Muhammad and
Bouamor, Houda and
Bougares, Fethi and
El-Haj, Mahmoud and
Tomeh, Nadi and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Fifth Arabic Natural Language Processing Workshop",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.wanlp-1.13/",
pages = "139--154",
abstract = "Our research focuses on the potential improvements of exploiting language specific characteristics in the form of embeddings by neural networks. More specifically, we investigate the capability of neural techniques and embeddings to represent language specific characteristics in two sequence labeling tasks: named entity recognition (NER) and part of speech (POS) tagging. In both tasks, our preprocessing is designed to use enriched Arabic representation by adding diacritics to undiacritized text. In POS tagging, we test the ability of a neural model to capture syntactic characteristics encoded within these diacritics by incorporating an embedding layer for diacritics alongside embedding layers for words and characters. In NER, our architecture incorporates diacritic and POS embeddings alongside word and character embeddings. Our experiments are conducted on 7 datasets (4 NER and 3 POS). We show that embedding the information that is encoded in automatically acquired Arabic diacritics improves the performance across all datasets on both tasks. Embedding the information in automatically assigned POS tags further improves performance on the NER task."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="younes-weeds-2020-embed">
<titleInfo>
<title>Embed More Ignore Less (EMIL): Exploiting Enriched Representations for Arabic NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ahmed</namePart>
<namePart type="family">Younes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julie</namePart>
<namePart type="family">Weeds</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Muhammad</namePart>
<namePart type="family">Abdul-Mageed</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Houda</namePart>
<namePart type="family">Bouamor</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Our research focuses on the potential improvements of exploiting language specific characteristics in the form of embeddings by neural networks. More specifically, we investigate the capability of neural techniques and embeddings to represent language specific characteristics in two sequence labeling tasks: named entity recognition (NER) and part of speech (POS) tagging. In both tasks, our preprocessing is designed to use enriched Arabic representation by adding diacritics to undiacritized text. In POS tagging, we test the ability of a neural model to capture syntactic characteristics encoded within these diacritics by incorporating an embedding layer for diacritics alongside embedding layers for words and characters. In NER, our architecture incorporates diacritic and POS embeddings alongside word and character embeddings. Our experiments are conducted on 7 datasets (4 NER and 3 POS). We show that embedding the information that is encoded in automatically acquired Arabic diacritics improves the performance across all datasets on both tasks. Embedding the information in automatically assigned POS tags further improves performance on the NER task.</abstract>
<identifier type="citekey">younes-weeds-2020-embed</identifier>
<location>
<url>https://aclanthology.org/2020.wanlp-1.13/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>139</start>
<end>154</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Embed More Ignore Less (EMIL): Exploiting Enriched Representations for Arabic NLP
%A Younes, Ahmed
%A Weeds, Julie
%Y Zitouni, Imed
%Y Abdul-Mageed, Muhammad
%Y Bouamor, Houda
%Y Bougares, Fethi
%Y El-Haj, Mahmoud
%Y Tomeh, Nadi
%Y Zaghouani, Wajdi
%S Proceedings of the Fifth Arabic Natural Language Processing Workshop
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F younes-weeds-2020-embed
%X Our research focuses on the potential improvements of exploiting language specific characteristics in the form of embeddings by neural networks. More specifically, we investigate the capability of neural techniques and embeddings to represent language specific characteristics in two sequence labeling tasks: named entity recognition (NER) and part of speech (POS) tagging. In both tasks, our preprocessing is designed to use enriched Arabic representation by adding diacritics to undiacritized text. In POS tagging, we test the ability of a neural model to capture syntactic characteristics encoded within these diacritics by incorporating an embedding layer for diacritics alongside embedding layers for words and characters. In NER, our architecture incorporates diacritic and POS embeddings alongside word and character embeddings. Our experiments are conducted on 7 datasets (4 NER and 3 POS). We show that embedding the information that is encoded in automatically acquired Arabic diacritics improves the performance across all datasets on both tasks. Embedding the information in automatically assigned POS tags further improves performance on the NER task.
%U https://aclanthology.org/2020.wanlp-1.13/
%P 139-154
Markdown (Informal)
[Embed More Ignore Less (EMIL): Exploiting Enriched Representations for Arabic NLP](https://aclanthology.org/2020.wanlp-1.13/) (Younes & Weeds, WANLP 2020)
ACL