@inproceedings{lippincott-etal-2019-jhu,
title = "{JHU} System Description for the {MADAR} {A}rabic Dialect Identification Shared Task",
author = "Lippincott, Tom and
Shapiro, Pamela and
Duh, Kevin and
McNamee, Paul",
editor = "El-Hajj, Wassim and
Belguith, Lamia Hadrich and
Bougares, Fethi and
Magdy, Walid and
Zitouni, Imed and
Tomeh, Nadi and
El-Haj, Mahmoud and
Zaghouani, Wajdi",
booktitle = "Proceedings of the Fourth Arabic Natural Language Processing Workshop",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4634",
doi = "10.18653/v1/W19-4634",
pages = "264--268",
abstract = "Our submission to the MADAR shared task on Arabic dialect identification employed a language modeling technique called Prediction by Partial Matching, an ensemble of neural architectures, and sources of additional data for training word embeddings and auxiliary language models. We found several of these techniques provided small boosts in performance, though a simple character-level language model was a strong baseline, and a lower-order LM achieved best performance on Subtask 2. Interestingly, word embeddings provided no consistent benefit, and ensembling struggled to outperform the best component submodel. This suggests the variety of architectures are learning redundant information, and future work may focus on encouraging decorrelated learning.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lippincott-etal-2019-jhu">
<titleInfo>
<title>JHU System Description for the MADAR Arabic Dialect Identification Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Tom</namePart>
<namePart type="family">Lippincott</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pamela</namePart>
<namePart type="family">Shapiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="family">Duh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Paul</namePart>
<namePart type="family">McNamee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Arabic Natural Language Processing Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wassim</namePart>
<namePart type="family">El-Hajj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Lamia</namePart>
<namePart type="given">Hadrich</namePart>
<namePart type="family">Belguith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fethi</namePart>
<namePart type="family">Bougares</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Walid</namePart>
<namePart type="family">Magdy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Imed</namePart>
<namePart type="family">Zitouni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nadi</namePart>
<namePart type="family">Tomeh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mahmoud</namePart>
<namePart type="family">El-Haj</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Our submission to the MADAR shared task on Arabic dialect identification employed a language modeling technique called Prediction by Partial Matching, an ensemble of neural architectures, and sources of additional data for training word embeddings and auxiliary language models. We found several of these techniques provided small boosts in performance, though a simple character-level language model was a strong baseline, and a lower-order LM achieved best performance on Subtask 2. Interestingly, word embeddings provided no consistent benefit, and ensembling struggled to outperform the best component submodel. This suggests the variety of architectures are learning redundant information, and future work may focus on encouraging decorrelated learning.</abstract>
<identifier type="citekey">lippincott-etal-2019-jhu</identifier>
<identifier type="doi">10.18653/v1/W19-4634</identifier>
<location>
<url>https://aclanthology.org/W19-4634</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>264</start>
<end>268</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JHU System Description for the MADAR Arabic Dialect Identification Shared Task
%A Lippincott, Tom
%A Shapiro, Pamela
%A Duh, Kevin
%A McNamee, Paul
%Y El-Hajj, Wassim
%Y Belguith, Lamia Hadrich
%Y Bougares, Fethi
%Y Magdy, Walid
%Y Zitouni, Imed
%Y Tomeh, Nadi
%Y El-Haj, Mahmoud
%Y Zaghouani, Wajdi
%S Proceedings of the Fourth Arabic Natural Language Processing Workshop
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F lippincott-etal-2019-jhu
%X Our submission to the MADAR shared task on Arabic dialect identification employed a language modeling technique called Prediction by Partial Matching, an ensemble of neural architectures, and sources of additional data for training word embeddings and auxiliary language models. We found several of these techniques provided small boosts in performance, though a simple character-level language model was a strong baseline, and a lower-order LM achieved best performance on Subtask 2. Interestingly, word embeddings provided no consistent benefit, and ensembling struggled to outperform the best component submodel. This suggests the variety of architectures are learning redundant information, and future work may focus on encouraging decorrelated learning.
%R 10.18653/v1/W19-4634
%U https://aclanthology.org/W19-4634
%U https://doi.org/10.18653/v1/W19-4634
%P 264-268
Markdown (Informal)
[JHU System Description for the MADAR Arabic Dialect Identification Shared Task](https://aclanthology.org/W19-4634) (Lippincott et al., WANLP 2019)
ACL