@inproceedings{tirosh-becker-etal-2022-part,
title = "Part-of-Speech and Morphological Tagging of {A}lgerian {J}udeo-{A}rabic",
author = "Tirosh-Becker, Ofra and
Kessler, Michal and
Becker, Oren and
Belinkov, Yonatan",
editor = "Derczynski, Leon",
booktitle = "Northern European Journal of Language Technology, Volume 8",
year = "2022",
address = "Copenhagen, Denmark",
publisher = "Northern European Association of Language Technology",
url = "https://aclanthology.org/2022.nejlt-1.7",
doi = "10.3384/nejlt.2000-1533.2022.4315",
abstract = "Most linguistic studies of Judeo-Arabic, the ensemble of dialects spoken and written by Jews in Arab lands, are qualitative in nature and rely on laborious manual annotation work, and are therefore limited in scale. In this work, we develop automatic methods for morpho-syntactic tagging of Algerian Judeo-Arabic texts published by Algerian Jews in the 19th{--}20th centuries, based on a linguistically tagged corpus. First, we describe our semi-automatic approach for preprocessing these texts. Then, we experiment with both an off-the-shelf morphological tagger and several specially designed neural network taggers. Finally, we perform a real-world evaluation of new texts that were never tagged before in comparison with human expert annotators. Our experimental results demonstrate that these methods can dramatically speed up and improve the linguistic research pipeline, enabling linguists to study these dialects on a much greater scale.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tirosh-becker-etal-2022-part">
<titleInfo>
<title>Part-of-Speech and Morphological Tagging of Algerian Judeo-Arabic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ofra</namePart>
<namePart type="family">Tirosh-Becker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michal</namePart>
<namePart type="family">Kessler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Oren</namePart>
<namePart type="family">Becker</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Northern European Journal of Language Technology, Volume 8</title>
</titleInfo>
<name type="personal">
<namePart type="given">Leon</namePart>
<namePart type="family">Derczynski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Northern European Association of Language Technology</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most linguistic studies of Judeo-Arabic, the ensemble of dialects spoken and written by Jews in Arab lands, are qualitative in nature and rely on laborious manual annotation work, and are therefore limited in scale. In this work, we develop automatic methods for morpho-syntactic tagging of Algerian Judeo-Arabic texts published by Algerian Jews in the 19th–20th centuries, based on a linguistically tagged corpus. First, we describe our semi-automatic approach for preprocessing these texts. Then, we experiment with both an off-the-shelf morphological tagger and several specially designed neural network taggers. Finally, we perform a real-world evaluation of new texts that were never tagged before in comparison with human expert annotators. Our experimental results demonstrate that these methods can dramatically speed up and improve the linguistic research pipeline, enabling linguists to study these dialects on a much greater scale.</abstract>
<identifier type="citekey">tirosh-becker-etal-2022-part</identifier>
<identifier type="doi">10.3384/nejlt.2000-1533.2022.4315</identifier>
<location>
<url>https://aclanthology.org/2022.nejlt-1.7</url>
</location>
<part>
<date>2022</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Part-of-Speech and Morphological Tagging of Algerian Judeo-Arabic
%A Tirosh-Becker, Ofra
%A Kessler, Michal
%A Becker, Oren
%A Belinkov, Yonatan
%Y Derczynski, Leon
%S Northern European Journal of Language Technology, Volume 8
%D 2022
%I Northern European Association of Language Technology
%C Copenhagen, Denmark
%F tirosh-becker-etal-2022-part
%X Most linguistic studies of Judeo-Arabic, the ensemble of dialects spoken and written by Jews in Arab lands, are qualitative in nature and rely on laborious manual annotation work, and are therefore limited in scale. In this work, we develop automatic methods for morpho-syntactic tagging of Algerian Judeo-Arabic texts published by Algerian Jews in the 19th–20th centuries, based on a linguistically tagged corpus. First, we describe our semi-automatic approach for preprocessing these texts. Then, we experiment with both an off-the-shelf morphological tagger and several specially designed neural network taggers. Finally, we perform a real-world evaluation of new texts that were never tagged before in comparison with human expert annotators. Our experimental results demonstrate that these methods can dramatically speed up and improve the linguistic research pipeline, enabling linguists to study these dialects on a much greater scale.
%R 10.3384/nejlt.2000-1533.2022.4315
%U https://aclanthology.org/2022.nejlt-1.7
%U https://doi.org/10.3384/nejlt.2000-1533.2022.4315
Markdown (Informal)
[Part-of-Speech and Morphological Tagging of Algerian Judeo-Arabic](https://aclanthology.org/2022.nejlt-1.7) (Tirosh-Becker et al., NEJLT 2022)
ACL