@inproceedings{dichy-farghaly-2003-roots,
title = "Roots {\&} patterns vs. stems plus grammar-lexis specifications: on what basis should a multilingual database centred on {A}rabic be built?",
author = "Dichy, Joseph and
Farghaly, Ali",
booktitle = "Workshop on Machine Translation for Semitic languages: issues and approaches",
month = sep # " 23-27",
year = "2003",
address = "New Orleans, USA",
url = "https://aclanthology.org/2003.mtsummit-semit.5",
abstract = "Machine translation engines draw on various types of databases. This paper is concerned with Arabic as a source or target language, and focuses on lexical databases. The non-concatenative nature of Arabic morphology, the complex structure of Arabic word-forms, and the general use of vowel-free writing present a real challenge to NLP developers. We show here how and why a stem-grounded lexical database, the items of which are associated with grammar-lexis specifications {--} as opposed to a root-{\&}-pattern database {--}, is motivated both linguistically and with regards to efficiency, economy and modularity. Arguments in favour of databases relying on stems associated with grammar-lexis specifications (such as DIINAR.1 or the Arabic dB under development at SYSTRAN), rather than on roots and patterns, are the following: (a) The latter include huge numbers of rule-generated word-forms, which do not actually appear in the language. (b) Rule-generated lemmas {--} as opposed to existing ones {--} are widely under-specified with regards to grammar-lexis relations. (c) In a Semitic language such as Arabic, the mapping of grammar-lexis specifications that need to be associated with every lexical entry of the database is decisive. (d) These specifications can only be included in a stem-based dB. Points (a) to (d) are crucial and in the context of machine translation involving Arabic.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dichy-farghaly-2003-roots">
<titleInfo>
<title>Roots & patterns vs. stems plus grammar-lexis specifications: on what basis should a multilingual database centred on Arabic be built?</title>
</titleInfo>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Dichy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ali</namePart>
<namePart type="family">Farghaly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2003-sep 23-27</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Workshop on Machine Translation for Semitic languages: issues and approaches</title>
</titleInfo>
<originInfo>
<place>
<placeTerm type="text">New Orleans, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine translation engines draw on various types of databases. This paper is concerned with Arabic as a source or target language, and focuses on lexical databases. The non-concatenative nature of Arabic morphology, the complex structure of Arabic word-forms, and the general use of vowel-free writing present a real challenge to NLP developers. We show here how and why a stem-grounded lexical database, the items of which are associated with grammar-lexis specifications – as opposed to a root-&-pattern database –, is motivated both linguistically and with regards to efficiency, economy and modularity. Arguments in favour of databases relying on stems associated with grammar-lexis specifications (such as DIINAR.1 or the Arabic dB under development at SYSTRAN), rather than on roots and patterns, are the following: (a) The latter include huge numbers of rule-generated word-forms, which do not actually appear in the language. (b) Rule-generated lemmas – as opposed to existing ones – are widely under-specified with regards to grammar-lexis relations. (c) In a Semitic language such as Arabic, the mapping of grammar-lexis specifications that need to be associated with every lexical entry of the database is decisive. (d) These specifications can only be included in a stem-based dB. Points (a) to (d) are crucial and in the context of machine translation involving Arabic.</abstract>
<identifier type="citekey">dichy-farghaly-2003-roots</identifier>
<location>
<url>https://aclanthology.org/2003.mtsummit-semit.5</url>
</location>
<part>
<date>2003-sep 23-27</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Roots & patterns vs. stems plus grammar-lexis specifications: on what basis should a multilingual database centred on Arabic be built?
%A Dichy, Joseph
%A Farghaly, Ali
%S Workshop on Machine Translation for Semitic languages: issues and approaches
%D 2003
%8 sep 23 27
%C New Orleans, USA
%F dichy-farghaly-2003-roots
%X Machine translation engines draw on various types of databases. This paper is concerned with Arabic as a source or target language, and focuses on lexical databases. The non-concatenative nature of Arabic morphology, the complex structure of Arabic word-forms, and the general use of vowel-free writing present a real challenge to NLP developers. We show here how and why a stem-grounded lexical database, the items of which are associated with grammar-lexis specifications – as opposed to a root-&-pattern database –, is motivated both linguistically and with regards to efficiency, economy and modularity. Arguments in favour of databases relying on stems associated with grammar-lexis specifications (such as DIINAR.1 or the Arabic dB under development at SYSTRAN), rather than on roots and patterns, are the following: (a) The latter include huge numbers of rule-generated word-forms, which do not actually appear in the language. (b) Rule-generated lemmas – as opposed to existing ones – are widely under-specified with regards to grammar-lexis relations. (c) In a Semitic language such as Arabic, the mapping of grammar-lexis specifications that need to be associated with every lexical entry of the database is decisive. (d) These specifications can only be included in a stem-based dB. Points (a) to (d) are crucial and in the context of machine translation involving Arabic.
%U https://aclanthology.org/2003.mtsummit-semit.5
Markdown (Informal)
[Roots & patterns vs. stems plus grammar-lexis specifications: on what basis should a multilingual database centred on Arabic be built?](https://aclanthology.org/2003.mtsummit-semit.5) (Dichy & Farghaly, MTSummit 2003)
ACL