@inproceedings{sanchez-cartagena-etal-2012-choosing,
title = "Choosing the correct paradigm for unknown words in rule-based machine translation systems",
author = "S{\'a}nchez-Cartagena, V. M. and
Espl{\`a}-Gomis, M. and
S{\'a}nchez-Mart{\'\i}nez, F. and
P{\'e}rez-Ortiz, J. A.",
editor = "Espa{\~n}a-Bonet, Cristina and
Ranta, Aarne",
booktitle = "Proceedings of the Third International Workshop on Free/Open-Source Rule-Based Machine Translation",
month = jun # " 13-15",
year = "2012",
address = "Gothenburg, Sweden",
url = "https://aclanthology.org/2012.freeopmt-1.4",
pages = "27--40",
abstract = "Previous work on an interactive system aimed at helping non-expert users to enlarge the monolingual dictionaries of rule-based machine translation (MT) systems worked by discarding those inflection paradigms that cannot generate a set of inflected word forms validated by the user. This method, however, cannot deal with the common case where a set of different paradigms generate exactly the same set of inflected word forms, although with different inflection information attached. In this paper, we propose the use of an n-gram-based model of lexical categories and inflection information to select a single paradigm in cases where more than one paradigm generates the same set of word forms. Results obtained with a Spanish monolingual dictionary show that the correct paradigm is chosen for around 75{\%} of the unknown words, thus making the resulting system (available under an open-source license) of valuable help to enlarge the monolingual dictionaries used in MT involving non-expert users without technical linguistic knowledge.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sanchez-cartagena-etal-2012-choosing">
<titleInfo>
<title>Choosing the correct paradigm for unknown words in rule-based machine translation systems</title>
</titleInfo>
<name type="personal">
<namePart type="given">V</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Sánchez-Cartagena</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="family">Esplà-Gomis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">F</namePart>
<namePart type="family">Sánchez-Martínez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">J</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Pérez-Ortiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2012-06-13/2012-06-15</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third International Workshop on Free/Open-Source Rule-Based Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cristina</namePart>
<namePart type="family">España-Bonet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aarne</namePart>
<namePart type="family">Ranta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<place>
<placeTerm type="text">Gothenburg, Sweden</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Previous work on an interactive system aimed at helping non-expert users to enlarge the monolingual dictionaries of rule-based machine translation (MT) systems worked by discarding those inflection paradigms that cannot generate a set of inflected word forms validated by the user. This method, however, cannot deal with the common case where a set of different paradigms generate exactly the same set of inflected word forms, although with different inflection information attached. In this paper, we propose the use of an n-gram-based model of lexical categories and inflection information to select a single paradigm in cases where more than one paradigm generates the same set of word forms. Results obtained with a Spanish monolingual dictionary show that the correct paradigm is chosen for around 75% of the unknown words, thus making the resulting system (available under an open-source license) of valuable help to enlarge the monolingual dictionaries used in MT involving non-expert users without technical linguistic knowledge.</abstract>
<identifier type="citekey">sanchez-cartagena-etal-2012-choosing</identifier>
<location>
<url>https://aclanthology.org/2012.freeopmt-1.4</url>
</location>
<part>
<date>2012-06-13/2012-06-15</date>
<extent unit="page">
<start>27</start>
<end>40</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Choosing the correct paradigm for unknown words in rule-based machine translation systems
%A Sánchez-Cartagena, V. M.
%A Esplà-Gomis, M.
%A Sánchez-Martínez, F.
%A Pérez-Ortiz, J. A.
%Y España-Bonet, Cristina
%Y Ranta, Aarne
%S Proceedings of the Third International Workshop on Free/Open-Source Rule-Based Machine Translation
%D 2012
%8 June 13-15
%C Gothenburg, Sweden
%F sanchez-cartagena-etal-2012-choosing
%X Previous work on an interactive system aimed at helping non-expert users to enlarge the monolingual dictionaries of rule-based machine translation (MT) systems worked by discarding those inflection paradigms that cannot generate a set of inflected word forms validated by the user. This method, however, cannot deal with the common case where a set of different paradigms generate exactly the same set of inflected word forms, although with different inflection information attached. In this paper, we propose the use of an n-gram-based model of lexical categories and inflection information to select a single paradigm in cases where more than one paradigm generates the same set of word forms. Results obtained with a Spanish monolingual dictionary show that the correct paradigm is chosen for around 75% of the unknown words, thus making the resulting system (available under an open-source license) of valuable help to enlarge the monolingual dictionaries used in MT involving non-expert users without technical linguistic knowledge.
%U https://aclanthology.org/2012.freeopmt-1.4
%P 27-40
Markdown (Informal)
[Choosing the correct paradigm for unknown words in rule-based machine translation systems](https://aclanthology.org/2012.freeopmt-1.4) (Sánchez-Cartagena et al., FreeOpMT 2012)
ACL