@inproceedings{stajner-etal-2016-use,
title = "Use of Domain-Specific Language Resources in Machine Translation",
author = "{\v{S}}tajner, Sanja and
Querido, Andreia and
Rendeiro, Nuno and
Rodrigues, Jo{\~a}o Ant{\'o}nio and
Branco, Ant{\'o}nio",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1094",
pages = "592--598",
abstract = "In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="stajner-etal-2016-use">
<titleInfo>
<title>Use of Domain-Specific Language Resources in Machine Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sanja</namePart>
<namePart type="family">Štajner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreia</namePart>
<namePart type="family">Querido</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nuno</namePart>
<namePart type="family">Rendeiro</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">João</namePart>
<namePart type="given">António</namePart>
<namePart type="family">Rodrigues</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">António</namePart>
<namePart type="family">Branco</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task.</abstract>
<identifier type="citekey">stajner-etal-2016-use</identifier>
<location>
<url>https://aclanthology.org/L16-1094</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>592</start>
<end>598</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Use of Domain-Specific Language Resources in Machine Translation
%A Štajner, Sanja
%A Querido, Andreia
%A Rendeiro, Nuno
%A Rodrigues, João António
%A Branco, António
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F stajner-etal-2016-use
%X In this paper, we address the problem of Machine Translation (MT) for a specialised domain in a language pair for which only a very small domain-specific parallel corpus is available. We conduct a series of experiments using a purely phrase-based SMT (PBSMT) system and a hybrid MT system (TectoMT), testing three different strategies to overcome the problem of the small amount of in-domain training data. Our results show that adding a small size in-domain bilingual terminology to the small in-domain training corpus leads to the best improvements of a hybrid MT system, while the PBSMT system achieves the best results by adding a combination of in-domain bilingual terminology and a larger out-of-domain corpus. We focus on qualitative human evaluation of the output of two best systems (one for each approach) and perform a systematic in-depth error analysis which revealed advantages of the hybrid MT system over the pure PBSMT system for this specific task.
%U https://aclanthology.org/L16-1094
%P 592-598
Markdown (Informal)
[Use of Domain-Specific Language Resources in Machine Translation](https://aclanthology.org/L16-1094) (Štajner et al., LREC 2016)
ACL
- Sanja Štajner, Andreia Querido, Nuno Rendeiro, João António Rodrigues, and António Branco. 2016. Use of Domain-Specific Language Resources in Machine Translation. In Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), pages 592–598, Portorož, Slovenia. European Language Resources Association (ELRA).