@inproceedings{espla-gomis-etal-2014-comparing,
    title = "Comparing two acquisition systems for automatically building an {E}nglish{---}{C}roatian parallel corpus from multilingual websites",
    author = "Espl{\`a}-Gomis, Miquel  and
      Klubi{\v{c}}ka, Filip  and
      Ljube{\v{s}}i{\'c}, Nikola  and
      Ortiz-Rojas, Sergio  and
      Papavassiliou, Vassilis  and
      Prokopidis, Prokopis",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Declerck, Thierry  and
      Loftsson, Hrafn  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Moreno, Asuncion  and
      Odijk, Jan  and
      Piperidis, Stelios",
    booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
    month = may,
    year = "2014",
    address = "Reykjavik, Iceland",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L14-1437/",
    pages = "1252--1258",
    abstract = "In this paper we compare two tools for automatically harvesting bitexts from multilingual websites: bitextor and ILSP-FC. We used both tools for crawling 21 multilingual websites from the tourism domain to build a domain-specific English{\textemdash}Croatian parallel corpus. Different settings were tried for both tools and 10,662 unique document pairs were obtained. A sample of about 10{\%} of them was manually examined and the success rate was computed on the collection of pairs of documents detected by each setting. We compare the performance of the settings and the amount of different corpora detected by each setting. In addition, we describe the resource obtained, both by the settings and through the human evaluation, which has been released as a high-quality parallel corpus."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="espla-gomis-etal-2014-comparing">
    <titleInfo>
        <title>Comparing two acquisition systems for automatically building an English—Croatian parallel corpus from multilingual websites</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Miquel</namePart>
        <namePart type="family">Esplà-Gomis</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Filip</namePart>
        <namePart type="family">Klubička</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Nikola</namePart>
        <namePart type="family">Ljubešić</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Sergio</namePart>
        <namePart type="family">Ortiz-Rojas</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Vassilis</namePart>
        <namePart type="family">Papavassiliou</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Prokopis</namePart>
        <namePart type="family">Prokopidis</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2014-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Nicoletta</namePart>
            <namePart type="family">Calzolari</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Khalid</namePart>
            <namePart type="family">Choukri</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Thierry</namePart>
            <namePart type="family">Declerck</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Hrafn</namePart>
            <namePart type="family">Loftsson</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Bente</namePart>
            <namePart type="family">Maegaard</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Joseph</namePart>
            <namePart type="family">Mariani</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Asuncion</namePart>
            <namePart type="family">Moreno</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Jan</namePart>
            <namePart type="family">Odijk</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Stelios</namePart>
            <namePart type="family">Piperidis</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>European Language Resources Association (ELRA)</publisher>
            <place>
                <placeTerm type="text">Reykjavik, Iceland</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper we compare two tools for automatically harvesting bitexts from multilingual websites: bitextor and ILSP-FC. We used both tools for crawling 21 multilingual websites from the tourism domain to build a domain-specific English—Croatian parallel corpus. Different settings were tried for both tools and 10,662 unique document pairs were obtained. A sample of about 10% of them was manually examined and the success rate was computed on the collection of pairs of documents detected by each setting. We compare the performance of the settings and the amount of different corpora detected by each setting. In addition, we describe the resource obtained, both by the settings and through the human evaluation, which has been released as a high-quality parallel corpus.</abstract>
    <identifier type="citekey">espla-gomis-etal-2014-comparing</identifier>
    <location>
        <url>https://aclanthology.org/L14-1437/</url>
    </location>
    <part>
        <date>2014-05</date>
        <extent unit="page">
            <start>1252</start>
            <end>1258</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Comparing two acquisition systems for automatically building an English—Croatian parallel corpus from multilingual websites
%A Esplà-Gomis, Miquel
%A Klubička, Filip
%A Ljubešić, Nikola
%A Ortiz-Rojas, Sergio
%A Papavassiliou, Vassilis
%A Prokopidis, Prokopis
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F espla-gomis-etal-2014-comparing
%X In this paper we compare two tools for automatically harvesting bitexts from multilingual websites: bitextor and ILSP-FC. We used both tools for crawling 21 multilingual websites from the tourism domain to build a domain-specific English—Croatian parallel corpus. Different settings were tried for both tools and 10,662 unique document pairs were obtained. A sample of about 10% of them was manually examined and the success rate was computed on the collection of pairs of documents detected by each setting. We compare the performance of the settings and the amount of different corpora detected by each setting. In addition, we describe the resource obtained, both by the settings and through the human evaluation, which has been released as a high-quality parallel corpus.
%U https://aclanthology.org/L14-1437/
%P 1252-1258
Markdown (Informal)
[Comparing two acquisition systems for automatically building an English—Croatian parallel corpus from multilingual websites](https://aclanthology.org/L14-1437/) (Esplà-Gomis et al., LREC 2014)
ACL