@article{lui-etal-2014-automatic,
    title = "Automatic Detection and Language Identification of Multilingual Documents",
    author = "Lui, Marco  and
      Lau, Jey Han  and
      Baldwin, Timothy",
    editor = "Lin, Dekang  and
      Collins, Michael  and
      Lee, Lillian",
    journal = "Transactions of the Association for Computational Linguistics",
    volume = "2",
    year = "2014",
    address = "Cambridge, MA",
    publisher = "MIT Press",
    url = "https://aclanthology.org/Q14-1003/",
    doi = "10.1162/tacl_a_00163",
    pages = "27--40",
    abstract = "Language identification is the task of automatically detecting the language(s) present in a document based on the content of the document. In this work, we address the problem of detecting documents that contain text from more than one language (multilingual documents). We introduce a method that is able to detect that a document is multilingual, identify the languages present, and estimate their relative proportions. We demonstrate the effectiveness of our method over synthetic data, as well as real-world multilingual documents collected from the web."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lui-etal-2014-automatic">
    <titleInfo>
        <title>Automatic Detection and Language Identification of Multilingual Documents</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Marco</namePart>
        <namePart type="family">Lui</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Jey</namePart>
        <namePart type="given">Han</namePart>
        <namePart type="family">Lau</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Timothy</namePart>
        <namePart type="family">Baldwin</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2014</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
        <titleInfo>
            <title>Transactions of the Association for Computational Linguistics</title>
        </titleInfo>
        <originInfo>
            <issuance>continuing</issuance>
            <publisher>MIT Press</publisher>
            <place>
                <placeTerm type="text">Cambridge, MA</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">periodical</genre>
        <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>Language identification is the task of automatically detecting the language(s) present in a document based on the content of the document. In this work, we address the problem of detecting documents that contain text from more than one language (multilingual documents). We introduce a method that is able to detect that a document is multilingual, identify the languages present, and estimate their relative proportions. We demonstrate the effectiveness of our method over synthetic data, as well as real-world multilingual documents collected from the web.</abstract>
    <identifier type="citekey">lui-etal-2014-automatic</identifier>
    <identifier type="doi">10.1162/tacl_a_00163</identifier>
    <location>
        <url>https://aclanthology.org/Q14-1003/</url>
    </location>
    <part>
        <date>2014</date>
        <detail type="volume"><number>2</number></detail>
        <extent unit="page">
            <start>27</start>
            <end>40</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Journal Article
%T Automatic Detection and Language Identification of Multilingual Documents
%A Lui, Marco
%A Lau, Jey Han
%A Baldwin, Timothy
%J Transactions of the Association for Computational Linguistics
%D 2014
%V 2
%I MIT Press
%C Cambridge, MA
%F lui-etal-2014-automatic
%X Language identification is the task of automatically detecting the language(s) present in a document based on the content of the document. In this work, we address the problem of detecting documents that contain text from more than one language (multilingual documents). We introduce a method that is able to detect that a document is multilingual, identify the languages present, and estimate their relative proportions. We demonstrate the effectiveness of our method over synthetic data, as well as real-world multilingual documents collected from the web.
%R 10.1162/tacl_a_00163
%U https://aclanthology.org/Q14-1003/
%U https://doi.org/10.1162/tacl_a_00163
%P 27-40
Markdown (Informal)
[Automatic Detection and Language Identification of Multilingual Documents](https://aclanthology.org/Q14-1003/) (Lui et al., TACL 2014)
ACL