@inproceedings{yang-lepage-2016-improving,
    title = "Improving Patent Translation using Bilingual Term Extraction and Re-tokenization for {C}hinese{--}{J}apanese",
    author = "Yang, Wei  and
      Lepage, Yves",
    editor = "Nakazawa, Toshiaki  and
      Mino, Hideya  and
      Ding, Chenchen  and
      Goto, Isao  and
      Neubig, Graham  and
      Kurohashi, Sadao  and
      Riza, Ir. Hammam  and
      Bhattacharyya, Pushpak",
    booktitle = "Proceedings of the 3rd Workshop on {A}sian Translation ({WAT}2016)",
    month = dec,
    year = "2016",
    address = "Osaka, Japan",
    publisher = "The COLING 2016 Organizing Committee",
    url = "https://aclanthology.org/W16-4619/",
    pages = "194--202",
    abstract = "Unlike European languages, many Asian languages like Chinese and Japanese do not have typographic boundaries in written system. Word segmentation (tokenization) that break sentences down into individual words (tokens) is normally treated as the first step for machine translation (MT). For Chinese and Japanese, different rules and segmentation tools lead different segmentation results in different level of granularity between Chinese and Japanese. To improve the translation accuracy, we adjust and balance the granularity of segmentation results around terms for Chinese{--}Japanese patent corpus for training translation model. In this paper, we describe a statistical machine translation (SMT) system which is built on re-tokenized Chinese-Japanese patent training corpus using extracted bilingual multi-word terms."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yang-lepage-2016-improving">
    <titleInfo>
        <title>Improving Patent Translation using Bilingual Term Extraction and Re-tokenization for Chinese–Japanese</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Wei</namePart>
        <namePart type="family">Yang</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Yves</namePart>
        <namePart type="family">Lepage</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2016-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 3rd Workshop on Asian Translation (WAT2016)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Toshiaki</namePart>
            <namePart type="family">Nakazawa</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Hideya</namePart>
            <namePart type="family">Mino</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Chenchen</namePart>
            <namePart type="family">Ding</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Isao</namePart>
            <namePart type="family">Goto</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Graham</namePart>
            <namePart type="family">Neubig</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Sadao</namePart>
            <namePart type="family">Kurohashi</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Ir.</namePart>
            <namePart type="given">Hammam</namePart>
            <namePart type="family">Riza</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Pushpak</namePart>
            <namePart type="family">Bhattacharyya</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>The COLING 2016 Organizing Committee</publisher>
            <place>
                <placeTerm type="text">Osaka, Japan</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Unlike European languages, many Asian languages like Chinese and Japanese do not have typographic boundaries in written system. Word segmentation (tokenization) that break sentences down into individual words (tokens) is normally treated as the first step for machine translation (MT). For Chinese and Japanese, different rules and segmentation tools lead different segmentation results in different level of granularity between Chinese and Japanese. To improve the translation accuracy, we adjust and balance the granularity of segmentation results around terms for Chinese–Japanese patent corpus for training translation model. In this paper, we describe a statistical machine translation (SMT) system which is built on re-tokenized Chinese-Japanese patent training corpus using extracted bilingual multi-word terms.</abstract>
    <identifier type="citekey">yang-lepage-2016-improving</identifier>
    <location>
        <url>https://aclanthology.org/W16-4619/</url>
    </location>
    <part>
        <date>2016-12</date>
        <extent unit="page">
            <start>194</start>
            <end>202</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Improving Patent Translation using Bilingual Term Extraction and Re-tokenization for Chinese–Japanese
%A Yang, Wei
%A Lepage, Yves
%Y Nakazawa, Toshiaki
%Y Mino, Hideya
%Y Ding, Chenchen
%Y Goto, Isao
%Y Neubig, Graham
%Y Kurohashi, Sadao
%Y Riza, Ir. Hammam
%Y Bhattacharyya, Pushpak
%S Proceedings of the 3rd Workshop on Asian Translation (WAT2016)
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F yang-lepage-2016-improving
%X Unlike European languages, many Asian languages like Chinese and Japanese do not have typographic boundaries in written system. Word segmentation (tokenization) that break sentences down into individual words (tokens) is normally treated as the first step for machine translation (MT). For Chinese and Japanese, different rules and segmentation tools lead different segmentation results in different level of granularity between Chinese and Japanese. To improve the translation accuracy, we adjust and balance the granularity of segmentation results around terms for Chinese–Japanese patent corpus for training translation model. In this paper, we describe a statistical machine translation (SMT) system which is built on re-tokenized Chinese-Japanese patent training corpus using extracted bilingual multi-word terms.
%U https://aclanthology.org/W16-4619/
%P 194-202
Markdown (Informal)
[Improving Patent Translation using Bilingual Term Extraction and Re-tokenization for Chinese–Japanese](https://aclanthology.org/W16-4619/) (Yang & Lepage, WAT 2016)
ACL