% Conference paper from INLG 2019 (ACL Anthology ID W19-8626).
@inproceedings{nguyen-son-etal-2019-detecting,
    title = "Detecting Machine-Translated Text using Back Translation",
    author = "Nguyen-Son, Hoang-Quoc and
      Tran Phuong, Thao and
      Hidano, Seira and
      Kiyomoto, Shinsaku",
    editor = "van Deemter, Kees and
      Lin, Chenghua and
      Takamura, Hiroya",
    booktitle = "Proceedings of the 12th International Conference on Natural Language Generation",
    month = oct # "--" # nov,
    year = "2019",
    address = "Tokyo, Japan",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/W19-8626/",
    doi = "10.18653/v1/W19-8626",
    pages = "189--197",
    abstract = "Machine-translated text plays a crucial role in the communication of people using different languages. However, adversaries can use such text for malicious purposes such as plagiarism and fake review. The existing methods detected a machine-translated text only using the text's intrinsic content, but they are unsuitable for classifying the machine-translated and human-written texts with the same meanings. We have proposed a method to extract features used to distinguish machine/human text based on the similarity between the intrinsic text and its back-translation. The evaluation of detecting translated sentences with French shows that our method achieves 75.0{\%} of both accuracy and F-score. It outperforms the existing methods whose the best accuracy is 62.8{\%} and the F-score is 62.7{\%}. The proposed method even detects more efficiently the back-translated text with 83.4{\%} of accuracy, which is higher than 66.7{\%} of the best previous accuracy. We also achieve similar results not only with F-score but also with similar experiments related to Japanese. Moreover, we prove that our detector can recognize both machine-translated and machine-back-translated texts without the language information which is used to generate these machine texts. It demonstrates the persistence of our method in various applications in both low- and rich-resource languages."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="nguyen-son-etal-2019-detecting">
<titleInfo>
<title>Detecting Machine-Translated Text using Back Translation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Hoang-Quoc</namePart>
<namePart type="family">Nguyen-Son</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thao</namePart>
<namePart type="family">Tran Phuong</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Seira</namePart>
<namePart type="family">Hidano</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shinsaku</namePart>
<namePart type="family">Kiyomoto</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-oct–nov</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 12th International Conference on Natural Language Generation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kees</namePart>
<namePart type="family">van Deemter</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chenghua</namePart>
<namePart type="family">Lin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hiroya</namePart>
<namePart type="family">Takamura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Tokyo, Japan</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Machine-translated text plays a crucial role in the communication of people using different languages. However, adversaries can use such text for malicious purposes such as plagiarism and fake review. The existing methods detected a machine-translated text only using the text’s intrinsic content, but they are unsuitable for classifying the machine-translated and human-written texts with the same meanings. We have proposed a method to extract features used to distinguish machine/human text based on the similarity between the intrinsic text and its back-translation. The evaluation of detecting translated sentences with French shows that our method achieves 75.0% of both accuracy and F-score. It outperforms the existing methods whose the best accuracy is 62.8% and the F-score is 62.7%. The proposed method even detects more efficiently the back-translated text with 83.4% of accuracy, which is higher than 66.7% of the best previous accuracy. We also achieve similar results not only with F-score but also with similar experiments related to Japanese. Moreover, we prove that our detector can recognize both machine-translated and machine-back-translated texts without the language information which is used to generate these machine texts. It demonstrates the persistence of our method in various applications in both low- and rich-resource languages.</abstract>
<identifier type="citekey">nguyen-son-etal-2019-detecting</identifier>
<identifier type="doi">10.18653/v1/W19-8626</identifier>
<location>
<url>https://aclanthology.org/W19-8626/</url>
</location>
<part>
<date>2019-oct–nov</date>
<extent unit="page">
<start>189</start>
<end>197</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Detecting Machine-Translated Text using Back Translation
%A Nguyen-Son, Hoang-Quoc
%A Tran Phuong, Thao
%A Hidano, Seira
%A Kiyomoto, Shinsaku
%Y van Deemter, Kees
%Y Lin, Chenghua
%Y Takamura, Hiroya
%S Proceedings of the 12th International Conference on Natural Language Generation
%D 2019
%8 oct–nov
%I Association for Computational Linguistics
%C Tokyo, Japan
%F nguyen-son-etal-2019-detecting
%X Machine-translated text plays a crucial role in the communication of people using different languages. However, adversaries can use such text for malicious purposes such as plagiarism and fake review. The existing methods detected a machine-translated text only using the text’s intrinsic content, but they are unsuitable for classifying the machine-translated and human-written texts with the same meanings. We have proposed a method to extract features used to distinguish machine/human text based on the similarity between the intrinsic text and its back-translation. The evaluation of detecting translated sentences with French shows that our method achieves 75.0% of both accuracy and F-score. It outperforms the existing methods whose the best accuracy is 62.8% and the F-score is 62.7%. The proposed method even detects more efficiently the back-translated text with 83.4% of accuracy, which is higher than 66.7% of the best previous accuracy. We also achieve similar results not only with F-score but also with similar experiments related to Japanese. Moreover, we prove that our detector can recognize both machine-translated and machine-back-translated texts without the language information which is used to generate these machine texts. It demonstrates the persistence of our method in various applications in both low- and rich-resource languages.
%R 10.18653/v1/W19-8626
%U https://aclanthology.org/W19-8626/
%U https://doi.org/10.18653/v1/W19-8626
%P 189-197
Markdown (Informal)
[Detecting Machine-Translated Text using Back Translation](https://aclanthology.org/W19-8626/) (Nguyen-Son et al., INLG 2019)
ACL
- Hoang-Quoc Nguyen-Son, Thao Tran Phuong, Seira Hidano, and Shinsaku Kiyomoto. 2019. Detecting Machine-Translated Text using Back Translation. In Proceedings of the 12th International Conference on Natural Language Generation, pages 189–197, Tokyo, Japan. Association for Computational Linguistics.