% ACL 2019 Student Research Workshop paper (ACL Anthology ID P19-2037).
% The proper noun in the title is brace-protected as a whole word so that
% sentence-casing styles keep its capital without splitting the word.
@inproceedings{colakoglu-etal-2019-normalizing,
  author    = {{\c{C}}olako{\u{g}}lu, Talha and
               Sulubacak, Umut and
               Tantu{\u{g}}, Ahmet C{\"u}neyd},
  title     = {Normalizing Non-canonical {Turkish} Texts Using Machine Translation Approaches},
  editor    = {Alva-Manchego, Fernando and
               Choi, Eunsol and
               Khashabi, Daniel},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop},
  year      = {2019},
  month     = jul,
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  pages     = {267--272},
  doi       = {10.18653/v1/P19-2037},
  url       = {https://aclanthology.org/P19-2037},
  abstract  = {With the growth of the social web, user-generated text data has reached unprecedented sizes. Non-canonical text normalization provides a way to exploit this as a practical source of training data for language processing systems. The state of the art in Turkish text normalization is composed of a token level pipeline of modules, heavily dependent on external linguistic resources and manually defined rules. Instead, we propose a fully automated, context-aware machine translation approach with fewer stages of processing. Experiments with various implementations of our approach show that we are able to surpass the current best-performing system by a large margin.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey colakoglu-etal-2019-normalizing -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="colakoglu-etal-2019-normalizing">
    <!-- Paper title -->
    <titleInfo>
      <title>Normalizing Non-canonical Turkish Texts Using Machine Translation Approaches</title>
    </titleInfo>
    <!-- Authors, in citation order -->
    <name type="personal">
      <namePart type="given">Talha</namePart>
      <namePart type="family">Çolakoğlu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Umut</namePart>
      <namePart type="family">Sulubacak</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Ahmet</namePart>
      <namePart type="given">Cüneyd</namePart>
      <namePart type="family">Tantuğ</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Fernando</namePart>
        <namePart type="family">Alva-Manchego</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Eunsol</namePart>
        <namePart type="family">Choi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Daniel</namePart>
        <namePart type="family">Khashabi</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>With the growth of the social web, user-generated text data has reached unprecedented sizes. Non-canonical text normalization provides a way to exploit this as a practical source of training data for language processing systems. The state of the art in Turkish text normalization is composed of a token level pipeline of modules, heavily dependent on external linguistic resources and manually defined rules. Instead, we propose a fully automated, context-aware machine translation approach with fewer stages of processing. Experiments with various implementations of our approach show that we are able to surpass the current best-performing system by a large margin.</abstract>
    <!-- Identifiers and access location -->
    <identifier type="citekey">colakoglu-etal-2019-normalizing</identifier>
    <identifier type="doi">10.18653/v1/P19-2037</identifier>
    <location>
      <url>https://aclanthology.org/P19-2037</url>
    </location>
    <!-- Position within the host volume -->
    <part>
      <date>2019-07</date>
      <extent unit="page">
        <start>267</start>
        <end>272</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Normalizing Non-canonical Turkish Texts Using Machine Translation Approaches
%A Çolakoğlu, Talha
%A Sulubacak, Umut
%A Tantuğ, Ahmet Cüneyd
%Y Alva-Manchego, Fernando
%Y Choi, Eunsol
%Y Khashabi, Daniel
%S Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics: Student Research Workshop
%D 2019
%8 July
%I Association for Computational Linguistics
%C Florence, Italy
%F colakoglu-etal-2019-normalizing
%X With the growth of the social web, user-generated text data has reached unprecedented sizes. Non-canonical text normalization provides a way to exploit this as a practical source of training data for language processing systems. The state of the art in Turkish text normalization is composed of a token level pipeline of modules, heavily dependent on external linguistic resources and manually defined rules. Instead, we propose a fully automated, context-aware machine translation approach with fewer stages of processing. Experiments with various implementations of our approach show that we are able to surpass the current best-performing system by a large margin.
%R 10.18653/v1/P19-2037
%U https://aclanthology.org/P19-2037
%U https://doi.org/10.18653/v1/P19-2037
%P 267-272
Markdown (Informal)
[Normalizing Non-canonical Turkish Texts Using Machine Translation Approaches](https://aclanthology.org/P19-2037) (Çolakoğlu et al., ACL 2019)
ACL