% ACL Anthology record: invited-talk abstract from the AMTA 2012
% Government MT User Program proceedings.
% The month field concatenates the predefined month macros with literal
% day numbers; the date range is joined with an en dash (--).
@inproceedings{colbath-2012-language,
  title     = {Language and Translation Challenges in Social Media},
  author    = {Colbath, Sean},
  booktitle = {Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Government MT User Program},
  month     = oct # " 28--" # nov # " 1",
  year      = {2012},
  address   = {San Diego, California, USA},
  publisher = {Association for Machine Translation in the Americas},
  url       = {https://aclanthology.org/2012.amta-government.3},
  abstract  = {The explosive growth of social media has led to a wide range of new challenges for machine translation and language processing. The language used in social media occupies a new space between structured and unstructured media, formal and informal language, and dialect and standard usage. Yet these new platforms have given a digital voice to millions of users on the Internet, giving them the opportunity to communicate on the first truly global stage {--} the Internet. Social media covers a broad category of communications formats, ranging from threaded conversations on Facebook, to microblog and short message content on platforms like Twitter and Weibo {--} but it also includes user-generated comments on YouTube, as well as the contents of the video itself, and even includes {`}traditional{'} blogs and forums. The common thread linking all of these is that the media is generated by, and is targeted at individuals. This talk will survey some of the most popular social media platforms, and identify key challenges in translating the content found in them {--} including dialect, code switching, mixed encodings, the use of {``}internet speak{''}, and platform-specific language phenomena, as well as volume and genre. In addition, we will talk about some of the challenges in analyzing social media from an operational point of view, and how language and translation issues influence higher-level analytic processes such as entity extraction, topic classification and clustering, geo-spatial analysis and other technologies that enable comprehension of social media. These latter capabilities are being adapted for social media analytics for US Government analysts under the support of the Technical Support Working Group at the US DoD, enabling translingual comprehension of this style of content in an operational environment.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single bibliographic record. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="colbath-2012-language">
    <!-- Title of the talk/paper itself. -->
    <titleInfo>
      <title>Language and Translation Challenges in Social Media</title>
    </titleInfo>
    <!-- Sole author, split into given and family name parts. -->
    <name type="personal">
      <namePart type="given">Sean</namePart>
      <namePart type="family">Colbath</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <!-- NOTE(review): free-text date range (no encoding attribute); consider
         encoded start/end dates if consumers need machine-readable values. -->
    <originInfo>
      <dateIssued>2012-oct 28-nov 1</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this record appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Government MT User Program</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Machine Translation in the Americas</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>The explosive growth of social media has led to a wide range of new challenges for machine translation and language processing. The language used in social media occupies a new space between structured and unstructured media, formal and informal language, and dialect and standard usage. Yet these new platforms have given a digital voice to millions of users on the Internet, giving them the opportunity to communicate on the first truly global stage – the Internet. Social media covers a broad category of communications formats, ranging from threaded conversations on Facebook, to microblog and short message content on platforms like Twitter and Weibo – but it also includes user-generated comments on YouTube, as well as the contents of the video itself, and even includes ‘traditional’ blogs and forums. The common thread linking all of these is that the media is generated by, and is targeted at individuals. This talk will survey some of the most popular social media platforms, and identify key challenges in translating the content found in them – including dialect, code switching, mixed encodings, the use of “internet speak”, and platform-specific language phenomena, as well as volume and genre. In addition, we will talk about some of the challenges in analyzing social media from an operational point of view, and how language and translation issues influence higher-level analytic processes such as entity extraction, topic classification and clustering, geo-spatial analysis and other technologies that enable comprehension of social media. These latter capabilities are being adapted for social media analytics for US Government analysts under the support of the Technical Support Working Group at the US DoD, enabling translingual comprehension of this style of content in an operational environment.</abstract>
    <!-- Citation key shared with the other export formats of this record. -->
    <identifier type="citekey">colbath-2012-language</identifier>
    <location>
      <url>https://aclanthology.org/2012.amta-government.3</url>
    </location>
    <!-- Same free-text date string as dateIssued above. -->
    <part>
      <date>2012-oct 28-nov 1</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Language and Translation Challenges in Social Media
%A Colbath, Sean
%S Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Government MT User Program
%D 2012
%8 oct 28 nov 1
%I Association for Machine Translation in the Americas
%C San Diego, California, USA
%F colbath-2012-language
%X The explosive growth of social media has led to a wide range of new challenges for machine translation and language processing. The language used in social media occupies a new space between structured and unstructured media, formal and informal language, and dialect and standard usage. Yet these new platforms have given a digital voice to millions of users on the Internet, giving them the opportunity to communicate on the first truly global stage – the Internet. Social media covers a broad category of communications formats, ranging from threaded conversations on Facebook, to microblog and short message content on platforms like Twitter and Weibo – but it also includes user-generated comments on YouTube, as well as the contents of the video itself, and even includes ‘traditional’ blogs and forums. The common thread linking all of these is that the media is generated by, and is targeted at individuals. This talk will survey some of the most popular social media platforms, and identify key challenges in translating the content found in them – including dialect, code switching, mixed encodings, the use of “internet speak”, and platform-specific language phenomena, as well as volume and genre. In addition, we will talk about some of the challenges in analyzing social media from an operational point of view, and how language and translation issues influence higher-level analytic processes such as entity extraction, topic classification and clustering, geo-spatial analysis and other technologies that enable comprehension of social media. These latter capabilities are being adapted for social media analytics for US Government analysts under the support of the Technical Support Working Group at the US DoD, enabling translingual comprehension of this style of content in an operational environment.
%U https://aclanthology.org/2012.amta-government.3
Markdown (Informal)
[Language and Translation Challenges in Social Media](https://aclanthology.org/2012.amta-government.3) (Colbath, AMTA 2012)
ACL
- Sean Colbath. 2012. Language and Translation Challenges in Social Media. In Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Government MT User Program, San Diego, California, USA. Association for Machine Translation in the Americas.