% Marsh (2012), AMTA 2012 Government MT User Program; ACL Anthology record (see url).
@inproceedings{marsh-2012-return,
  title     = {Return on Investment for Government Human Language Technology Systems},
  author    = {Marsh, Elaine},
  booktitle = {Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Government MT User Program},
  month     = oct # " 28--" # nov # " 1",
  year      = {2012},
  address   = {San Diego, California, USA},
  publisher = {Association for Machine Translation in the Americas},
  url       = {https://aclanthology.org/2012.amta-government.10},
  abstract  = {Over the years, the government has translated reams of material, transcribed decades of audio, and processed years of text. Where is that material now? How valuable would it be to have that material available to push research and applications and to support foreign language training? Over 20 years ago, DARPA funded the Linguistic Data Consortium (LDC) at the University of Pennsylvania to collect, catalog, store and provide access to language resources. Since that time, the LDC has collected thousands of corpora in many different genres and languages. Although the government has access to the full range of LDC data through a community license, until recently corpora specific to government needs were usually deleted soon after they were created. In order to address the need for a government-only catalog and repository, the Government Catalog of Language Resources was funded through the ODNI, and an initial prototype has been built. The GCLR will be transferred to a government executive agent who will be responsible for making improvements, adding corpora, and maintaining and sustaining the effort. The purpose of this talk is to present the model behind GCLR, to demonstrate its purpose, and to invite attendees to contribute and use contents. Background leading up to the current version will be presented. Use cases of parallel corpora in teaching, technology development and language maintenance will also be covered. Learning from the LDC on how corpora are used, and linking with the LDC will be part of future directions to enable government applications to utilize these resources.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record (citekey marsh-2012-return): a single personal author, with the host proceedings described in relatedItem. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="marsh-2012-return">
    <titleInfo>
      <title>Return on Investment for Government Human Language Technology Systems</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Elaine</namePart>
      <namePart type="family">Marsh</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2012-oct 28-nov 1</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Government MT User Program</title>
      </titleInfo>
      <originInfo>
        <publisher>Association for Machine Translation in the Americas</publisher>
        <place>
          <placeTerm type="text">San Diego, California, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Over the years, the government has translated reams of material, transcribed decades of audio, and processed years of text. Where is that material now? How valuable would it be to have that material available to push research and applications and to support foreign language training? Over 20 years ago, DARPA funded the Linguistic Data Consortium (LDC) at the University of Pennsylvania to collect, catalog, store and provide access to language resources. Since that time, the LDC has collected thousands of corpora in many different genres and languages. Although the government has access to the full range of LDC data through a community license, until recently corpora specific to government needs were usually deleted soon after they were created. In order to address the need for a government-only catalog and repository, the Government Catalog of Language Resources was funded through the ODNI, and an initial prototype has been built. The GCLR will be transferred to a government executive agent who will be responsible for making improvements, adding corpora, and maintaining and sustaining the effort. The purpose of this talk is to present the model behind GCLR, to demonstrate its purpose, and to invite attendees to contribute and use contents. Background leading up to the current version will be presented. Use cases of parallel corpora in teaching, technology development and language maintenance will also be covered. Learning from the LDC on how corpora are used, and linking with the LDC will be part of future directions to enable government applications to utilize these resources.</abstract>
    <identifier type="citekey">marsh-2012-return</identifier>
    <location>
      <url>https://aclanthology.org/2012.amta-government.10</url>
    </location>
    <part>
      <date>2012-oct 28-nov 1</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Return on Investment for Government Human Language Technology Systems
%A Marsh, Elaine
%S Proceedings of the 10th Conference of the Association for Machine Translation in the Americas: Government MT User Program
%D 2012
%8 oct 28-nov 1
%I Association for Machine Translation in the Americas
%C San Diego, California, USA
%F marsh-2012-return
%X Over the years, the government has translated reams of material, transcribed decades of audio, and processed years of text. Where is that material now? How valuable would it be to have that material available to push research and applications and to support foreign language training? Over 20 years ago, DARPA funded the Linguistic Data Consortium (LDC) at the University of Pennsylvania to collect, catalog, store and provide access to language resources. Since that time, the LDC has collected thousands of corpora in many different genres and languages. Although the government has access to the full range of LDC data through a community license, until recently corpora specific to government needs were usually deleted soon after they were created. In order to address the need for a government-only catalog and repository, the Government Catalog of Language Resources was funded through the ODNI, and an initial prototype has been built. The GCLR will be transferred to a government executive agent who will be responsible for making improvements, adding corpora, and maintaining and sustaining the effort. The purpose of this talk is to present the model behind GCLR, to demonstrate its purpose, and to invite attendees to contribute and use contents. Background leading up to the current version will be presented. Use cases of parallel corpora in teaching, technology development and language maintenance will also be covered. Learning from the LDC on how corpora are used, and linking with the LDC will be part of future directions to enable government applications to utilize these resources.
%U https://aclanthology.org/2012.amta-government.10
Markdown (Informal)
[Return on Investment for Government Human Language Technology Systems](https://aclanthology.org/2012.amta-government.10) (Marsh, AMTA 2012)
ACL