@inproceedings{warburton-2014-narrowing,
title = "Narrowing the Gap Between Termbases and Corpora in Commercial Environments",
author = "Warburton, Kara",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/466_Paper.pdf",
pages = "722--727",
abstract = "Terminological resources offer potential to support applications beyond translation, such as controlled authoring and indexing, which are increasingly of interest to commercial enterprises. The ad-hoc semasiological approach adopted by commercial terminographers diverges considerably from methodologies prescribed by conventional theory. The notion of termhood in such production-oriented environments is driven by pragmatic criteria such as frequency and repurposability of the terminological unit. A high degree of correspondence between the commercial corpus and the termbase is desired. Research carried out at the City University of Hong Kong using four IT companies as case studies revealed a large gap between corpora and termbases. Problems in selecting terms and in encoding them properly in termbases account for a significant portion of this gap. A rigorous corpus-based approach to term selection would significantly reduce this gap and improve the effectiveness of commercial termbases. In particular, single-word terms (keywords) identified by comparison to a reference corpus offer great potential for identifying important multi-word terms in this context. We conclude that terminography for production purposes should be more corpus-based than is currently the norm.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="warburton-2014-narrowing">
<titleInfo>
<title>Narrowing the Gap Between Termbases and Corpora in Commercial Environments</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kara</namePart>
<namePart type="family">Warburton</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Terminological resources offer potential to support applications beyond translation, such as controlled authoring and indexing, which are increasingly of interest to commercial enterprises. The ad-hoc semasiological approach adopted by commercial terminographers diverges considerably from methodologies prescribed by conventional theory. The notion of termhood in such production-oriented environments is driven by pragmatic criteria such as frequency and repurposability of the terminological unit. A high degree of correspondence between the commercial corpus and the termbase is desired. Research carried out at the City University of Hong Kong using four IT companies as case studies revealed a large gap between corpora and termbases. Problems in selecting terms and in encoding them properly in termbases account for a significant portion of this gap. A rigorous corpus-based approach to term selection would significantly reduce this gap and improve the effectiveness of commercial termbases. In particular, single-word terms (keywords) identified by comparison to a reference corpus offer great potential for identifying important multi-word terms in this context. We conclude that terminography for production purposes should be more corpus-based than is currently the norm.</abstract>
<identifier type="citekey">warburton-2014-narrowing</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/466_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>722</start>
<end>727</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Narrowing the Gap Between Termbases and Corpora in Commercial Environments
%A Warburton, Kara
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F warburton-2014-narrowing
%X Terminological resources offer potential to support applications beyond translation, such as controlled authoring and indexing, which are increasingly of interest to commercial enterprises. The ad-hoc semasiological approach adopted by commercial terminographers diverges considerably from methodologies prescribed by conventional theory. The notion of termhood in such production-oriented environments is driven by pragmatic criteria such as frequency and repurposability of the terminological unit. A high degree of correspondence between the commercial corpus and the termbase is desired. Research carried out at the City University of Hong Kong using four IT companies as case studies revealed a large gap between corpora and termbases. Problems in selecting terms and in encoding them properly in termbases account for a significant portion of this gap. A rigorous corpus-based approach to term selection would significantly reduce this gap and improve the effectiveness of commercial termbases. In particular, single-word terms (keywords) identified by comparison to a reference corpus offer great potential for identifying important multi-word terms in this context. We conclude that terminography for production purposes should be more corpus-based than is currently the norm.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/466_Paper.pdf
%P 722-727
Markdown (Informal)
[Narrowing the Gap Between Termbases and Corpora in Commercial Environments](http://www.lrec-conf.org/proceedings/lrec2014/pdf/466_Paper.pdf) (Warburton, LREC 2014)
ACL