@inproceedings{temnikova-etal-2014-sublanguage,
title = "Sublanguage Corpus Analysis Toolkit: A tool for assessing the representativeness and sublanguage characteristics of corpora",
author = "Temnikova, Irina and
Baumgartner Jr., William A. and
Hailu, Negacy D. and
Nikolova, Ivelina and
McEnery, Tony and
Kilgarriff, Adam and
Angelova, Galia and
Cohen, K. Bretonnel",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Loftsson, Hrafn and
Maegaard, Bente and
Mariani, Joseph and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Ninth International Conference on Language Resources and Evaluation ({LREC}'14)",
month = may,
year = "2014",
address = "Reykjavik, Iceland",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2014/pdf/675_Paper.pdf",
pages = "1714--1718",
abstract = "Sublanguages are varieties of language that form subsets of the general language, typically exhibiting particular types of lexical, semantic, and other restrictions and deviance. SubCAT, the Sublanguage Corpus Analysis Toolkit, assesses the representativeness and closure properties of corpora to analyze the extent to which they are either sublanguages, or representative samples of the general language. The current version of SubCAT contains scripts and applications for assessing lexical closure, morphological closure, sentence type closure, over-represented words, and syntactic deviance. Its operation is illustrated with three case studies concerning scientific journal articles, patents, and clinical records. Materials from two language families are analyzed―English (Germanic), and Bulgarian (Slavic). The software is available at sublanguage.sourceforge.net under a liberal Open Source license.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="temnikova-etal-2014-sublanguage">
<titleInfo>
<title>Sublanguage Corpus Analysis Toolkit: A tool for assessing the representativeness and sublanguage characteristics of corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Temnikova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">William</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Baumgartner Jr.</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Negacy</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Hailu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ivelina</namePart>
<namePart type="family">Nikolova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tony</namePart>
<namePart type="family">McEnery</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Kilgarriff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Galia</namePart>
<namePart type="family">Angelova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">K</namePart>
<namePart type="given">Bretonnel</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hrafn</namePart>
<namePart type="family">Loftsson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Reykjavik, Iceland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Sublanguages are varieties of language that form subsets of the general language, typically exhibiting particular types of lexical, semantic, and other restrictions and deviance. SubCAT, the Sublanguage Corpus Analysis Toolkit, assesses the representativeness and closure properties of corpora to analyze the extent to which they are either sublanguages, or representative samples of the general language. The current version of SubCAT contains scripts and applications for assessing lexical closure, morphological closure, sentence type closure, over-represented words, and syntactic deviance. Its operation is illustrated with three case studies concerning scientific journal articles, patents, and clinical records. Materials from two language families are analyzed―English (Germanic), and Bulgarian (Slavic). The software is available at sublanguage.sourceforge.net under a liberal Open Source license.</abstract>
<identifier type="citekey">temnikova-etal-2014-sublanguage</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2014/pdf/675_Paper.pdf</url>
</location>
<part>
<date>2014-05</date>
<extent unit="page">
<start>1714</start>
<end>1718</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Sublanguage Corpus Analysis Toolkit: A tool for assessing the representativeness and sublanguage characteristics of corpora
%A Temnikova, Irina
%A Baumgartner Jr., William A.
%A Hailu, Negacy D.
%A Nikolova, Ivelina
%A McEnery, Tony
%A Kilgarriff, Adam
%A Angelova, Galia
%A Cohen, K. Bretonnel
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Loftsson, Hrafn
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC’14)
%D 2014
%8 May
%I European Language Resources Association (ELRA)
%C Reykjavik, Iceland
%F temnikova-etal-2014-sublanguage
%X Sublanguages are varieties of language that form subsets of the general language, typically exhibiting particular types of lexical, semantic, and other restrictions and deviance. SubCAT, the Sublanguage Corpus Analysis Toolkit, assesses the representativeness and closure properties of corpora to analyze the extent to which they are either sublanguages, or representative samples of the general language. The current version of SubCAT contains scripts and applications for assessing lexical closure, morphological closure, sentence type closure, over-represented words, and syntactic deviance. Its operation is illustrated with three case studies concerning scientific journal articles, patents, and clinical records. Materials from two language families are analyzed―English (Germanic), and Bulgarian (Slavic). The software is available at sublanguage.sourceforge.net under a liberal Open Source license.
%U http://www.lrec-conf.org/proceedings/lrec2014/pdf/675_Paper.pdf
%P 1714-1718
Markdown (Informal)
[Sublanguage Corpus Analysis Toolkit: A tool for assessing the representativeness and sublanguage characteristics of corpora](http://www.lrec-conf.org/proceedings/lrec2014/pdf/675_Paper.pdf) (Temnikova et al., LREC 2014)
ACL
- Irina Temnikova, William A. Baumgartner Jr., Negacy D. Hailu, Ivelina Nikolova, Tony McEnery, Adam Kilgarriff, Galia Angelova, and K. Bretonnel Cohen. 2014. Sublanguage Corpus Analysis Toolkit: A tool for assessing the representativeness and sublanguage characteristics of corpora. In Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), pages 1714–1718, Reykjavik, Iceland. European Language Resources Association (ELRA).