@inproceedings{chen-etal-2006-study,
title = "A Study on Terminology Extraction Based on Classified Corpora",
author = "Chen, Yirong and
Lu, Qin and
Li, Wenjie and
Sui, Zhifang and
Ji, Luning",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/407_pdf.pdf",
abstract = "Algorithms for automatic term extraction in a specific domain should consider at least two issues, namely Unithood and Termhood (Kageura, 1996). Unithood refers to the degree of a string to occur as a word or a phrase. Termhood (Chen Yirong, 2005) refers to the degree of a word or a phrase to occur as a domain specific concept. Unlike unithood, study on termhood is not yet widely reported. In classified corpora, the class information provides the cue to the nature of data and can be used in termhood calculation. Three algorithms are provided and evaluated to investigate termhood based on classified corpora. The three algorithms are based on lexicon set computing, term frequency and document frequency, and the strength of the relation between a term and its document class respectively. Our objective is to investigate the effects of these different termhood measurement features. After evaluation, we can find which features are more effective and also, how we can improve these different features to achieve the best performance. Preliminary results show that the first measure can effectively filter out independent terms or terms of general use.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="chen-etal-2006-study">
<titleInfo>
<title>A Study on Terminology Extraction Based on Classified Corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yirong</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qin</namePart>
<namePart type="family">Lu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wenjie</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhifang</namePart>
<namePart type="family">Sui</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Luning</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Algorithms for automatic term extraction in a specific domain should consider at least two issues, namely Unithood and Termhood (Kageura, 1996). Unithood refers to the degree of a string to occur as a word or a phrase. Termhood (Chen Yirong, 2005) refers to the degree of a word or a phrase to occur as a domain specific concept. Unlike unithood, study on termhood is not yet widely reported. In classified corpora, the class information provides the cue to the nature of data and can be used in termhood calculation. Three algorithms are provided and evaluated to investigate termhood based on classified corpora. The three algorithms are based on lexicon set computing, term frequency and document frequency, and the strength of the relation between a term and its document class respectively. Our objective is to investigate the effects of these different termhood measurement features. After evaluation, we can find which features are more effective and also, how we can improve these different features to achieve the best performance. Preliminary results show that the first measure can effectively filter out independent terms or terms of general use.</abstract>
<identifier type="citekey">chen-etal-2006-study</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/407_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Study on Terminology Extraction Based on Classified Corpora
%A Chen, Yirong
%A Lu, Qin
%A Li, Wenjie
%A Sui, Zhifang
%A Ji, Luning
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F chen-etal-2006-study
%X Algorithms for automatic term extraction in a specific domain should consider at least two issues, namely Unithood and Termhood (Kageura, 1996). Unithood refers to the degree of a string to occur as a word or a phrase. Termhood (Chen Yirong, 2005) refers to the degree of a word or a phrase to occur as a domain specific concept. Unlike unithood, study on termhood is not yet widely reported. In classified corpora, the class information provides the cue to the nature of data and can be used in termhood calculation. Three algorithms are provided and evaluated to investigate termhood based on classified corpora. The three algorithms are based on lexicon set computing, term frequency and document frequency, and the strength of the relation between a term and its document class respectively. Our objective is to investigate the effects of these different termhood measurement features. After evaluation, we can find which features are more effective and also, how we can improve these different features to achieve the best performance. Preliminary results show that the first measure can effectively filter out independent terms or terms of general use.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/407_pdf.pdf
Markdown (Informal)
[A Study on Terminology Extraction Based on Classified Corpora](http://www.lrec-conf.org/proceedings/lrec2006/pdf/407_pdf.pdf) (Chen et al., LREC 2006)
ACL