@inproceedings{okazaki-ananiadou-2006-clustering,
title = "Clustering acronyms in biomedical text for disambiguation",
author = "Okazaki, Naoaki and
Ananiadou, Sophia",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Gangemi, Aldo and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Tapias, Daniel",
booktitle = "Proceedings of the Fifth International Conference on Language Resources and Evaluation ({LREC}{'}06)",
month = may,
year = "2006",
address = "Genoa, Italy",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2006/pdf/351_pdf.pdf",
abstract = "Given the increasing number of neologisms in biomedicine (names of genes, diseases, molecules, etc.), the rate of acronyms used in literature also increases. Existing acronym dictionaries cannot keep up with the rate of new creations. Thus, discovering and disambiguating acronyms and their expanded forms are essential aspects of text mining and terminology management. We present a method for clustering long forms identified by an acronym recognition method. Applying the acronym recognition method to MEDLINE abstracts, we obtained a list of short/long forms. The recognized short/long forms were classified by abiologist to construct an evaluation set for clustering sets of similar long forms. We observed five types of term variation in the evaluation set and defined four similarity measures to gathers the similar longforms (i.e., orthographic, morphological, syntactic, lexico semantic variants, nested abbreviations). The complete-link clustering with the four similarity measures achieved 87.5{\%} precision and 84.9{\%} recall on the evaluation set.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="okazaki-ananiadou-2006-clustering">
<titleInfo>
<title>Clustering acronyms in biomedical text for disambiguation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aldo</namePart>
<namePart type="family">Gangemi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Genoa, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Given the increasing number of neologisms in biomedicine (names of genes, diseases, molecules, etc.), the rate of acronyms used in literature also increases. Existing acronym dictionaries cannot keep up with the rate of new creations. Thus, discovering and disambiguating acronyms and their expanded forms are essential aspects of text mining and terminology management. We present a method for clustering long forms identified by an acronym recognition method. Applying the acronym recognition method to MEDLINE abstracts, we obtained a list of short/long forms. The recognized short/long forms were classified by abiologist to construct an evaluation set for clustering sets of similar long forms. We observed five types of term variation in the evaluation set and defined four similarity measures to gathers the similar longforms (i.e., orthographic, morphological, syntactic, lexico semantic variants, nested abbreviations). The complete-link clustering with the four similarity measures achieved 87.5% precision and 84.9% recall on the evaluation set.</abstract>
<identifier type="citekey">okazaki-ananiadou-2006-clustering</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2006/pdf/351_pdf.pdf</url>
</location>
<part>
<date>2006-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Clustering acronyms in biomedical text for disambiguation
%A Okazaki, Naoaki
%A Ananiadou, Sophia
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Gangemi, Aldo
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Tapias, Daniel
%S Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC’06)
%D 2006
%8 May
%I European Language Resources Association (ELRA)
%C Genoa, Italy
%F okazaki-ananiadou-2006-clustering
%X Given the increasing number of neologisms in biomedicine (names of genes, diseases, molecules, etc.), the rate of acronyms used in literature also increases. Existing acronym dictionaries cannot keep up with the rate of new creations. Thus, discovering and disambiguating acronyms and their expanded forms are essential aspects of text mining and terminology management. We present a method for clustering long forms identified by an acronym recognition method. Applying the acronym recognition method to MEDLINE abstracts, we obtained a list of short/long forms. The recognized short/long forms were classified by abiologist to construct an evaluation set for clustering sets of similar long forms. We observed five types of term variation in the evaluation set and defined four similarity measures to gathers the similar longforms (i.e., orthographic, morphological, syntactic, lexico semantic variants, nested abbreviations). The complete-link clustering with the four similarity measures achieved 87.5% precision and 84.9% recall on the evaluation set.
%U http://www.lrec-conf.org/proceedings/lrec2006/pdf/351_pdf.pdf
Markdown (Informal)
[Clustering acronyms in biomedical text for disambiguation](http://www.lrec-conf.org/proceedings/lrec2006/pdf/351_pdf.pdf) (Okazaki & Ananiadou, LREC 2006)
ACL