@inproceedings{dirkson-etal-2019-lexical,
title = "Lexical Normalization of User-Generated Medical Text",
author = "Dirkson, Anne and
Verberne, Suzan and
Kraaij, Wessel",
editor = "Weissenbacher, Davy and
Gonzalez-Hernandez, Graciela",
booktitle = "Proceedings of the Fourth Social Media Mining for Health Applications ({\#}SMM4H) Workshop {\&} Shared Task",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-3202",
doi = "10.18653/v1/W19-3202",
pages = "11--20",
abstract = "In the medical domain, user-generated social media text is increasingly used as a valuable complementary knowledge source to scientific medical literature. The extraction of this knowledge is complicated by colloquial language use and misspellings. Yet, lexical normalization of such data has not been addressed properly. This paper presents an unsupervised, data-driven spelling correction module for medical social media. Our method outperforms state-of-the-art spelling correction and can detect mistakes with an F0.5 of 0.888. Additionally, we present a novel corpus for spelling mistake detection and correction on a medical patient forum.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="dirkson-etal-2019-lexical">
<titleInfo>
<title>Lexical Normalization of User-Generated Medical Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anne</namePart>
<namePart type="family">Dirkson</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Suzan</namePart>
<namePart type="family">Verberne</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wessel</namePart>
<namePart type="family">Kraaij</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop &amp; Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Davy</namePart>
<namePart type="family">Weissenbacher</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Graciela</namePart>
<namePart type="family">Gonzalez-Hernandez</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In the medical domain, user-generated social media text is increasingly used as a valuable complementary knowledge source to scientific medical literature. The extraction of this knowledge is complicated by colloquial language use and misspellings. Yet, lexical normalization of such data has not been addressed properly. This paper presents an unsupervised, data-driven spelling correction module for medical social media. Our method outperforms state-of-the-art spelling correction and can detect mistakes with an F0.5 of 0.888. Additionally, we present a novel corpus for spelling mistake detection and correction on a medical patient forum.</abstract>
<identifier type="citekey">dirkson-etal-2019-lexical</identifier>
<identifier type="doi">10.18653/v1/W19-3202</identifier>
<location>
<url>https://aclanthology.org/W19-3202</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>11</start>
<end>20</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Lexical Normalization of User-Generated Medical Text
%A Dirkson, Anne
%A Verberne, Suzan
%A Kraaij, Wessel
%Y Weissenbacher, Davy
%Y Gonzalez-Hernandez, Graciela
%S Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop & Shared Task
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F dirkson-etal-2019-lexical
%X In the medical domain, user-generated social media text is increasingly used as a valuable complementary knowledge source to scientific medical literature. The extraction of this knowledge is complicated by colloquial language use and misspellings. Yet, lexical normalization of such data has not been addressed properly. This paper presents an unsupervised, data-driven spelling correction module for medical social media. Our method outperforms state-of-the-art spelling correction and can detect mistakes with an F0.5 of 0.888. Additionally, we present a novel corpus for spelling mistake detection and correction on a medical patient forum.
%R 10.18653/v1/W19-3202
%U https://aclanthology.org/W19-3202
%U https://doi.org/10.18653/v1/W19-3202
%P 11-20
Markdown (Informal)
[Lexical Normalization of User-Generated Medical Text](https://aclanthology.org/W19-3202) (Dirkson et al., ACL 2019)
ACL
- Anne Dirkson, Suzan Verberne, and Wessel Kraaij. 2019. Lexical Normalization of User-Generated Medical Text. In Proceedings of the Fourth Social Media Mining for Health Applications (#SMM4H) Workshop & Shared Task, pages 11–20, Florence, Italy. Association for Computational Linguistics.