@inproceedings{zhou-etal-2022-cxr,
title = "{CXR} Data Annotation and Classification with Pre-trained Language Models",
author = "Zhou, Nina and
Aw, Ai Ti and
Liu, Zhuo Han and
Tan, Cher Heng and
Ting, Yonghan and
Chen, Wen Xiang and
Ting, Jordan Sim Zheng",
booktitle = "Proceedings of the 29th International Conference on Computational Linguistics",
month = oct,
year = "2022",
address = "Gyeongju, Republic of Korea",
publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2022.coling-1.247",
pages = "2801--2811",
abstract = "Clinical data annotation has been one of the major obstacles for applying machine learning approaches in clinical NLP. Open-source tools such as NegBio and CheXpert are usually designed on data from specific institutions, which limit their applications to other institutions due to the differences in writing style, structure, language use as well as label definition. In this paper, we propose a new weak supervision annotation framework with two improvements compared to existing annotation frameworks: 1) we propose to select representative samples for efficient manual annotation; 2) we propose to auto-annotate the remaining samples, both leveraging on a self-trained sentence encoder. This framework also provides a function for identifying inconsistent annotation errors. The utility of our proposed weak supervision annotation framework is applicable to any given data annotation task, and it provides an efficient form of sample selection and data auto-annotation with better classification results for real applications.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zhou-etal-2022-cxr">
<titleInfo>
<title>CXR Data Annotation and Classification with Pre-trained Language Models</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nina</namePart>
<namePart type="family">Zhou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ai</namePart>
<namePart type="given">Ti</namePart>
<namePart type="family">Aw</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhuo</namePart>
<namePart type="given">Han</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cher</namePart>
<namePart type="given">Heng</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonghan</namePart>
<namePart type="family">Ting</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Wen</namePart>
<namePart type="given">Xiang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="given">Sim</namePart>
<namePart type="given">Zheng</namePart>
<namePart type="family">Ting</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-10</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 29th International Conference on Computational Linguistics</title>
</titleInfo>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Gyeongju, Republic of Korea</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Clinical data annotation has been one of the major obstacles for applying machine learning approaches in clinical NLP. Open-source tools such as NegBio and CheXpert are usually designed on data from specific institutions, which limit their applications to other institutions due to the differences in writing style, structure, language use as well as label definition. In this paper, we propose a new weak supervision annotation framework with two improvements compared to existing annotation frameworks: 1) we propose to select representative samples for efficient manual annotation; 2) we propose to auto-annotate the remaining samples, both leveraging on a self-trained sentence encoder. This framework also provides a function for identifying inconsistent annotation errors. The utility of our proposed weak supervision annotation framework is applicable to any given data annotation task, and it provides an efficient form of sample selection and data auto-annotation with better classification results for real applications.</abstract>
<identifier type="citekey">zhou-etal-2022-cxr</identifier>
<location>
<url>https://aclanthology.org/2022.coling-1.247</url>
</location>
<part>
<date>2022-10</date>
<extent unit="page">
<start>2801</start>
<end>2811</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T CXR Data Annotation and Classification with Pre-trained Language Models
%A Zhou, Nina
%A Aw, Ai Ti
%A Liu, Zhuo Han
%A Tan, Cher Heng
%A Ting, Yonghan
%A Chen, Wen Xiang
%A Ting, Jordan Sim Zheng
%S Proceedings of the 29th International Conference on Computational Linguistics
%D 2022
%8 October
%I International Committee on Computational Linguistics
%C Gyeongju, Republic of Korea
%F zhou-etal-2022-cxr
%X Clinical data annotation has been one of the major obstacles for applying machine learning approaches in clinical NLP. Open-source tools such as NegBio and CheXpert are usually designed on data from specific institutions, which limit their applications to other institutions due to the differences in writing style, structure, language use as well as label definition. In this paper, we propose a new weak supervision annotation framework with two improvements compared to existing annotation frameworks: 1) we propose to select representative samples for efficient manual annotation; 2) we propose to auto-annotate the remaining samples, both leveraging on a self-trained sentence encoder. This framework also provides a function for identifying inconsistent annotation errors. The utility of our proposed weak supervision annotation framework is applicable to any given data annotation task, and it provides an efficient form of sample selection and data auto-annotation with better classification results for real applications.
%U https://aclanthology.org/2022.coling-1.247
%P 2801-2811
Markdown (Informal)
[CXR Data Annotation and Classification with Pre-trained Language Models](https://aclanthology.org/2022.coling-1.247) (Zhou et al., COLING 2022)
ACL
- Nina Zhou, Ai Ti Aw, Zhuo Han Liu, Cher Heng Tan, Yonghan Ting, Wen Xiang Chen, and Jordan Sim Zheng Ting. 2022. CXR Data Annotation and Classification with Pre-trained Language Models. In Proceedings of the 29th International Conference on Computational Linguistics, pages 2801–2811, Gyeongju, Republic of Korea. International Committee on Computational Linguistics.