@inproceedings{barros-etal-2022-divide,
title = "Divide and Conquer: An Extreme Multi-Label Classification Approach for Coding Diseases and Procedures in {S}panish",
author = "Barros, Jose and
Rojas, Matias and
Dunstan, Jocelyn and
Abeliuk, Andres",
booktitle = "Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.louhi-1.16",
pages = "138--147",
abstract = "Clinical coding is the task of transforming medical documents into structured codes following a standard ontology. Since these terminologies are composed of hundreds of codes, this problem can be considered an Extreme Multi-label Classification task. This paper proposes a novel neural network-based architecture for clinical coding. First, we take full advantage of the hierarchical nature of ontologies to create clusters based on semantic relations. Then, we use a Matcher module to assign the probability of documents belonging to each cluster. Finally, the Ranker calculates the probability of each code considering only the documents in the cluster. This division allows a fine-grained differentiation within the cluster, which cannot be addressed using a single classifier. In addition, since most of the previous work has focused on solving this task in English, we conducted our experiments on three clinical coding corpora in Spanish. The experimental results demonstrate the effectiveness of our model, achieving state-of-the-art results on two of the three datasets. Specifically, we outperformed previous models on two subtasks of the CodiEsp shared task: CodiEsp-D (diseases) and CodiEsp-P (procedures). Automatic coding can profoundly impact healthcare by structuring critical information written in free text in electronic health records.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="barros-etal-2022-divide">
<titleInfo>
<title>Divide and Conquer: An Extreme Multi-Label Classification Approach for Coding Diseases and Procedures in Spanish</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jose</namePart>
<namePart type="family">Barros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matias</namePart>
<namePart type="family">Rojas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jocelyn</namePart>
<namePart type="family">Dunstan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andres</namePart>
<namePart type="family">Abeliuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)</title>
</titleInfo>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Clinical coding is the task of transforming medical documents into structured codes following a standard ontology. Since these terminologies are composed of hundreds of codes, this problem can be considered an Extreme Multi-label Classification task. This paper proposes a novel neural network-based architecture for clinical coding. First, we take full advantage of the hierarchical nature of ontologies to create clusters based on semantic relations. Then, we use a Matcher module to assign the probability of documents belonging to each cluster. Finally, the Ranker calculates the probability of each code considering only the documents in the cluster. This division allows a fine-grained differentiation within the cluster, which cannot be addressed using a single classifier. In addition, since most of the previous work has focused on solving this task in English, we conducted our experiments on three clinical coding corpora in Spanish. The experimental results demonstrate the effectiveness of our model, achieving state-of-the-art results on two of the three datasets. Specifically, we outperformed previous models on two subtasks of the CodiEsp shared task: CodiEsp-D (diseases) and CodiEsp-P (procedures). Automatic coding can profoundly impact healthcare by structuring critical information written in free text in electronic health records.</abstract>
<identifier type="citekey">barros-etal-2022-divide</identifier>
<location>
<url>https://aclanthology.org/2022.louhi-1.16</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>138</start>
<end>147</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Divide and Conquer: An Extreme Multi-Label Classification Approach for Coding Diseases and Procedures in Spanish
%A Barros, Jose
%A Rojas, Matias
%A Dunstan, Jocelyn
%A Abeliuk, Andres
%S Proceedings of the 13th International Workshop on Health Text Mining and Information Analysis (LOUHI)
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F barros-etal-2022-divide
%X Clinical coding is the task of transforming medical documents into structured codes following a standard ontology. Since these terminologies are composed of hundreds of codes, this problem can be considered an Extreme Multi-label Classification task. This paper proposes a novel neural network-based architecture for clinical coding. First, we take full advantage of the hierarchical nature of ontologies to create clusters based on semantic relations. Then, we use a Matcher module to assign the probability of documents belonging to each cluster. Finally, the Ranker calculates the probability of each code considering only the documents in the cluster. This division allows a fine-grained differentiation within the cluster, which cannot be addressed using a single classifier. In addition, since most of the previous work has focused on solving this task in English, we conducted our experiments on three clinical coding corpora in Spanish. The experimental results demonstrate the effectiveness of our model, achieving state-of-the-art results on two of the three datasets. Specifically, we outperformed previous models on two subtasks of the CodiEsp shared task: CodiEsp-D (diseases) and CodiEsp-P (procedures). Automatic coding can profoundly impact healthcare by structuring critical information written in free text in electronic health records.
%U https://aclanthology.org/2022.louhi-1.16
%P 138-147
Markdown (Informal)
[Divide and Conquer: An Extreme Multi-Label Classification Approach for Coding Diseases and Procedures in Spanish](https://aclanthology.org/2022.louhi-1.16) (Barros et al., Louhi 2022)
ACL