@inproceedings{cardillo-debole-2024-italian,
title = "{I}talian Word Embeddings for the Medical Domain",
author = "Cardillo, Franco Alberto and
Debole, Franca",
editor = "Calzolari, Nicoletta and
Kan, Min-Yen and
Hoste, Veronique and
Lenci, Alessandro and
Sakti, Sakriani and
Xue, Nianwen",
booktitle = "Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.lrec-main.824",
pages = "9434--9440",
abstract = "Neural word embeddings have proven valuable in the development of medical applications. However, for the Italian language, there are no publicly available corpora, embeddings, or evaluation resources tailored to this domain. In this paper, we introduce an Italian corpus for the medical domain, that includes texts from Wikipedia, medical journals, drug leaflets, and specialized websites. Using this corpus, we generate neural word embeddings from scratch. These embeddings are then evaluated using standard evaluation resources, that we translated into Italian exploiting the concept graph in the UMLS Metathesaurus. Despite the relatively small size of the corpus, our experimental results indicate that the new embeddings correlate well with human judgments regarding the similarity and the relatedness of medical concepts. Moreover, these medical-specific embeddings outperform a baseline model trained on the full Wikipedia corpus, which includes the medical pages we used. We believe that our embeddings and the newly introduced textual resources will foster further advancements in the field of Italian medical Natural Language Processing.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cardillo-debole-2024-italian">
<titleInfo>
<title>Italian Word Embeddings for the Medical Domain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Franco</namePart>
<namePart type="given">Alberto</namePart>
<namePart type="family">Cardillo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Franca</namePart>
<namePart type="family">Debole</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Min-Yen</namePart>
<namePart type="family">Kan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Veronique</namePart>
<namePart type="family">Hoste</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alessandro</namePart>
<namePart type="family">Lenci</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sakriani</namePart>
<namePart type="family">Sakti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nianwen</namePart>
<namePart type="family">Xue</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Neural word embeddings have proven valuable in the development of medical applications. However, for the Italian language, there are no publicly available corpora, embeddings, or evaluation resources tailored to this domain. In this paper, we introduce an Italian corpus for the medical domain, that includes texts from Wikipedia, medical journals, drug leaflets, and specialized websites. Using this corpus, we generate neural word embeddings from scratch. These embeddings are then evaluated using standard evaluation resources, that we translated into Italian exploiting the concept graph in the UMLS Metathesaurus. Despite the relatively small size of the corpus, our experimental results indicate that the new embeddings correlate well with human judgments regarding the similarity and the relatedness of medical concepts. Moreover, these medical-specific embeddings outperform a baseline model trained on the full Wikipedia corpus, which includes the medical pages we used. We believe that our embeddings and the newly introduced textual resources will foster further advancements in the field of Italian medical Natural Language Processing.</abstract>
<identifier type="citekey">cardillo-debole-2024-italian</identifier>
<location>
<url>https://aclanthology.org/2024.lrec-main.824</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>9434</start>
<end>9440</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Italian Word Embeddings for the Medical Domain
%A Cardillo, Franco Alberto
%A Debole, Franca
%Y Calzolari, Nicoletta
%Y Kan, Min-Yen
%Y Hoste, Veronique
%Y Lenci, Alessandro
%Y Sakti, Sakriani
%Y Xue, Nianwen
%S Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F cardillo-debole-2024-italian
%X Neural word embeddings have proven valuable in the development of medical applications. However, for the Italian language, there are no publicly available corpora, embeddings, or evaluation resources tailored to this domain. In this paper, we introduce an Italian corpus for the medical domain, that includes texts from Wikipedia, medical journals, drug leaflets, and specialized websites. Using this corpus, we generate neural word embeddings from scratch. These embeddings are then evaluated using standard evaluation resources, that we translated into Italian exploiting the concept graph in the UMLS Metathesaurus. Despite the relatively small size of the corpus, our experimental results indicate that the new embeddings correlate well with human judgments regarding the similarity and the relatedness of medical concepts. Moreover, these medical-specific embeddings outperform a baseline model trained on the full Wikipedia corpus, which includes the medical pages we used. We believe that our embeddings and the newly introduced textual resources will foster further advancements in the field of Italian medical Natural Language Processing.
%U https://aclanthology.org/2024.lrec-main.824
%P 9434-9440
Markdown (Informal)
[Italian Word Embeddings for the Medical Domain](https://aclanthology.org/2024.lrec-main.824) (Cardillo & Debole, LREC-COLING 2024)
ACL
- Franco Alberto Cardillo and Franca Debole. 2024. Italian Word Embeddings for the Medical Domain. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024), pages 9434–9440, Torino, Italia. ELRA and ICCL.