% ACL Anthology BibTeX record for the MultiMSD paper (Findings of ACL 2025).
% The coined name "MultiMSD" is brace-protected as a whole word so that
% sentence-casing bibliography styles keep its capitalisation intact.
@inproceedings{horiguchi-etal-2025-multimsd,
    title     = {{MultiMSD}: A Corpus for Multilingual Medical Text Simplification from Online Medical References},
    author    = {Horiguchi, Koki and
                 Kajiwara, Tomoyuki and
                 Ninomiya, Takashi and
                 Wakamiya, Shoko and
                 Aramaki, Eiji},
    editor    = {Che, Wanxiang and
                 Nabende, Joyce and
                 Shutova, Ekaterina and
                 Pilehvar, Mohammad Taher},
    booktitle = {Findings of the Association for Computational Linguistics: ACL 2025},
    month     = jul,
    year      = {2025},
    address   = {Vienna, Austria},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2025.findings-acl.481/},
    doi       = {10.18653/v1/2025.findings-acl.481},
    pages     = {9248--9258},
    isbn      = {979-8-89176-256-5},
    abstract  = {We release a parallel corpus for medical text simplification, which paraphrases medical terms into expressions easily understood by patients. Medical texts written by medical practitioners contain a lot of technical terms, and patients who are non-experts are often unable to use the information effectively. Therefore, there is a strong social demand for medical text simplification that paraphrases input sentences without using medical terms. However, this task has not been sufficiently studied in non-English languages. We therefore developed parallel corpora for medical text simplification in nine languages: German, English, Spanish, French, Italian, Japanese, Portuguese, Russian, and Chinese, each with 10,000 sentence pairs, by automatic sentence alignment to online medical references for professionals and consumers. We also propose a method for training text simplification models to actively paraphrase complex expressions, including medical terms. Experimental results show that the proposed method improves the performance of medical text simplification. In addition, we confirmed that training with a multilingual dataset is more effective than training with a monolingual dataset.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey horiguchi-etal-2025-multimsd. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="horiguchi-etal-2025-multimsd">
    <!-- Title of the paper. -->
    <titleInfo>
      <title>MultiMSD: A Corpus for Multilingual Medical Text Simplification from Online Medical References</title>
    </titleInfo>
    <!-- Authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Koki</namePart>
      <namePart type="family">Horiguchi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tomoyuki</namePart>
      <namePart type="family">Kajiwara</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Takashi</namePart>
      <namePart type="family">Ninomiya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shoko</namePart>
      <namePart type="family">Wakamiya</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Eiji</namePart>
      <namePart type="family">Aramaki</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Findings of the Association for Computational Linguistics: ACL 2025</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <!-- Two given-name parts are recorded for this editor. -->
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-256-5</identifier>
    </relatedItem>
    <abstract>We release a parallel corpus for medical text simplification, which paraphrases medical terms into expressions easily understood by patients. Medical texts written by medical practitioners contain a lot of technical terms, and patients who are non-experts are often unable to use the information effectively. Therefore, there is a strong social demand for medical text simplification that paraphrases input sentences without using medical terms. However, this task has not been sufficiently studied in non-English languages. We therefore developed parallel corpora for medical text simplification in nine languages: German, English, Spanish, French, Italian, Japanese, Portuguese, Russian, and Chinese, each with 10,000 sentence pairs, by automatic sentence alignment to online medical references for professionals and consumers. We also propose a method for training text simplification models to actively paraphrase complex expressions, including medical terms. Experimental results show that the proposed method improves the performance of medical text simplification. In addition, we confirmed that training with a multilingual dataset is more effective than training with a monolingual dataset.</abstract>
    <!-- Identifiers and landing-page URL for the paper itself. -->
    <identifier type="citekey">horiguchi-etal-2025-multimsd</identifier>
    <identifier type="doi">10.18653/v1/2025.findings-acl.481</identifier>
    <location>
      <url>https://aclanthology.org/2025.findings-acl.481/</url>
    </location>
    <!-- Page range of the paper, with the date recorded for this part. -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>9248</start>
        <end>9258</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T MultiMSD: A Corpus for Multilingual Medical Text Simplification from Online Medical References
%A Horiguchi, Koki
%A Kajiwara, Tomoyuki
%A Ninomiya, Takashi
%A Wakamiya, Shoko
%A Aramaki, Eiji
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Findings of the Association for Computational Linguistics: ACL 2025
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-256-5
%F horiguchi-etal-2025-multimsd
%X We release a parallel corpus for medical text simplification, which paraphrases medical terms into expressions easily understood by patients. Medical texts written by medical practitioners contain a lot of technical terms, and patients who are non-experts are often unable to use the information effectively. Therefore, there is a strong social demand for medical text simplification that paraphrases input sentences without using medical terms. However, this task has not been sufficiently studied in non-English languages. We therefore developed parallel corpora for medical text simplification in nine languages: German, English, Spanish, French, Italian, Japanese, Portuguese, Russian, and Chinese, each with 10,000 sentence pairs, by automatic sentence alignment to online medical references for professionals and consumers. We also propose a method for training text simplification models to actively paraphrase complex expressions, including medical terms. Experimental results show that the proposed method improves the performance of medical text simplification. In addition, we confirmed that training with a multilingual dataset is more effective than training with a monolingual dataset.
%R 10.18653/v1/2025.findings-acl.481
%U https://aclanthology.org/2025.findings-acl.481/
%U https://doi.org/10.18653/v1/2025.findings-acl.481
%P 9248-9258
Markdown (Informal)
[MultiMSD: A Corpus for Multilingual Medical Text Simplification from Online Medical References](https://aclanthology.org/2025.findings-acl.481/) (Horiguchi et al., Findings 2025)
ACL