@inproceedings{buhnila-todirascu-2024-largemed,
title = "{LARGEMED}: A Resource for Identifying and Generating Paraphrases for {F}rench Medical Terms",
author = "Buhnila, Ioana and
Todirascu, Amalia",
editor = "Nunzio, Giorgio Maria Di and
Vezzani, Federica and
Ermakova, Liana and
Azarbonyad, Hosein and
Kamps, Jaap",
booktitle = "Proceedings of the Workshop on DeTermIt! Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.determit-1.14",
pages = "141--151",
abstract = "This article presents a method extending an existing French corpus of paraphrases of medical terms ANONYMOUS with new data from Web archives created during the Covid-19 pandemic. Our method semi-automatically detects new terms and paraphrase markers introducing paraphrases from these Web archives, followed by a manual annotation step to identify paraphrases and their lexical and semantic properties. The extended large corpus LARGEMED could be used for automatic medical text simplification for patients and their families. To automatise data collection, we propose two experiments. The first experiment uses the new LARGEMED dataset to train a binary classifier aiming to detect new sentences containing possible paraphrases. The second experiment aims to use correct paraphrases to train a model for paraphrase generation, by adapting T5 Language Model to the paraphrase generation task using an adversarial algorithm.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="buhnila-todirascu-2024-largemed">
<titleInfo>
<title>LARGEMED: A Resource for Identifying and Generating Paraphrases for French Medical Terms</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ioana</namePart>
<namePart type="family">Buhnila</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amalia</namePart>
<namePart type="family">Todirascu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on DeTermIt! Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giorgio</namePart>
<namePart type="given">Maria</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Nunzio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federica</namePart>
<namePart type="family">Vezzani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liana</namePart>
<namePart type="family">Ermakova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosein</namePart>
<namePart type="family">Azarbonyad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaap</namePart>
<namePart type="family">Kamps</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This article presents a method extending an existing French corpus of paraphrases of medical terms ANONYMOUS with new data from Web archives created during the Covid-19 pandemic. Our method semi-automatically detects new terms and paraphrase markers introducing paraphrases from these Web archives, followed by a manual annotation step to identify paraphrases and their lexical and semantic properties. The extended large corpus LARGEMED could be used for automatic medical text simplification for patients and their families. To automatise data collection, we propose two experiments. The first experiment uses the new LARGEMED dataset to train a binary classifier aiming to detect new sentences containing possible paraphrases. The second experiment aims to use correct paraphrases to train a model for paraphrase generation, by adapting T5 Language Model to the paraphrase generation task using an adversarial algorithm.</abstract>
<identifier type="citekey">buhnila-todirascu-2024-largemed</identifier>
<location>
<url>https://aclanthology.org/2024.determit-1.14</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>141</start>
<end>151</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LARGEMED: A Resource for Identifying and Generating Paraphrases for French Medical Terms
%A Buhnila, Ioana
%A Todirascu, Amalia
%Y Nunzio, Giorgio Maria Di
%Y Vezzani, Federica
%Y Ermakova, Liana
%Y Azarbonyad, Hosein
%Y Kamps, Jaap
%S Proceedings of the Workshop on DeTermIt! Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F buhnila-todirascu-2024-largemed
%X This article presents a method extending an existing French corpus of paraphrases of medical terms ANONYMOUS with new data from Web archives created during the Covid-19 pandemic. Our method semi-automatically detects new terms and paraphrase markers introducing paraphrases from these Web archives, followed by a manual annotation step to identify paraphrases and their lexical and semantic properties. The extended large corpus LARGEMED could be used for automatic medical text simplification for patients and their families. To automatise data collection, we propose two experiments. The first experiment uses the new LARGEMED dataset to train a binary classifier aiming to detect new sentences containing possible paraphrases. The second experiment aims to use correct paraphrases to train a model for paraphrase generation, by adapting T5 Language Model to the paraphrase generation task using an adversarial algorithm.
%U https://aclanthology.org/2024.determit-1.14
%P 141-151
Markdown (Informal)
[LARGEMED: A Resource for Identifying and Generating Paraphrases for French Medical Terms](https://aclanthology.org/2024.determit-1.14) (Buhnila & Todirascu, DeTermIt-WS 2024)
ACL