@inproceedings{ormaechea-etal-2024-simplification,
title = "Simplification Strategies in {F}rench Spontaneous Speech",
author = "Ormaechea, Luc{\'\i}a and
Tsourakis, Nikos and
Schwab, Didier and
Bouillon, Pierrette and
Lecouteux, Benjamin",
editor = "Nunzio, Giorgio Maria Di and
Vezzani, Federica and
Ermakova, Liana and
Azarbonyad, Hosein and
Kamps, Jaap",
booktitle = "Proceedings of the Workshop on DeTermIt! Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024",
month = may,
year = "2024",
address = "Torino, Italia",
publisher = "ELRA and ICCL",
url = "https://aclanthology.org/2024.determit-1.9",
pages = "90--102",
abstract = "Automatic Text Simplification (ATS) aims at rewriting texts into simpler variants while preserving their original meaning, so they can be more easily understood by different audiences. While ATS has been widely used for written texts, its application to spoken language remains unexplored, even if it is not exempt from difficulty. This study aims to characterize the edit operations performed in order to simplify French transcripts for non-native speakers. To do so, we relied on a data sample randomly extracted from the Orf{\'e}o-CEFC French spontaneous speech dataset. In the absence of guidelines to direct this process, we adopted an intuitive simplification approach, so as to investigate the crafted simplifications based on expert linguists{'} criteria, and to compare them with those produced by a generative AI (namely, ChatGPT). The results, analyzed quantitatively and qualitatively, reveal that the most common edits are deletions, and affect oral production aspects, like restarts or hesitations. Consequently, candidate simplifications are typically register-standardized sentences that solely include the propositional content of the input. The study also examines the alignment between human- and machine-based simplifications, revealing a moderate level of agreement, and highlighting the subjective nature of the task. The findings contribute to understanding the intricacies of simplifying spontaneous spoken language. In addition, the provision of a small-scale parallel dataset derived from such expert simplifications, Propicto-Orf{\'e}o-Simple, can facilitate the evaluation of speech simplification solutions.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ormaechea-etal-2024-simplification">
<titleInfo>
<title>Simplification Strategies in French Spontaneous Speech</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucía</namePart>
<namePart type="family">Ormaechea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nikos</namePart>
<namePart type="family">Tsourakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Didier</namePart>
<namePart type="family">Schwab</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pierrette</namePart>
<namePart type="family">Bouillon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Lecouteux</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on DeTermIt! Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Giorgio</namePart>
<namePart type="given">Maria</namePart>
<namePart type="given">Di</namePart>
<namePart type="family">Nunzio</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Federica</namePart>
<namePart type="family">Vezzani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liana</namePart>
<namePart type="family">Ermakova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hosein</namePart>
<namePart type="family">Azarbonyad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jaap</namePart>
<namePart type="family">Kamps</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ELRA and ICCL</publisher>
<place>
<placeTerm type="text">Torino, Italia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Automatic Text Simplification (ATS) aims at rewriting texts into simpler variants while preserving their original meaning, so they can be more easily understood by different audiences. While ATS has been widely used for written texts, its application to spoken language remains unexplored, even if it is not exempt from difficulty. This study aims to characterize the edit operations performed in order to simplify French transcripts for non-native speakers. To do so, we relied on a data sample randomly extracted from the Orféo-CEFC French spontaneous speech dataset. In the absence of guidelines to direct this process, we adopted an intuitive simplification approach, so as to investigate the crafted simplifications based on expert linguists’ criteria, and to compare them with those produced by a generative AI (namely, ChatGPT). The results, analyzed quantitatively and qualitatively, reveal that the most common edits are deletions, and affect oral production aspects, like restarts or hesitations. Consequently, candidate simplifications are typically register-standardized sentences that solely include the propositional content of the input. The study also examines the alignment between human- and machine-based simplifications, revealing a moderate level of agreement, and highlighting the subjective nature of the task. The findings contribute to understanding the intricacies of simplifying spontaneous spoken language. In addition, the provision of a small-scale parallel dataset derived from such expert simplifications, Propicto-Orféo-Simple, can facilitate the evaluation of speech simplification solutions.</abstract>
<identifier type="citekey">ormaechea-etal-2024-simplification</identifier>
<location>
<url>https://aclanthology.org/2024.determit-1.9</url>
</location>
<part>
<date>2024-05</date>
<extent unit="page">
<start>90</start>
<end>102</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Simplification Strategies in French Spontaneous Speech
%A Ormaechea, Lucía
%A Tsourakis, Nikos
%A Schwab, Didier
%A Bouillon, Pierrette
%A Lecouteux, Benjamin
%Y Nunzio, Giorgio Maria Di
%Y Vezzani, Federica
%Y Ermakova, Liana
%Y Azarbonyad, Hosein
%Y Kamps, Jaap
%S Proceedings of the Workshop on DeTermIt! Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024
%D 2024
%8 May
%I ELRA and ICCL
%C Torino, Italia
%F ormaechea-etal-2024-simplification
%X Automatic Text Simplification (ATS) aims at rewriting texts into simpler variants while preserving their original meaning, so they can be more easily understood by different audiences. While ATS has been widely used for written texts, its application to spoken language remains unexplored, even if it is not exempt from difficulty. This study aims to characterize the edit operations performed in order to simplify French transcripts for non-native speakers. To do so, we relied on a data sample randomly extracted from the Orféo-CEFC French spontaneous speech dataset. In the absence of guidelines to direct this process, we adopted an intuitive simplification approach, so as to investigate the crafted simplifications based on expert linguists’ criteria, and to compare them with those produced by a generative AI (namely, ChatGPT). The results, analyzed quantitatively and qualitatively, reveal that the most common edits are deletions, and affect oral production aspects, like restarts or hesitations. Consequently, candidate simplifications are typically register-standardized sentences that solely include the propositional content of the input. The study also examines the alignment between human- and machine-based simplifications, revealing a moderate level of agreement, and highlighting the subjective nature of the task. The findings contribute to understanding the intricacies of simplifying spontaneous spoken language. In addition, the provision of a small-scale parallel dataset derived from such expert simplifications, Propicto-Orféo-Simple, can facilitate the evaluation of speech simplification solutions.
%U https://aclanthology.org/2024.determit-1.9
%P 90-102
Markdown (Informal)
[Simplification Strategies in French Spontaneous Speech](https://aclanthology.org/2024.determit-1.9) (Ormaechea et al., DeTermIt-WS 2024)
ACL
- Lucía Ormaechea, Nikos Tsourakis, Didier Schwab, Pierrette Bouillon, and Benjamin Lecouteux. 2024. Simplification Strategies in French Spontaneous Speech. In Proceedings of the Workshop on DeTermIt! Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024, pages 90–102, Torino, Italia. ELRA and ICCL.