@inproceedings{mcgrath-etal-2025-dataset,
title = "A Dataset and Benchmark on Extraction of Novel Concepts on Trust in {AI} from Scientific Literature",
author = {McGrath, Melanie and
Bailey, Harrison and
B{\"o}l{\"u}c{\"u}, Necva and
Dai, Xiang and
Karimi, Sarvnaz and
Duenser, Andreas and
Paris, C{\'e}cile},
editor = "Kummerfeld, Jonathan K. and
Joshi, Aditya and
Dras, Mark",
booktitle = "Proceedings of the 23rd Annual Workshop of the Australasian Language Technology Association",
month = nov,
year = "2025",
address = "Sydney, Australia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.alta-main.11/",
pages = "159--175",
ISBN = "1834-7037",
abstract = "This study investigates the extent to which linguistic typology influences the performance of two automatic speech recognition (ASR) systems across diverse language families. Using the FLEURS corpus and typological features from the World Atlas of Language Structures (WALS), we analysed 40 languages grouped by phonological, morphological, syntactic, and semantic domains. We evaluated two state-of-the-art multilingual ASR systems, Whisper and Seamless, to examine how their performance, measured by word error rate (WER), correlates with linguistic structures. Random Forests and Mixed Effects Models were used to quantify feature impact and statistical significance. Results reveal that while both systems leverage typological patterns, they differ in their sensitivity to specific domains. Our findings highlight how structural and functional linguistic features shape ASR performance, offering insights into model generalisability and typology-aware system development."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mcgrath-etal-2025-dataset">
<titleInfo>
<title>A Dataset and Benchmark on Extraction of Novel Concepts on Trust in AI from Scientific Literature</title>
</titleInfo>
<name type="personal">
<namePart type="given">Melanie</namePart>
<namePart type="family">McGrath</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Harrison</namePart>
<namePart type="family">Bailey</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Necva</namePart>
<namePart type="family">Bölücü</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xiang</namePart>
<namePart type="family">Dai</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarvnaz</namePart>
<namePart type="family">Karimi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Duenser</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cécile</namePart>
<namePart type="family">Paris</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 23rd Annual Workshop of the Australasian Language Technology Association</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jonathan</namePart>
<namePart type="given">K</namePart>
<namePart type="family">Kummerfeld</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aditya</namePart>
<namePart type="family">Joshi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Dras</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Sydney, Australia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">1834-7037</identifier>
</relatedItem>
<abstract>This study investigates the extent to which linguistic typology influences the performance of two automatic speech recognition (ASR) systems across diverse language families. Using the FLEURS corpus and typological features from the World Atlas of Language Structures (WALS), we analysed 40 languages grouped by phonological, morphological, syntactic, and semantic domains. We evaluated two state-of-the-art multilingual ASR systems, Whisper and Seamless, to examine how their performance, measured by word error rate (WER), correlates with linguistic structures. Random Forests and Mixed Effects Models were used to quantify feature impact and statistical significance. Results reveal that while both systems leverage typological patterns, they differ in their sensitivity to specific domains. Our findings highlight how structural and functional linguistic features shape ASR performance, offering insights into model generalisability and typology-aware system development.</abstract>
<identifier type="citekey">mcgrath-etal-2025-dataset</identifier>
<location>
<url>https://aclanthology.org/2025.alta-main.11/</url>
</location>
<part>
<date>2025-11</date>
<extent unit="page">
<start>159</start>
<end>175</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Dataset and Benchmark on Extraction of Novel Concepts on Trust in AI from Scientific Literature
%A McGrath, Melanie
%A Bailey, Harrison
%A Bölücü, Necva
%A Dai, Xiang
%A Karimi, Sarvnaz
%A Duenser, Andreas
%A Paris, Cécile
%Y Kummerfeld, Jonathan K.
%Y Joshi, Aditya
%Y Dras, Mark
%S Proceedings of the 23rd Annual Workshop of the Australasian Language Technology Association
%D 2025
%8 November
%I Association for Computational Linguistics
%C Sydney, Australia
%@ 1834-7037
%F mcgrath-etal-2025-dataset
%X This study investigates the extent to which linguistic typology influences the performance of two automatic speech recognition (ASR) systems across diverse language families. Using the FLEURS corpus and typological features from the World Atlas of Language Structures (WALS), we analysed 40 languages grouped by phonological, morphological, syntactic, and semantic domains. We evaluated two state-of-the-art multilingual ASR systems, Whisper and Seamless, to examine how their performance, measured by word error rate (WER), correlates with linguistic structures. Random Forests and Mixed Effects Models were used to quantify feature impact and statistical significance. Results reveal that while both systems leverage typological patterns, they differ in their sensitivity to specific domains. Our findings highlight how structural and functional linguistic features shape ASR performance, offering insights into model generalisability and typology-aware system development.
%U https://aclanthology.org/2025.alta-main.11/
%P 159-175
Markdown (Informal)
[A Dataset and Benchmark on Extraction of Novel Concepts on Trust in AI from Scientific Literature](https://aclanthology.org/2025.alta-main.11/) (McGrath et al., ALTA 2025)
ACL