% BibTeX record for ACL Anthology paper 2025.emnlp-main.1120.
% Text outside an @entry{...} is ignored by BibTeX, so these lines act as comments.
@inproceedings{calvo-bartolome-etal-2025-discrepancy,
    title = {Discrepancy Detection at the Data Level: Toward Consistent Multilingual Question Answering},
    author = {Calvo-Bartolom{\'e}, Lorena and
      Aldana, Val{\'e}rie and
      Cantarero, Karla and
      de Mesa, Alonso Madro{\~n}al and
      Arenas-Garc{\'i}a, Jer{\'o}nimo and
      Boyd-Graber, Jordan Lee},
    editor = {Christodoulopoulos, Christos and
      Chakraborty, Tanmoy and
      Rose, Carolyn and
      Peng, Violet},
    booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},
    month = nov,
    year = {2025},
    address = {Suzhou, China},
    publisher = {Association for Computational Linguistics},
    url = {https://aclanthology.org/2025.emnlp-main.1120/},
    doi = {10.18653/v1/2025.emnlp-main.1120},
    pages = {22013--22054},
    isbn = {979-8-89176-332-6},
    abstract = {Multilingual question answering (QA) systems must ensure factual consistency across languages, especially for objective queries such as What is jaundice?, while also accounting for cultural variation in subjective responses. We propose MIND, a user-in-the-loop fact-checking pipeline to detect factual and cultural discrepancies in multilingual QA knowledge bases. MIND highlights divergent answers to culturally sensitive questions (e.g., Who assists in childbirth?) that vary by region and context. We evaluate MIND on a bilingual QA system in the maternal and infant health domain and release a dataset of bilingual questions annotated for factual and cultural inconsistencies. We further test MIND on datasets from other domains to assess generalization. In all cases, MIND reliably identifies inconsistencies, supporting the development of more culturally aware and factually consistent QA systems.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- MODS record for the paper; its ID matches the citekey identifier below. -->
  <mods ID="calvo-bartolome-etal-2025-discrepancy">
    <titleInfo>
      <title>Discrepancy Detection at the Data Level: Toward Consistent Multilingual Question Answering</title>
    </titleInfo>
    <!-- Authors, in order. -->
    <name type="personal">
      <namePart type="given">Lorena</namePart>
      <namePart type="family">Calvo-Bartolomé</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Valérie</namePart>
      <namePart type="family">Aldana</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Karla</namePart>
      <namePart type="family">Cantarero</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Alonso</namePart>
      <namePart type="given">Madroñal</namePart>
      <namePart type="family">de Mesa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jerónimo</namePart>
      <namePart type="family">Arenas-García</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jordan</namePart>
      <namePart type="given">Lee</namePart>
      <namePart type="family">Boyd-Graber</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Christos</namePart>
        <namePart type="family">Christodoulopoulos</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Tanmoy</namePart>
        <namePart type="family">Chakraborty</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carolyn</namePart>
        <namePart type="family">Rose</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Violet</namePart>
        <namePart type="family">Peng</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Suzhou, China</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-332-6</identifier>
    </relatedItem>
    <abstract>Multilingual question answering (QA) systems must ensure factual consistency across languages, especially for objective queries such as What is jaundice?, while also accounting for cultural variation in subjective responses. We propose MIND, a user-in-the-loop fact-checking pipeline to detect factual and cultural discrepancies in multilingual QA knowledge bases. MIND highlights divergent answers to culturally sensitive questions (e.g., Who assists in childbirth?) that vary by region and context. We evaluate MIND on a bilingual QA system in the maternal and infant health domain and release a dataset of bilingual questions annotated for factual and cultural inconsistencies. We further test MIND on datasets from other domains to assess generalization. In all cases, MIND reliably identifies inconsistencies, supporting the development of more culturally aware and factually consistent QA systems.</abstract>
    <!-- Identifiers and landing page for the paper itself. -->
    <identifier type="citekey">calvo-bartolome-etal-2025-discrepancy</identifier>
    <identifier type="doi">10.18653/v1/2025.emnlp-main.1120</identifier>
    <location>
      <url>https://aclanthology.org/2025.emnlp-main.1120/</url>
    </location>
    <!-- Page extent of the paper within the host volume. -->
    <part>
      <date>2025-11</date>
      <extent unit="page">
        <start>22013</start>
        <end>22054</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Discrepancy Detection at the Data Level: Toward Consistent Multilingual Question Answering
%A Calvo-Bartolomé, Lorena
%A Aldana, Valérie
%A Cantarero, Karla
%A de Mesa, Alonso Madroñal
%A Arenas-García, Jerónimo
%A Boyd-Graber, Jordan Lee
%Y Christodoulopoulos, Christos
%Y Chakraborty, Tanmoy
%Y Rose, Carolyn
%Y Peng, Violet
%S Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing
%D 2025
%8 November
%I Association for Computational Linguistics
%C Suzhou, China
%@ 979-8-89176-332-6
%F calvo-bartolome-etal-2025-discrepancy
%X Multilingual question answering (QA) systems must ensure factual consistency across languages, especially for objective queries such as What is jaundice?, while also accounting for cultural variation in subjective responses. We propose MIND, a user-in-the-loop fact-checking pipeline to detect factual and cultural discrepancies in multilingual QA knowledge bases. MIND highlights divergent answers to culturally sensitive questions (e.g., Who assists in childbirth?) that vary by region and context. We evaluate MIND on a bilingual QA system in the maternal and infant health domain and release a dataset of bilingual questions annotated for factual and cultural inconsistencies. We further test MIND on datasets from other domains to assess generalization. In all cases, MIND reliably identifies inconsistencies, supporting the development of more culturally aware and factually consistent QA systems.
%R 10.18653/v1/2025.emnlp-main.1120
%U https://aclanthology.org/2025.emnlp-main.1120/
%U https://doi.org/10.18653/v1/2025.emnlp-main.1120
%P 22013-22054
Markdown (Informal)
[Discrepancy Detection at the Data Level: Toward Consistent Multilingual Question Answering](https://aclanthology.org/2025.emnlp-main.1120/) (Calvo-Bartolomé et al., EMNLP 2025)
ACL