% BibTeX record for ACL Anthology paper 2025.acl-long.491.
@inproceedings{alsagheer-etal-2025-lawyer,
  title     = {The Lawyer That Never Thinks: Consistency and Fairness as Keys to Reliable {AI}},
  author    = {Alsagheer, Dana R and
               Kamal, Abdulrahman and
               Kamal, Mohammad and
               Wu, Cosmo Yang and
               Shi, Weidong},
  editor    = {Che, Wanxiang and
               Nabende, Joyce and
               Shutova, Ekaterina and
               Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2025},
  address   = {Vienna, Austria},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2025.acl-long.491/},
  doi       = {10.18653/v1/2025.acl-long.491},
  pages     = {9943--9954},
  isbn      = {979-8-89176-251-0},
  abstract  = {Large Language Models (LLMs) are increasingly used in high-stakes domains like law and research, yet their inconsistencies and response instability raise concerns about trustworthiness. This study evaluates six leading LLMs{---}GPT-3.5, GPT-4, Claude, Gemini, Mistral, and LLaMA 2{---}on rationality, stability, and ethical fairness through reasoning tests, legal challenges, and bias-sensitive scenarios. Results reveal significant inconsistencies, highlighting trade-offs between model scale, architecture, and logical coherence. These findings underscore the risks of deploying LLMs in legal and policy settings, emphasizing the need for AI systems that prioritize transparency, fairness, and ethical robustness.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <!-- Single MODS record; its ID matches the citekey identifier below. -->
  <mods ID="alsagheer-etal-2025-lawyer">
    <titleInfo>
      <title>The Lawyer That Never Thinks: Consistency and Fairness as Keys to Reliable AI</title>
    </titleInfo>
    <!-- Authors of the paper, one <name> element each. -->
    <name type="personal">
      <namePart type="given">Dana</namePart>
      <namePart type="given">R</namePart>
      <namePart type="family">Alsagheer</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Abdulrahman</namePart>
      <namePart type="family">Kamal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Mohammad</namePart>
      <namePart type="family">Kamal</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Cosmo</namePart>
      <namePart type="given">Yang</namePart>
      <namePart type="family">Wu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Weidong</namePart>
      <namePart type="family">Shi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2025-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume with its editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Wanxiang</namePart>
        <namePart type="family">Che</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Joyce</namePart>
        <namePart type="family">Nabende</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Shutova</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Mohammad</namePart>
        <namePart type="given">Taher</namePart>
        <namePart type="family">Pilehvar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Vienna, Austria</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-251-0</identifier>
    </relatedItem>
    <abstract>Large Language Models (LLMs) are increasingly used in high-stakes domains like law and research, yet their inconsistencies and response instability raise concerns about trustworthiness. This study evaluates six leading LLMs—GPT-3.5, GPT-4, Claude, Gemini, Mistral, and LLaMA 2—on rationality, stability, and ethical fairness through reasoning tests, legal challenges, and bias-sensitive scenarios. Results reveal significant inconsistencies, highlighting trade-offs between model scale, architecture, and logical coherence. These findings underscore the risks of deploying LLMs in legal and policy settings, emphasizing the need for AI systems that prioritize transparency, fairness, and ethical robustness.</abstract>
    <!-- Identifiers for the paper itself, followed by its landing-page URL. -->
    <identifier type="citekey">alsagheer-etal-2025-lawyer</identifier>
    <identifier type="doi">10.18653/v1/2025.acl-long.491</identifier>
    <location>
      <url>https://aclanthology.org/2025.acl-long.491/</url>
    </location>
    <!-- Date and page extent of the paper. -->
    <part>
      <date>2025-07</date>
      <extent unit="page">
        <start>9943</start>
        <end>9954</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T The Lawyer That Never Thinks: Consistency and Fairness as Keys to Reliable AI
%A Alsagheer, Dana R.
%A Kamal, Abdulrahman
%A Kamal, Mohammad
%A Wu, Cosmo Yang
%A Shi, Weidong
%Y Che, Wanxiang
%Y Nabende, Joyce
%Y Shutova, Ekaterina
%Y Pilehvar, Mohammad Taher
%S Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)
%D 2025
%8 July
%I Association for Computational Linguistics
%C Vienna, Austria
%@ 979-8-89176-251-0
%F alsagheer-etal-2025-lawyer
%X Large Language Models (LLMs) are increasingly used in high-stakes domains like law and research, yet their inconsistencies and response instability raise concerns about trustworthiness. This study evaluates six leading LLMs—GPT-3.5, GPT-4, Claude, Gemini, Mistral, and LLaMA 2—on rationality, stability, and ethical fairness through reasoning tests, legal challenges, and bias-sensitive scenarios. Results reveal significant inconsistencies, highlighting trade-offs between model scale, architecture, and logical coherence. These findings underscore the risks of deploying LLMs in legal and policy settings, emphasizing the need for AI systems that prioritize transparency, fairness, and ethical robustness.
%R 10.18653/v1/2025.acl-long.491
%U https://aclanthology.org/2025.acl-long.491/
%U https://doi.org/10.18653/v1/2025.acl-long.491
%P 9943-9954
Markdown (Informal)
[The Lawyer That Never Thinks: Consistency and Fairness as Keys to Reliable AI](https://aclanthology.org/2025.acl-long.491/) (Alsagheer et al., ACL 2025)
ACL