@inproceedings{trusca-allein-2025-mimicking,
title = "Mimicking How Humans Interpret Out-of-Context Sentences Through Controlled Toxicity Decoding",
author = "Trusca, Maria Mihaela and
Allein, Liesbeth",
editor = "Cao, Trista and
Das, Anubrata and
Kumarage, Tharindu and
Wan, Yixin and
Krishna, Satyapriya and
Mehrabi, Ninareh and
Dhamala, Jwala and
Ramakrishna, Anil and
Galystan, Aram and
Kumar, Anoop and
Gupta, Rahul and
Chang, Kai-Wei",
booktitle = "Proceedings of the 5th Workshop on Trustworthy NLP (TrustNLP 2025)",
month = may,
year = "2025",
address = "Albuquerque, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2025.trustnlp-main.19/",
doi = "10.18653/v1/2025.trustnlp-main.19",
pages = "291--297",
ISBN = "979-8-89176-233-6",
abstract = "Interpretations of a single sentence can vary, particularly when its context is lost. This paper aims to simulate how readers perceive content with varying toxicity levels by generating diverse interpretations of out-of-context sentences. By modeling toxicity we can anticipate misunderstandings and reveal hidden toxic meanings. Our proposed decoding strategy explicitly controls toxicity in the set of generated interpretations by (i) aligning interpretation toxicity with the input, (ii) relaxing toxicity constraints for more toxic input sentences, and (iii) promoting diversity in toxicity levels within the set of generated interpretations. Experimental results show that our method improves alignment with human-written interpretations in both syntax and semantics while reducing model prediction uncertainty."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="trusca-allein-2025-mimicking">
<titleInfo>
<title>Mimicking How Humans Interpret Out-of-Context Sentences Through Controlled Toxicity Decoding</title>
</titleInfo>
<name type="personal">
<namePart type="given">Maria</namePart>
<namePart type="given">Mihaela</namePart>
<namePart type="family">Trusca</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liesbeth</namePart>
<namePart type="family">Allein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2025-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 5th Workshop on Trustworthy NLP (TrustNLP 2025)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Trista</namePart>
<namePart type="family">Cao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anubrata</namePart>
<namePart type="family">Das</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tharindu</namePart>
<namePart type="family">Kumarage</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yixin</namePart>
<namePart type="family">Wan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Satyapriya</namePart>
<namePart type="family">Krishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ninareh</namePart>
<namePart type="family">Mehrabi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jwala</namePart>
<namePart type="family">Dhamala</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anil</namePart>
<namePart type="family">Ramakrishna</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Aram</namePart>
<namePart type="family">Galystan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anoop</namePart>
<namePart type="family">Kumar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rahul</namePart>
<namePart type="family">Gupta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Albuquerque, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-233-6</identifier>
</relatedItem>
<abstract>Interpretations of a single sentence can vary, particularly when its context is lost. This paper aims to simulate how readers perceive content with varying toxicity levels by generating diverse interpretations of out-of-context sentences. By modeling toxicity we can anticipate misunderstandings and reveal hidden toxic meanings. Our proposed decoding strategy explicitly controls toxicity in the set of generated interpretations by (i) aligning interpretation toxicity with the input, (ii) relaxing toxicity constraints for more toxic input sentences, and (iii) promoting diversity in toxicity levels within the set of generated interpretations. Experimental results show that our method improves alignment with human-written interpretations in both syntax and semantics while reducing model prediction uncertainty.</abstract>
<identifier type="citekey">trusca-allein-2025-mimicking</identifier>
<identifier type="doi">10.18653/v1/2025.trustnlp-main.19</identifier>
<location>
<url>https://aclanthology.org/2025.trustnlp-main.19/</url>
</location>
<part>
<date>2025-05</date>
<extent unit="page">
<start>291</start>
<end>297</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Mimicking How Humans Interpret Out-of-Context Sentences Through Controlled Toxicity Decoding
%A Trusca, Maria Mihaela
%A Allein, Liesbeth
%Y Cao, Trista
%Y Das, Anubrata
%Y Kumarage, Tharindu
%Y Wan, Yixin
%Y Krishna, Satyapriya
%Y Mehrabi, Ninareh
%Y Dhamala, Jwala
%Y Ramakrishna, Anil
%Y Galystan, Aram
%Y Kumar, Anoop
%Y Gupta, Rahul
%Y Chang, Kai-Wei
%S Proceedings of the 5th Workshop on Trustworthy NLP (TrustNLP 2025)
%D 2025
%8 May
%I Association for Computational Linguistics
%C Albuquerque, New Mexico
%@ 979-8-89176-233-6
%F trusca-allein-2025-mimicking
%X Interpretations of a single sentence can vary, particularly when its context is lost. This paper aims to simulate how readers perceive content with varying toxicity levels by generating diverse interpretations of out-of-context sentences. By modeling toxicity we can anticipate misunderstandings and reveal hidden toxic meanings. Our proposed decoding strategy explicitly controls toxicity in the set of generated interpretations by (i) aligning interpretation toxicity with the input, (ii) relaxing toxicity constraints for more toxic input sentences, and (iii) promoting diversity in toxicity levels within the set of generated interpretations. Experimental results show that our method improves alignment with human-written interpretations in both syntax and semantics while reducing model prediction uncertainty.
%R 10.18653/v1/2025.trustnlp-main.19
%U https://aclanthology.org/2025.trustnlp-main.19/
%U https://doi.org/10.18653/v1/2025.trustnlp-main.19
%P 291-297
Markdown (Informal)
[Mimicking How Humans Interpret Out-of-Context Sentences Through Controlled Toxicity Decoding](https://aclanthology.org/2025.trustnlp-main.19/) (Trusca & Allein, TrustNLP 2025)
ACL