@inproceedings{mcmurry-2026-quantifying,
title = "Quantifying Social Sentiment in Hostels Using A Domain-Specific Transformer Pipeline",
author = "McMurry, Ian W.",
editor = "Barnes, Jeremy and
Barriere, Valentin and
De Clercq, Orph{\'e}e and
Klinger, Roman and
Nouri, C{\'e}lia and
Nozza, Debora and
Singh, Pranaydeep",
booktitle = "The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment Social Media Analysis ({WASSA} 2026)",
month = mar,
year = "2026",
address = "Rabat, Morocco",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2026.wassa-1.3/",
pages = "24--36",
ISBN = "979-8-89176-378-4",
abstract = "This paper presents a domain-specific transformer pipeline for quantifying social atmosphere in hostel reviews, an experiential dimension that travelers consistently prioritize but that existing NLP methods and booking platforms fail to capture. We train a cross-encoder on 4,994 manually annotated reviews and use it to pseudo-label 162,840 additional reviews; these labels are then distilled into a sentence-transformer bi-encoder, producing embeddings where proximity reflects social interaction level rather than generic sentiment. On held-out human-labeled data, the domain-adapted embeddings achieve F1 = 0.826, outperforming generic sentence embeddings (0.671) and zero-shot GPT-4o (0.774), with a 40-fold improvement in intra-class versus inter-class similarity. Aggregating predictions to the property level reveals that hostel socialness follows an approximate exponential distribution, confirming that highly social hostels are rare. This work formalizes socialness as a measurable semantic construct and provides a general template for extracting implicit experiential attributes from text at scale."
}<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mcmurry-2026-quantifying">
<titleInfo>
<title>Quantifying Social Sentiment in Hostels Using A Domain-Specific Transformer Pipeline</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ian</namePart>
<namePart type="given">W</namePart>
<namePart type="family">McMurry</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2026-03</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment Social Media Analysis (WASSA 2026)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jeremy</namePart>
<namePart type="family">Barnes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Valentin</namePart>
<namePart type="family">Barriere</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Orphée</namePart>
<namePart type="family">De Clercq</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Klinger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Célia</namePart>
<namePart type="family">Nouri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Debora</namePart>
<namePart type="family">Nozza</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pranaydeep</namePart>
<namePart type="family">Singh</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Rabat, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-8-89176-378-4</identifier>
</relatedItem>
<abstract>This paper presents a domain-specific transformer pipeline for quantifying social atmosphere in hostel reviews, an experiential dimension that travelers consistently prioritize but that existing NLP methods and booking platforms fail to capture. We train a cross-encoder on 4,994 manually annotated reviews and use it to pseudo-label 162,840 additional reviews; these labels are then distilled into a sentence-transformer bi-encoder, producing embeddings where proximity reflects social interaction level rather than generic sentiment. On held-out human-labeled data, the domain-adapted embeddings achieve F1 = 0.826, outperforming generic sentence embeddings (0.671) and zero-shot GPT-4o (0.774), with a 40-fold improvement in intra-class versus inter-class similarity. Aggregating predictions to the property level reveals that hostel socialness follows an approximate exponential distribution, confirming that highly social hostels are rare. This work formalizes socialness as a measurable semantic construct and provides a general template for extracting implicit experiential attributes from text at scale.</abstract>
<identifier type="citekey">mcmurry-2026-quantifying</identifier>
<location>
<url>https://aclanthology.org/2026.wassa-1.3/</url>
</location>
<part>
<date>2026-03</date>
<extent unit="page">
<start>24</start>
<end>36</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Quantifying Social Sentiment in Hostels Using A Domain-Specific Transformer Pipeline
%A McMurry, Ian W.
%Y Barnes, Jeremy
%Y Barriere, Valentin
%Y De Clercq, Orphée
%Y Klinger, Roman
%Y Nouri, Célia
%Y Nozza, Debora
%Y Singh, Pranaydeep
%S The Proceedings for the 15th Workshop on Computational Approaches to Subjectivity, Sentiment Social Media Analysis (WASSA 2026)
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-378-4
%F mcmurry-2026-quantifying
%X This paper presents a domain-specific transformer pipeline for quantifying social atmosphere in hostel reviews, an experiential dimension that travelers consistently prioritize but that existing NLP methods and booking platforms fail to capture. We train a cross-encoder on 4,994 manually annotated reviews and use it to pseudo-label 162,840 additional reviews; these labels are then distilled into a sentence-transformer bi-encoder, producing embeddings where proximity reflects social interaction level rather than generic sentiment. On held-out human-labeled data, the domain-adapted embeddings achieve F1 = 0.826, outperforming generic sentence embeddings (0.671) and zero-shot GPT-4o (0.774), with a 40-fold improvement in intra-class versus inter-class similarity. Aggregating predictions to the property level reveals that hostel socialness follows an approximate exponential distribution, confirming that highly social hostels are rare. This work formalizes socialness as a measurable semantic construct and provides a general template for extracting implicit experiential attributes from text at scale.
%U https://aclanthology.org/2026.wassa-1.3/
%P 24-36
Markdown (Informal)
[Quantifying Social Sentiment in Hostels Using A Domain-Specific Transformer Pipeline](https://aclanthology.org/2026.wassa-1.3/) (McMurry, WASSA 2026)
ACL