% ACL Anthology record 2026.latechclfl-1.11 (LaTeCH-CLfL 2026 workshop paper).
% The proper noun "Karelians" and the acronym "LLM" are brace-protected so that
% styles which apply sentence casing to titles keep their capitalisation.
@inproceedings{laato-etal-2026-measuring-social,
  title     = {Measuring Social Integration Through Participation: Categorizing Organizations and Leisure Activities in the Displaced {Karelians} Interview Archive using {LLM}s},
  author    = {Laato, Joonatan and
               Schroderus, Veera and
               Kanerva, Jenna and
               Kauppi, Jenni and
               Lummaa, Virpi and
               Ginter, Filip},
  editor    = {Alves, Diego and
               Bizzoni, Yuri and
               Degaetano-Ortlieb, Stefania and
               Kazantseva, Anna and
               Pagel, Janis and
               Szpakowicz, Stan},
  booktitle = {Proceedings of the 10th Joint {SIGHUM} Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2026.latechclfl-1.11/},
  pages     = {111--127},
  isbn      = {979-8-89176-373-9},
  abstract  = {We study how to better use digitized historical archives to answer sociological and historical questions that require more context than raw text mentions provide. Using Finnish World War II Karelian evacuee family interviews, we build on prior extraction of 350K mentions of leisure activities and organizational memberships (71K unique names) that are too diverse and unstructured to analyze directly. We introduce a categorization framework capturing key dimensions of participation: type of activity/organization, typical sociality, regularity, and the level of physical demand. After creating a gold-standard annotated set, we evaluate whether large language models can apply the schema at scale and find that an open-weight LLM, combined with simple multi-run voting, closely matches expert judgments. We then label all 350K entities to produce a structured resource for downstream analyses of social integration and related outcomes.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper with citekey laato-etal-2026-measuring-social. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="laato-etal-2026-measuring-social">
    <titleInfo>
      <title>Measuring Social Integration Through Participation: Categorizing Organizations and Leisure Activities in the Displaced Karelians Interview Archive using LLMs</title>
    </titleInfo>
    <!-- Paper authors, in byline order. -->
    <name type="personal">
      <namePart type="given">Joonatan</namePart>
      <namePart type="family">Laato</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Veera</namePart>
      <namePart type="family">Schroderus</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jenna</namePart>
      <namePart type="family">Kanerva</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jenni</namePart>
      <namePart type="family">Kauppi</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Virpi</namePart>
      <namePart type="family">Lummaa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Filip</namePart>
      <namePart type="family">Ginter</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2026-03</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host proceedings volume: title, editors, publisher, place and ISBN. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Diego</namePart>
        <namePart type="family">Alves</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yuri</namePart>
        <namePart type="family">Bizzoni</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stefania</namePart>
        <namePart type="family">Degaetano-Ortlieb</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Kazantseva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Janis</namePart>
        <namePart type="family">Pagel</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Stan</namePart>
        <namePart type="family">Szpakowicz</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Rabat, Morocco</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
      <identifier type="isbn">979-8-89176-373-9</identifier>
    </relatedItem>
    <abstract>We study how to better use digitized historical archives to answer sociological and historical questions that require more context than raw text mentions provide. Using Finnish World War II Karelian evacuee family interviews, we build on prior extraction of 350K mentions of leisure activities and organizational memberships (71K unique names) that are too diverse and unstructured to analyze directly. We introduce a categorization framework capturing key dimensions of participation: type of activity/organization, typical sociality, regularity, and the level of physical demand. After creating a gold-standard annotated set, we evaluate whether large language models can apply the schema at scale and find that an open-weight LLM, combined with simple multi-run voting, closely matches expert judgments. We then label all 350K entities to produce a structured resource for downstream analyses of social integration and related outcomes.</abstract>
    <!-- Citation key, landing-page URL, and page range within the host volume. -->
    <identifier type="citekey">laato-etal-2026-measuring-social</identifier>
    <location>
      <url>https://aclanthology.org/2026.latechclfl-1.11/</url>
    </location>
    <part>
      <date>2026-03</date>
      <extent unit="page">
        <start>111</start>
        <end>127</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Measuring Social Integration Through Participation: Categorizing Organizations and Leisure Activities in the Displaced Karelians Interview Archive using LLMs
%A Laato, Joonatan
%A Schroderus, Veera
%A Kanerva, Jenna
%A Kauppi, Jenni
%A Lummaa, Virpi
%A Ginter, Filip
%Y Alves, Diego
%Y Bizzoni, Yuri
%Y Degaetano-Ortlieb, Stefania
%Y Kazantseva, Anna
%Y Pagel, Janis
%Y Szpakowicz, Stan
%S Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026
%D 2026
%8 March
%I Association for Computational Linguistics
%C Rabat, Morocco
%@ 979-8-89176-373-9
%F laato-etal-2026-measuring-social
%X We study how to better use digitized historical archives to answer sociological and historical questions that require more context than raw text mentions provide. Using Finnish World War II Karelian evacuee family interviews, we build on prior extraction of 350K mentions of leisure activities and organizational memberships (71K unique names) that are too diverse and unstructured to analyze directly. We introduce a categorization framework capturing key dimensions of participation: type of activity/organization, typical sociality, regularity, and the level of physical demand. After creating a gold-standard annotated set, we evaluate whether large language models can apply the schema at scale and find that an open-weight LLM, combined with simple multi-run voting, closely matches expert judgments. We then label all 350K entities to produce a structured resource for downstream analyses of social integration and related outcomes.
%U https://aclanthology.org/2026.latechclfl-1.11/
%P 111-127
Markdown (Informal)
[Measuring Social Integration Through Participation: Categorizing Organizations and Leisure Activities in the Displaced Karelians Interview Archive using LLMs](https://aclanthology.org/2026.latechclfl-1.11/) (Laato et al., LaTeCH-CLfL 2026)
ACL
- Joonatan Laato, Veera Schroderus, Jenna Kanerva, Jenni Kauppi, Virpi Lummaa, and Filip Ginter. 2026. Measuring Social Integration Through Participation: Categorizing Organizations and Leisure Activities in the Displaced Karelians Interview Archive using LLMs. In Proceedings of the 10th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature 2026, pages 111–127, Rabat, Morocco. Association for Computational Linguistics.