@inproceedings{he-etal-2024-community,
title = "Community-Cross-Instruct: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities",
author = "He, Zihao and
Chu, Minh Duc and
Dorn, Rebecca and
Guo, Siyi and
Lerman, Kristina",
editor = "Al-Onaizan, Yaser and
Bansal, Mohit and
Chen, Yun-Nung",
booktitle = "Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2024",
address = "Miami, Florida, USA",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.emnlp-main.945/",
doi = "10.18653/v1/2024.emnlp-main.945",
pages = "17001--17019",
abstract = "Social scientists use surveys to probe the opinions and beliefs of populations, but these methods are slow, costly, and prone to biases. Recent advances in large language models (LLMs) enable the creating of computational representations or {\textquotedblleft}digital twins{\textquotedblright} of populations that generate human-like responses mimicking the population`s language, styles, and attitudes. We introduce Community-Cross-Instruct, an unsupervised framework for aligning LLMs to online communities to elicit their beliefs. Given a corpus of a community`s online discussions, Community-Cross-Instruct automatically generates instruction-output pairs by an advanced LLM to (1) finetune a foundational LLM to faithfully represent that community, and (2) evaluate the alignment of the finetuned model to the community. We demonstrate the method`s utility in accurately representing political and diet communities on Reddit. Unlike prior methods requiring human-authored instructions, Community-Cross-Instruct generates instructions in a fully unsupervised manner, enhancing scalability and generalization across domains. This work enables cost-effective and automated surveying of diverse online communities."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="he-etal-2024-community">
<titleInfo>
<title>Community-Cross-Instruct: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities</title>
</titleInfo>
<name type="personal">
<namePart type="given">Zihao</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Minh</namePart>
<namePart type="given">Duc</namePart>
<namePart type="family">Chu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rebecca</namePart>
<namePart type="family">Dorn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Siyi</namePart>
<namePart type="family">Guo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kristina</namePart>
<namePart type="family">Lerman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Social scientists use surveys to probe the opinions and beliefs of populations, but these methods are slow, costly, and prone to biases. Recent advances in large language models (LLMs) enable the creation of computational representations or “digital twins” of populations that generate human-like responses mimicking the population’s language, styles, and attitudes. We introduce Community-Cross-Instruct, an unsupervised framework for aligning LLMs to online communities to elicit their beliefs. Given a corpus of a community’s online discussions, Community-Cross-Instruct automatically generates instruction-output pairs using an advanced LLM to (1) finetune a foundational LLM to faithfully represent that community, and (2) evaluate the alignment of the finetuned model to the community. We demonstrate the method’s utility in accurately representing political and diet communities on Reddit. Unlike prior methods requiring human-authored instructions, Community-Cross-Instruct generates instructions in a fully unsupervised manner, enhancing scalability and generalization across domains. This work enables cost-effective and automated surveying of diverse online communities.</abstract>
<identifier type="citekey">he-etal-2024-community</identifier>
<identifier type="doi">10.18653/v1/2024.emnlp-main.945</identifier>
<location>
<url>https://aclanthology.org/2024.emnlp-main.945/</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>17001</start>
<end>17019</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Community-Cross-Instruct: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities
%A He, Zihao
%A Chu, Minh Duc
%A Dorn, Rebecca
%A Guo, Siyi
%A Lerman, Kristina
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F he-etal-2024-community
%X Social scientists use surveys to probe the opinions and beliefs of populations, but these methods are slow, costly, and prone to biases. Recent advances in large language models (LLMs) enable the creation of computational representations or “digital twins” of populations that generate human-like responses mimicking the population’s language, styles, and attitudes. We introduce Community-Cross-Instruct, an unsupervised framework for aligning LLMs to online communities to elicit their beliefs. Given a corpus of a community’s online discussions, Community-Cross-Instruct automatically generates instruction-output pairs using an advanced LLM to (1) finetune a foundational LLM to faithfully represent that community, and (2) evaluate the alignment of the finetuned model to the community. We demonstrate the method’s utility in accurately representing political and diet communities on Reddit. Unlike prior methods requiring human-authored instructions, Community-Cross-Instruct generates instructions in a fully unsupervised manner, enhancing scalability and generalization across domains. This work enables cost-effective and automated surveying of diverse online communities.
%R 10.18653/v1/2024.emnlp-main.945
%U https://aclanthology.org/2024.emnlp-main.945/
%U https://doi.org/10.18653/v1/2024.emnlp-main.945
%P 17001-17019
Markdown (Informal)
[Community-Cross-Instruct: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities](https://aclanthology.org/2024.emnlp-main.945/) (He et al., EMNLP 2024)
ACL
Zihao He, Minh Duc Chu, Rebecca Dorn, Siyi Guo, and Kristina Lerman. 2024. Community-Cross-Instruct: Unsupervised Instruction Generation for Aligning Large Language Models to Online Communities. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing, pages 17001–17019, Miami, Florida, USA. Association for Computational Linguistics.