@comment{Bibliographic record for the Findings of EMNLP 2024 paper by Kahl et al. (pages 2028--2053).}
@inproceedings{kahl-etal-2024-llms,
    title = "{LLM}s Cannot (Yet) Match the Specificity and Simplicity of Online Communities in Long Form Question Answering",
    author = "Kahl, Kris-Fillip and
      Buz, Tolga and
      Biswas, Russa and
      De Melo, Gerard",
    editor = "Al-Onaizan, Yaser and
      Bansal, Mohit and
      Chen, Yun-Nung",
    booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2024",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-emnlp.111",
    doi = "10.18653/v1/2024.findings-emnlp.111",
    pages = "2028--2053",
    abstract = "Retail investing is on the rise, and a growing number of users is relying on online finance communities to educate themselves. However, recent years have positioned Large Language Models (LLMs) as powerful question answering (QA) tools, shifting users away from interacting in communities towards discourse with AI-driven conversational interfaces. These AI tools are currently limited by the availability of labelled data containing domain-specific financial knowledge. Therefore, in this work, we curate a QA preference dataset SocialFinanceQA for fine-tuning and aligning LLMs, extracted from more than 7.4 million submissions and 82 million comments from 2008 to 2022 in Reddit{'}s 15 largest finance communities. Additionally, we propose a novel framework called SocialQA-Eval as a generally-applicable method to evaluate generated QA responses. We evaluate various LLMs fine-tuned on this dataset, using traditional metrics, LLM-based evaluation, and human annotation. Our results demonstrate the value of high-quality Reddit data, with even state-of-the-art LLMs improving on producing simpler and more specific responses.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kahl-etal-2024-llms">
<titleInfo>
<title>LLMs Cannot (Yet) Match the Specificity and Simplicity of Online Communities in Long Form Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kris-Fillip</namePart>
<namePart type="family">Kahl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tolga</namePart>
<namePart type="family">Buz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Russa</namePart>
<namePart type="family">Biswas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Gerard</namePart>
<namePart type="family">De Melo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2024</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yaser</namePart>
<namePart type="family">Al-Onaizan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohit</namePart>
<namePart type="family">Bansal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yun-Nung</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Miami, Florida, USA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Retail investing is on the rise, and a growing number of users is relying on online finance communities to educate themselves. However, recent years have positioned Large Language Models (LLMs) as powerful question answering (QA) tools, shifting users away from interacting in communities towards discourse with AI-driven conversational interfaces. These AI tools are currently limited by the availability of labelled data containing domain-specific financial knowledge. Therefore, in this work, we curate a QA preference dataset SocialFinanceQA for fine-tuning and aligning LLMs, extracted from more than 7.4 million submissions and 82 million comments from 2008 to 2022 in Reddit’s 15 largest finance communities. Additionally, we propose a novel framework called SocialQA-Eval as a generally-applicable method to evaluate generated QA responses. We evaluate various LLMs fine-tuned on this dataset, using traditional metrics, LLM-based evaluation, and human annotation. Our results demonstrate the value of high-quality Reddit data, with even state-of-the-art LLMs improving on producing simpler and more specific responses.</abstract>
<identifier type="citekey">kahl-etal-2024-llms</identifier>
<identifier type="doi">10.18653/v1/2024.findings-emnlp.111</identifier>
<location>
<url>https://aclanthology.org/2024.findings-emnlp.111</url>
</location>
<part>
<date>2024-11</date>
<extent unit="page">
<start>2028</start>
<end>2053</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T LLMs Cannot (Yet) Match the Specificity and Simplicity of Online Communities in Long Form Question Answering
%A Kahl, Kris-Fillip
%A Buz, Tolga
%A Biswas, Russa
%A De Melo, Gerard
%Y Al-Onaizan, Yaser
%Y Bansal, Mohit
%Y Chen, Yun-Nung
%S Findings of the Association for Computational Linguistics: EMNLP 2024
%D 2024
%8 November
%I Association for Computational Linguistics
%C Miami, Florida, USA
%F kahl-etal-2024-llms
%X Retail investing is on the rise, and a growing number of users is relying on online finance communities to educate themselves. However, recent years have positioned Large Language Models (LLMs) as powerful question answering (QA) tools, shifting users away from interacting in communities towards discourse with AI-driven conversational interfaces. These AI tools are currently limited by the availability of labelled data containing domain-specific financial knowledge. Therefore, in this work, we curate a QA preference dataset SocialFinanceQA for fine-tuning and aligning LLMs, extracted from more than 7.4 million submissions and 82 million comments from 2008 to 2022 in Reddit’s 15 largest finance communities. Additionally, we propose a novel framework called SocialQA-Eval as a generally-applicable method to evaluate generated QA responses. We evaluate various LLMs fine-tuned on this dataset, using traditional metrics, LLM-based evaluation, and human annotation. Our results demonstrate the value of high-quality Reddit data, with even state-of-the-art LLMs improving on producing simpler and more specific responses.
%R 10.18653/v1/2024.findings-emnlp.111
%U https://aclanthology.org/2024.findings-emnlp.111
%U https://doi.org/10.18653/v1/2024.findings-emnlp.111
%P 2028-2053
Markdown (Informal)
[LLMs Cannot (Yet) Match the Specificity and Simplicity of Online Communities in Long Form Question Answering](https://aclanthology.org/2024.findings-emnlp.111) (Kahl et al., Findings 2024)
ACL