@inproceedings{wen-etal-2023-towards,
title = "Towards Open-Domain {T}witter User Profile Inference",
author = "Wen, Haoyang and
Xiao, Zhenxin and
Hovy, Eduard and
Hauptmann, Alexander",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.198",
doi = "10.18653/v1/2023.findings-acl.198",
pages = "3172--3188",
abstract = "Twitter user profile inference utilizes information from Twitter to predict user attributes (e.g., occupation, location), which is controversial because of its usefulness for downstream applications and its potential to reveal users{'} privacy. Therefore, it is important for researchers to determine the extent of profiling in a safe environment to facilitate proper use and make the public aware of the potential risks. Contrary to existing approaches on limited attributes, we explore open-domain Twitter user profile inference. We conduct a case study where we collect publicly available WikiData public figure profiles and use diverse WikiData predicates for profile inference. After removing sensitive attributes, our data contains over 150K public figure profiles from WikiData, over 50 different attribute predicates, and over 700K attribute values. We further propose a prompt-based generation method, which can infer values that are implicitly mentioned in the Twitter information. Experimental results show that the generation-based approach can infer more comprehensive user profiles than baseline extraction-based methods, but limitations still remain to be applied for real-world use. We also enclose a detailed ethical statement for our data, potential benefits and risks from this work, and our efforts to mitigate the risks.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wen-etal-2023-towards">
<titleInfo>
<title>Towards Open-Domain Twitter User Profile Inference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Haoyang</namePart>
<namePart type="family">Wen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zhenxin</namePart>
<namePart type="family">Xiao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eduard</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Hauptmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Twitter user profile inference utilizes information from Twitter to predict user attributes (e.g., occupation, location), which is controversial because of its usefulness for downstream applications and its potential to reveal users’ privacy. Therefore, it is important for researchers to determine the extent of profiling in a safe environment to facilitate proper use and make the public aware of the potential risks. Contrary to existing approaches on limited attributes, we explore open-domain Twitter user profile inference. We conduct a case study where we collect publicly available WikiData public figure profiles and use diverse WikiData predicates for profile inference. After removing sensitive attributes, our data contains over 150K public figure profiles from WikiData, over 50 different attribute predicates, and over 700K attribute values. We further propose a prompt-based generation method, which can infer values that are implicitly mentioned in the Twitter information. Experimental results show that the generation-based approach can infer more comprehensive user profiles than baseline extraction-based methods, but limitations still remain to be applied for real-world use. We also enclose a detailed ethical statement for our data, potential benefits and risks from this work, and our efforts to mitigate the risks.</abstract>
<identifier type="citekey">wen-etal-2023-towards</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.198</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.198</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>3172</start>
<end>3188</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Towards Open-Domain Twitter User Profile Inference
%A Wen, Haoyang
%A Xiao, Zhenxin
%A Hovy, Eduard
%A Hauptmann, Alexander
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F wen-etal-2023-towards
%X Twitter user profile inference utilizes information from Twitter to predict user attributes (e.g., occupation, location), which is controversial because of its usefulness for downstream applications and its potential to reveal users’ privacy. Therefore, it is important for researchers to determine the extent of profiling in a safe environment to facilitate proper use and make the public aware of the potential risks. Contrary to existing approaches on limited attributes, we explore open-domain Twitter user profile inference. We conduct a case study where we collect publicly available WikiData public figure profiles and use diverse WikiData predicates for profile inference. After removing sensitive attributes, our data contains over 150K public figure profiles from WikiData, over 50 different attribute predicates, and over 700K attribute values. We further propose a prompt-based generation method, which can infer values that are implicitly mentioned in the Twitter information. Experimental results show that the generation-based approach can infer more comprehensive user profiles than baseline extraction-based methods, but limitations still remain to be applied for real-world use. We also enclose a detailed ethical statement for our data, potential benefits and risks from this work, and our efforts to mitigate the risks.
%R 10.18653/v1/2023.findings-acl.198
%U https://aclanthology.org/2023.findings-acl.198
%U https://doi.org/10.18653/v1/2023.findings-acl.198
%P 3172-3188
Markdown (Informal)
[Towards Open-Domain Twitter User Profile Inference](https://aclanthology.org/2023.findings-acl.198) (Wen et al., Findings 2023)
ACL
- Haoyang Wen, Zhenxin Xiao, Eduard Hovy, and Alexander Hauptmann. 2023. Towards Open-Domain Twitter User Profile Inference. In Findings of the Association for Computational Linguistics: ACL 2023, pages 3172–3188, Toronto, Canada. Association for Computational Linguistics.