% Workshop paper record (see the url field for the source page).
% Values are brace-delimited; {NLP} in booktitle is protected so that styles
% which recase booktitles keep the acronym in capitals.
% NOTE(review): address appears to hold the event location rather than the
% publisher's city; kept as in the original record -- confirm against the
% target style before changing.
@inproceedings{brunila-etal-2021-bridging,
    title     = {Bridging the gap between supervised classification and unsupervised topic modelling for social-media assisted crisis management},
    author    = {Brunila, Mikael and
                 Zhao, Rosie and
                 Mircea, Andrei and
                 Lumley, Sam and
                 Sieber, Renee},
    editor    = {Ben-David, Eyal and
                 Cohen, Shay and
                 McDonald, Ryan and
                 Plank, Barbara and
                 Reichart, Roi and
                 Rotman, Guy and
                 Ziser, Yftah},
    booktitle = {Proceedings of the Second Workshop on Domain Adaptation for {NLP}},
    month     = apr,
    year      = {2021},
    address   = {Kyiv, Ukraine},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2021.adaptnlp-1.5},
    pages     = {33--49},
    abstract  = {Social media such as Twitter provide valuable information to crisis managers and affected people during natural disasters. Machine learning can help structure and extract information from the large volume of messages shared during a crisis; however, the constantly evolving nature of crises makes effective domain adaptation essential. Supervised classification is limited by unchangeable class labels that may not be relevant to new events, and unsupervised topic modelling by insufficient prior knowledge. In this paper, we bridge the gap between the two and show that BERT embeddings finetuned on crisis-related tweet classification can effectively be used to adapt to a new crisis, discovering novel topics while preserving relevant classes from supervised training, and leveraging bidirectional self-attention to extract topic keywords. We create a dataset of tweets from a snowstorm to evaluate our method{'}s transferability to new crises, and find that it outperforms traditional topic models in both automatic, and human evaluations grounded in the needs of crisis managers. More broadly, our method can be used for textual domain adaptation where the latent classes are unknown but overlap with known classes from other domains.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="brunila-etal-2021-bridging">
    <!-- Title of the paper -->
    <titleInfo>
      <title>Bridging the gap between supervised classification and unsupervised topic modelling for social-media assisted crisis management</title>
    </titleInfo>
    <!-- Authors, in listed order -->
    <name type="personal">
      <namePart type="given">Mikael</namePart>
      <namePart type="family">Brunila</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Rosie</namePart>
      <namePart type="family">Zhao</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andrei</namePart>
      <namePart type="family">Mircea</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Sam</namePart>
      <namePart type="family">Lumley</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Renee</namePart>
      <namePart type="family">Sieber</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2021-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Second Workshop on Domain Adaptation for NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Eyal</namePart>
        <namePart type="family">Ben-David</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Shay</namePart>
        <namePart type="family">Cohen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ryan</namePart>
        <namePart type="family">McDonald</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Barbara</namePart>
        <namePart type="family">Plank</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Roi</namePart>
        <namePart type="family">Reichart</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Guy</namePart>
        <namePart type="family">Rotman</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Yftah</namePart>
        <namePart type="family">Ziser</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Kyiv, Ukraine</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Social media such as Twitter provide valuable information to crisis managers and affected people during natural disasters. Machine learning can help structure and extract information from the large volume of messages shared during a crisis; however, the constantly evolving nature of crises makes effective domain adaptation essential. Supervised classification is limited by unchangeable class labels that may not be relevant to new events, and unsupervised topic modelling by insufficient prior knowledge. In this paper, we bridge the gap between the two and show that BERT embeddings finetuned on crisis-related tweet classification can effectively be used to adapt to a new crisis, discovering novel topics while preserving relevant classes from supervised training, and leveraging bidirectional self-attention to extract topic keywords. We create a dataset of tweets from a snowstorm to evaluate our method’s transferability to new crises, and find that it outperforms traditional topic models in both automatic, and human evaluations grounded in the needs of crisis managers. More broadly, our method can be used for textual domain adaptation where the latent classes are unknown but overlap with known classes from other domains.</abstract>
    <!-- Identifiers and location of the record -->
    <identifier type="citekey">brunila-etal-2021-bridging</identifier>
    <location>
      <url>https://aclanthology.org/2021.adaptnlp-1.5</url>
    </location>
    <!-- Issue date and page extent within the host volume -->
    <part>
      <date>2021-04</date>
      <extent unit="page">
        <start>33</start>
        <end>49</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Bridging the gap between supervised classification and unsupervised topic modelling for social-media assisted crisis management
%A Brunila, Mikael
%A Zhao, Rosie
%A Mircea, Andrei
%A Lumley, Sam
%A Sieber, Renee
%Y Ben-David, Eyal
%Y Cohen, Shay
%Y McDonald, Ryan
%Y Plank, Barbara
%Y Reichart, Roi
%Y Rotman, Guy
%Y Ziser, Yftah
%S Proceedings of the Second Workshop on Domain Adaptation for NLP
%D 2021
%8 April
%I Association for Computational Linguistics
%C Kyiv, Ukraine
%F brunila-etal-2021-bridging
%X Social media such as Twitter provide valuable information to crisis managers and affected people during natural disasters. Machine learning can help structure and extract information from the large volume of messages shared during a crisis; however, the constantly evolving nature of crises makes effective domain adaptation essential. Supervised classification is limited by unchangeable class labels that may not be relevant to new events, and unsupervised topic modelling by insufficient prior knowledge. In this paper, we bridge the gap between the two and show that BERT embeddings finetuned on crisis-related tweet classification can effectively be used to adapt to a new crisis, discovering novel topics while preserving relevant classes from supervised training, and leveraging bidirectional self-attention to extract topic keywords. We create a dataset of tweets from a snowstorm to evaluate our method’s transferability to new crises, and find that it outperforms traditional topic models in both automatic, and human evaluations grounded in the needs of crisis managers. More broadly, our method can be used for textual domain adaptation where the latent classes are unknown but overlap with known classes from other domains.
%U https://aclanthology.org/2021.adaptnlp-1.5
%P 33-49
Markdown (Informal)
[Bridging the gap between supervised classification and unsupervised topic modelling for social-media assisted crisis management](https://aclanthology.org/2021.adaptnlp-1.5) (Brunila et al., AdaptNLP 2021)
ACL