BibTeX
@inproceedings{proskurina-etal-2023-bert,
title = "Can {BERT} eat {R}u{C}o{LA}? Topological Data Analysis to Explain",
author = "Proskurina, Irina and
Artemova, Ekaterina and
Piontkovskaya, Irina",
editor = "Piskorski, Jakub and
Marci{\'n}czuk, Micha{\l} and
Nakov, Preslav and
Ogrodniczuk, Maciej and
Pollak, Senja and
P{\v{r}}ib{\'a}{\v{n}}, Pavel and
Rybak, Piotr and
Steinberger, Josef and
Yangarber, Roman",
booktitle = "Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.bsnlp-1.15",
doi = "10.18653/v1/2023.bsnlp-1.15",
pages = "123--137",
abstract = "This paper investigates how Transformer language models (LMs) fine-tuned for acceptability classification capture linguistic features. Our approach is based on best practices of topological data analysis (TDA) in NLP: we construct directed attention graphs from attention matrices, derive topological features from them and feed them to linear classifiers. We introduce two novel features, chordality and the matching number, and show that TDA-based classifiers outperform fine-tuning baselines. We experiment with two datasets, CoLA and RuCoLA, in English and Russian, which are typologically different languages. On top of that, we propose several black-box introspection techniques aimed at detecting changes in the attention mode of the LM{'}s during fine-tuning, defining the LM{'}s prediction confidences, and associating individual heads with fine-grained grammar phenomena. Our results contribute to understanding the behaviour of monolingual LMs in the acceptability classification task, provide insights into the functional roles of attention heads, and highlight the advantages of TDA-based approaches for analyzing LMs.We release the code and the experimental results for further uptake.",
}
MODS XML
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="proskurina-etal-2023-bert">
<titleInfo>
<title>Can BERT eat RuCoLA? Topological Data Analysis to Explain</title>
</titleInfo>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Proskurina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Artemova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Irina</namePart>
<namePart type="family">Piontkovskaya</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jakub</namePart>
<namePart type="family">Piskorski</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michał</namePart>
<namePart type="family">Marcińczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Preslav</namePart>
<namePart type="family">Nakov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maciej</namePart>
<namePart type="family">Ogrodniczuk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Senja</namePart>
<namePart type="family">Pollak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pavel</namePart>
<namePart type="family">Přibáň</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Piotr</namePart>
<namePart type="family">Rybak</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josef</namePart>
<namePart type="family">Steinberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Roman</namePart>
<namePart type="family">Yangarber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper investigates how Transformer language models (LMs) fine-tuned for acceptability classification capture linguistic features. Our approach is based on best practices of topological data analysis (TDA) in NLP: we construct directed attention graphs from attention matrices, derive topological features from them and feed them to linear classifiers. We introduce two novel features, chordality and the matching number, and show that TDA-based classifiers outperform fine-tuning baselines. We experiment with two datasets, CoLA and RuCoLA, in English and Russian, which are typologically different languages. On top of that, we propose several black-box introspection techniques aimed at detecting changes in the attention mode of the LMs during fine-tuning, defining the LM’s prediction confidences, and associating individual heads with fine-grained grammar phenomena. Our results contribute to understanding the behaviour of monolingual LMs in the acceptability classification task, provide insights into the functional roles of attention heads, and highlight the advantages of TDA-based approaches for analyzing LMs. We release the code and the experimental results for further uptake.</abstract>
<identifier type="citekey">proskurina-etal-2023-bert</identifier>
<identifier type="doi">10.18653/v1/2023.bsnlp-1.15</identifier>
<location>
<url>https://aclanthology.org/2023.bsnlp-1.15</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>123</start>
<end>137</end>
</extent>
</part>
</mods>
</modsCollection>
Endnote
%0 Conference Proceedings
%T Can BERT eat RuCoLA? Topological Data Analysis to Explain
%A Proskurina, Irina
%A Artemova, Ekaterina
%A Piontkovskaya, Irina
%Y Piskorski, Jakub
%Y Marcińczuk, Michał
%Y Nakov, Preslav
%Y Ogrodniczuk, Maciej
%Y Pollak, Senja
%Y Přibáň, Pavel
%Y Rybak, Piotr
%Y Steinberger, Josef
%Y Yangarber, Roman
%S Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023)
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F proskurina-etal-2023-bert
%X This paper investigates how Transformer language models (LMs) fine-tuned for acceptability classification capture linguistic features. Our approach is based on best practices of topological data analysis (TDA) in NLP: we construct directed attention graphs from attention matrices, derive topological features from them and feed them to linear classifiers. We introduce two novel features, chordality and the matching number, and show that TDA-based classifiers outperform fine-tuning baselines. We experiment with two datasets, CoLA and RuCoLA, in English and Russian, which are typologically different languages. On top of that, we propose several black-box introspection techniques aimed at detecting changes in the attention mode of the LMs during fine-tuning, defining the LM’s prediction confidences, and associating individual heads with fine-grained grammar phenomena. Our results contribute to understanding the behaviour of monolingual LMs in the acceptability classification task, provide insights into the functional roles of attention heads, and highlight the advantages of TDA-based approaches for analyzing LMs. We release the code and the experimental results for further uptake.
%R 10.18653/v1/2023.bsnlp-1.15
%U https://aclanthology.org/2023.bsnlp-1.15
%U https://doi.org/10.18653/v1/2023.bsnlp-1.15
%P 123-137
Markdown (Informal)
[Can BERT eat RuCoLA? Topological Data Analysis to Explain](https://aclanthology.org/2023.bsnlp-1.15) (Proskurina et al., BSNLP 2023)
ACL
Irina Proskurina, Ekaterina Artemova, and Irina Piontkovskaya. 2023. Can BERT eat RuCoLA? Topological Data Analysis to Explain. In Proceedings of the 9th Workshop on Slavic Natural Language Processing 2023 (SlavicNLP 2023), pages 123–137, Dubrovnik, Croatia. Association for Computational Linguistics.
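
The abstract outlines a three-step pipeline: build directed attention graphs from attention matrices, extract topological features (including the two features the paper proposes, chordality and the matching number), and train linear classifiers on those features. Below is a minimal Python sketch of that pipeline, not the authors' released code; the attention threshold, the random toy data, and all function names are illustrative assumptions.

```python
# Minimal sketch of the TDA-over-attention pipeline from the abstract.
# Assumptions (not from the paper's code): a fixed edge threshold of 0.5,
# random matrices standing in for real BERT attention heads, and random
# binary acceptability labels for the toy classifier.
import networkx as nx
import numpy as np
from sklearn.linear_model import LogisticRegression


def attention_graph(attn: np.ndarray, threshold: float = 0.5) -> nx.DiGraph:
    """Directed graph with an edge i -> j wherever attention exceeds the threshold."""
    graph = nx.DiGraph()
    graph.add_nodes_from(range(attn.shape[0]))
    rows, cols = np.where(attn > threshold)
    graph.add_edges_from(zip(rows.tolist(), cols.tolist()))
    return graph


def graph_features(graph: nx.DiGraph) -> list[float]:
    """Two of the features named in the abstract: chordality and the matching number."""
    undirected = graph.to_undirected()
    undirected.remove_edges_from(nx.selfloop_edges(undirected))
    is_chordal = float(nx.is_chordal(undirected))
    # Matching number = size of a maximum-cardinality matching.
    matching_number = float(len(nx.max_weight_matching(undirected, maxcardinality=True)))
    return [is_chordal, matching_number]


# Toy usage: 40 random 8x8 "attention matrices" with random labels.
rng = np.random.default_rng(0)
X = np.array([graph_features(attention_graph(rng.random((8, 8)))) for _ in range(40)])
y = rng.integers(0, 2, size=40)  # acceptable vs. unacceptable
clf = LogisticRegression().fit(X, y)
print(clf.score(X, y))
```

In the paper's setting, the feature vector would be assembled per sentence across all layers and heads before fitting the linear classifier; the sketch computes it for a single graph only.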