@inproceedings{denk-peleteiro-ramallo-2020-contextual,
  title     = {Contextual {BERT}: Conditioning the Language Model Using a Global State},
  author    = {Denk, Timo I. and
               Peleteiro Ramallo, Ana},
  editor    = {Ustalov, Dmitry and
               Somasundaran, Swapna and
               Panchenko, Alexander and
               Malliaros, Fragkiskos D. and
               Hulpu{\textcommabelow{s}}, Ioana and
               Jansen, Peter and
               Jana, Abhik},
  booktitle = {Proceedings of the Graph-based Methods for Natural Language Processing (TextGraphs)},
  month     = dec,
  year      = {2020},
  address   = {Barcelona, Spain (Online)},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2020.textgraphs-1.5},
  doi       = {10.18653/v1/2020.textgraphs-1.5},
  pages     = {46--50},
  abstract  = {BERT is a popular language model whose main pre-training task is to fill in the blank, i.e., predicting a word that was masked out of a sentence, based on the remaining words. In some applications, however, having an additional context can help the model make the right prediction, e.g., by taking the domain or the time of writing into account. This motivates us to advance the BERT architecture by adding a global state for conditioning on a fixed-sized context. We present our two novel approaches and apply them to an industry use-case, where we complete fashion outfits with missing articles, conditioned on a specific customer. An experimental comparison to other methods from the literature shows that our methods improve personalization significantly.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="denk-peleteiro-ramallo-2020-contextual">
<titleInfo>
<title>Contextual BERT: Conditioning the Language Model Using a Global State</title>
</titleInfo>
<name type="personal">
<namePart type="given">Timo</namePart>
<namePart type="given">I</namePart>
<namePart type="family">Denk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ana</namePart>
<namePart type="family">Peleteiro Ramallo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Graph-based Methods for Natural Language Processing (TextGraphs)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dmitry</namePart>
<namePart type="family">Ustalov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Swapna</namePart>
<namePart type="family">Somasundaran</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alexander</namePart>
<namePart type="family">Panchenko</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Fragkiskos</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Malliaros</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ioana</namePart>
<namePart type="family">Hulpuș</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Peter</namePart>
<namePart type="family">Jansen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Abhik</namePart>
<namePart type="family">Jana</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>BERT is a popular language model whose main pre-training task is to fill in the blank, i.e., predicting a word that was masked out of a sentence, based on the remaining words. In some applications, however, having an additional context can help the model make the right prediction, e.g., by taking the domain or the time of writing into account. This motivates us to advance the BERT architecture by adding a global state for conditioning on a fixed-sized context. We present our two novel approaches and apply them to an industry use-case, where we complete fashion outfits with missing articles, conditioned on a specific customer. An experimental comparison to other methods from the literature shows that our methods improve personalization significantly.</abstract>
<identifier type="citekey">denk-peleteiro-ramallo-2020-contextual</identifier>
<identifier type="doi">10.18653/v1/2020.textgraphs-1.5</identifier>
<location>
<url>https://aclanthology.org/2020.textgraphs-1.5</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>46</start>
<end>50</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Contextual BERT: Conditioning the Language Model Using a Global State
%A Denk, Timo I.
%A Peleteiro Ramallo, Ana
%Y Ustalov, Dmitry
%Y Somasundaran, Swapna
%Y Panchenko, Alexander
%Y Malliaros, Fragkiskos D.
%Y Hulpuș, Ioana
%Y Jansen, Peter
%Y Jana, Abhik
%S Proceedings of the Graph-based Methods for Natural Language Processing (TextGraphs)
%D 2020
%8 December
%I Association for Computational Linguistics
%C Barcelona, Spain (Online)
%F denk-peleteiro-ramallo-2020-contextual
%X BERT is a popular language model whose main pre-training task is to fill in the blank, i.e., predicting a word that was masked out of a sentence, based on the remaining words. In some applications, however, having an additional context can help the model make the right prediction, e.g., by taking the domain or the time of writing into account. This motivates us to advance the BERT architecture by adding a global state for conditioning on a fixed-sized context. We present our two novel approaches and apply them to an industry use-case, where we complete fashion outfits with missing articles, conditioned on a specific customer. An experimental comparison to other methods from the literature shows that our methods improve personalization significantly.
%R 10.18653/v1/2020.textgraphs-1.5
%U https://aclanthology.org/2020.textgraphs-1.5
%U https://doi.org/10.18653/v1/2020.textgraphs-1.5
%P 46-50
Markdown (Informal)
[Contextual BERT: Conditioning the Language Model Using a Global State](https://aclanthology.org/2020.textgraphs-1.5) (Denk & Peleteiro Ramallo, TextGraphs 2020)
ACL