@inproceedings{wiegmann-etal-2022-language,
title = "Language Models as Context-sensitive Word Search Engines",
author = {Wiegmann, Matti and
V{\"o}lske, Michael and
Stein, Benno and
Potthast, Martin},
editor = "Huang, Ting-Hao 'Kenneth' and
Raheja, Vipul and
Kang, Dongyeop and
Chung, John Joon Young and
Gissin, Daniel and
Lee, Mina and
Gero, Katy Ilonka",
booktitle = "Proceedings of the First Workshop on Intelligent and Interactive Writing Assistants (In2Writing 2022)",
month = may,
year = "2022",
address = "Dublin, Ireland",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.in2writing-1.5",
doi = "10.18653/v1/2022.in2writing-1.5",
pages = "39--45",
abstract = "Context-sensitive word search engines are writing assistants that support word choice, phrasing, and idiomatic language use by indexing large-scale n-gram collections and implementing a wildcard search. However, search results become unreliable with increasing context size (e.g., n{\textgreater}=5), when observations become sparse. This paper proposes two strategies for word search with larger n, based on masked and conditional language modeling. We build such search engines using BERT and BART and compare their capabilities in answering English context queries with those of the n-gram-based word search engine Netspeak. Our proposed strategies score within 5 percentage points MRR of n-gram collections while answering up to 5 times as many queries.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="wiegmann-etal-2022-language">
<titleInfo>
<title>Language Models as Context-sensitive Word Search Engines</title>
</titleInfo>
<name type="personal">
<namePart type="given">Matti</namePart>
<namePart type="family">Wiegmann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Völske</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Benno</namePart>
<namePart type="family">Stein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Martin</namePart>
<namePart type="family">Potthast</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First Workshop on Intelligent and Interactive Writing Assistants (In2Writing 2022)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ting-Hao</namePart>
<namePart type="given">’Kenneth’</namePart>
<namePart type="family">Huang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vipul</namePart>
<namePart type="family">Raheja</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dongyeop</namePart>
<namePart type="family">Kang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">John</namePart>
<namePart type="given">Joon</namePart>
<namePart type="given">Young</namePart>
<namePart type="family">Chung</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Gissin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mina</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Katy</namePart>
<namePart type="given">Ilonka</namePart>
<namePart type="family">Gero</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dublin, Ireland</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Context-sensitive word search engines are writing assistants that support word choice, phrasing, and idiomatic language use by indexing large-scale n-gram collections and implementing a wildcard search. However, search results become unreliable with increasing context size (e.g., n&gt;=5), when observations become sparse. This paper proposes two strategies for word search with larger n, based on masked and conditional language modeling. We build such search engines using BERT and BART and compare their capabilities in answering English context queries with those of the n-gram-based word search engine Netspeak. Our proposed strategies score within 5 percentage points MRR of n-gram collections while answering up to 5 times as many queries.</abstract>
<identifier type="citekey">wiegmann-etal-2022-language</identifier>
<identifier type="doi">10.18653/v1/2022.in2writing-1.5</identifier>
<location>
<url>https://aclanthology.org/2022.in2writing-1.5</url>
</location>
<part>
<date>2022-05</date>
<extent unit="page">
<start>39</start>
<end>45</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Language Models as Context-sensitive Word Search Engines
%A Wiegmann, Matti
%A Völske, Michael
%A Stein, Benno
%A Potthast, Martin
%Y Huang, Ting-Hao ’Kenneth’
%Y Raheja, Vipul
%Y Kang, Dongyeop
%Y Chung, John Joon Young
%Y Gissin, Daniel
%Y Lee, Mina
%Y Gero, Katy Ilonka
%S Proceedings of the First Workshop on Intelligent and Interactive Writing Assistants (In2Writing 2022)
%D 2022
%8 May
%I Association for Computational Linguistics
%C Dublin, Ireland
%F wiegmann-etal-2022-language
%X Context-sensitive word search engines are writing assistants that support word choice, phrasing, and idiomatic language use by indexing large-scale n-gram collections and implementing a wildcard search. However, search results become unreliable with increasing context size (e.g., n>=5), when observations become sparse. This paper proposes two strategies for word search with larger n, based on masked and conditional language modeling. We build such search engines using BERT and BART and compare their capabilities in answering English context queries with those of the n-gram-based word search engine Netspeak. Our proposed strategies score within 5 percentage points MRR of n-gram collections while answering up to 5 times as many queries.
%R 10.18653/v1/2022.in2writing-1.5
%U https://aclanthology.org/2022.in2writing-1.5
%U https://doi.org/10.18653/v1/2022.in2writing-1.5
%P 39-45
Markdown (Informal)
[Language Models as Context-sensitive Word Search Engines](https://aclanthology.org/2022.in2writing-1.5) (Wiegmann et al., In2Writing 2022)
ACL
Matti Wiegmann, Michael Völske, Benno Stein, and Martin Potthast. 2022. Language Models as Context-sensitive Word Search Engines. In Proceedings of the First Workshop on Intelligent and Interactive Writing Assistants (In2Writing 2022), pages 39–45, Dublin, Ireland. Association for Computational Linguistics.
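
As a rough illustration of the masked-language-modeling strategy the abstract describes, the sketch below answers a single-wildcard context query with Hugging Face's fill-mask pipeline. This is a minimal sketch, not the authors' system: the checkpoint bert-base-uncased, the word_search helper, the '?' wildcard convention, and the example query are all illustrative assumptions.

# Minimal sketch (not the paper's code): answer a context-sensitive word search
# query by letting a masked language model rank fillers for a wildcard slot.
from transformers import pipeline

# Checkpoint is an assumption; any masked LM with a mask token would do.
fill_mask = pipeline("fill-mask", model="bert-base-uncased")

def word_search(query: str, top_k: int = 5):
    # Replace the '?' wildcard with the model's mask token, then rank candidates.
    masked = query.replace("?", fill_mask.tokenizer.mask_token)
    return [(r["token_str"], r["score"]) for r in fill_mask(masked, top_k=top_k)]

for word, score in word_search("we are waiting ? your response"):
    print(f"{word}\t{score:.3f}")

The paper's second strategy, based on conditional language modeling with BART, would fill the slot generatively rather than by scoring a single mask token; the sketch above covers only the masked-LM case.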