@article{patil-etal-2024-filtered,
title = "Filtered Corpus Training ({F}i{CT}) Shows that Language Models Can Generalize from Indirect Evidence",
author = "Patil, Abhinav and
Jumelet, Jaap and
Chiu, Yu Ying and
Lapastora, Andy and
Shen, Peter and
Wang, Lexie and
Willrich, Clevis and
Steinert-Threlkeld, Shane",
journal = "Transactions of the Association for Computational Linguistics",
volume = "12",
year = "2024",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2024.tacl-1.87/",
doi = "10.1162/tacl_a_00720",
pages = "1597--1615",
abstract = "This paper introduces Filtered Corpus Training, a method that trains language models (LMs) on corpora with certain linguistic constructions filtered out from the training data, and uses it to measure the ability of LMs to perform linguistic generalization on the basis of indirect evidence. We apply the method to both LSTM and Transformer LMs (of roughly comparable size), developing filtered corpora that target a wide range of linguistic phenomena. Our results show that while transformers are better qua LMs (as measured by perplexity), both models perform equally and surprisingly well on linguistic generalization measures, suggesting that they are capable of generalizing from indirect evidence."
}
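The abstract above describes the core idea of Filtered Corpus Training: remove every training sentence containing a targeted linguistic construction, train an LM on what remains, and then test whether the model still generalizes to that construction. As a purely illustrative sketch (not the authors' implementation; the regex filter, the choice of a comparative "than"-construction as the target, and the example sentences are assumptions made for this example), the corpus-filtering step might look like:

```python
import re

# Hypothetical target: sentences containing a comparative "than"-construction,
# used here as a crude stand-in for the construction-specific filters
# described in the paper.
TARGET_PATTERN = re.compile(r"\bthan\b", re.IGNORECASE)

def filter_corpus(sentences):
    """Split sentences into (kept, removed) by presence of the target construction."""
    kept, removed = [], []
    for sentence in sentences:
        (removed if TARGET_PATTERN.search(sentence) else kept).append(sentence)
    return kept, removed

if __name__ == "__main__":
    corpus = [
        "The cat sat on the mat.",
        "This model is larger than that one.",   # filtered out
        "Language models learn from text.",
    ]
    kept, removed = filter_corpus(corpus)
    print(f"kept {len(kept)} sentences, filtered {len(removed)}")
    # An LM would then be trained only on `kept`, and its generalization to the
    # filtered construction measured afterwards (e.g., on minimal-pair judgments).
```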
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="patil-etal-2024-filtered">
    <titleInfo>
      <title>Filtered Corpus Training (FiCT) Shows that Language Models Can Generalize from Indirect Evidence</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Abhinav</namePart>
      <namePart type="family">Patil</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jaap</namePart>
      <namePart type="family">Jumelet</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Yu</namePart>
      <namePart type="given">Ying</namePart>
      <namePart type="family">Chiu</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Andy</namePart>
      <namePart type="family">Lapastora</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Peter</namePart>
      <namePart type="family">Shen</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Lexie</namePart>
      <namePart type="family">Wang</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Clevis</namePart>
      <namePart type="family">Willrich</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Shane</namePart>
      <namePart type="family">Steinert-Threlkeld</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2024</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <genre authority="bibutilsgt">journal article</genre>
    <relatedItem type="host">
      <titleInfo>
        <title>Transactions of the Association for Computational Linguistics</title>
      </titleInfo>
      <originInfo>
        <issuance>continuing</issuance>
        <publisher>MIT Press</publisher>
        <place>
          <placeTerm type="text">Cambridge, MA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">periodical</genre>
      <genre authority="bibutilsgt">academic journal</genre>
    </relatedItem>
    <abstract>This paper introduces Filtered Corpus Training, a method that trains language models (LMs) on corpora with certain linguistic constructions filtered out from the training data, and uses it to measure the ability of LMs to perform linguistic generalization on the basis of indirect evidence. We apply the method to both LSTM and Transformer LMs (of roughly comparable size), developing filtered corpora that target a wide range of linguistic phenomena. Our results show that while transformers are better qua LMs (as measured by perplexity), both models perform equally and surprisingly well on linguistic generalization measures, suggesting that they are capable of generalizing from indirect evidence.</abstract>
    <identifier type="citekey">patil-etal-2024-filtered</identifier>
    <identifier type="doi">10.1162/tacl_a_00720</identifier>
    <location>
      <url>https://aclanthology.org/2024.tacl-1.87/</url>
    </location>
    <part>
      <date>2024</date>
      <detail type="volume"><number>12</number></detail>
      <extent unit="page">
        <start>1597</start>
        <end>1615</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Journal Article
%T Filtered Corpus Training (FiCT) Shows that Language Models Can Generalize from Indirect Evidence
%A Patil, Abhinav
%A Jumelet, Jaap
%A Chiu, Yu Ying
%A Lapastora, Andy
%A Shen, Peter
%A Wang, Lexie
%A Willrich, Clevis
%A Steinert-Threlkeld, Shane
%J Transactions of the Association for Computational Linguistics
%D 2024
%V 12
%I MIT Press
%C Cambridge, MA
%F patil-etal-2024-filtered
%X This paper introduces Filtered Corpus Training, a method that trains language models (LMs) on corpora with certain linguistic constructions filtered out from the training data, and uses it to measure the ability of LMs to perform linguistic generalization on the basis of indirect evidence. We apply the method to both LSTM and Transformer LMs (of roughly comparable size), developing filtered corpora that target a wide range of linguistic phenomena. Our results show that while transformers are better qua LMs (as measured by perplexity), both models perform equally and surprisingly well on linguistic generalization measures, suggesting that they are capable of generalizing from indirect evidence.
%R 10.1162/tacl_a_00720
%U https://aclanthology.org/2024.tacl-1.87/
%U https://doi.org/10.1162/tacl_a_00720
%P 1597-1615
Markdown (Informal)
[Filtered Corpus Training (FiCT) Shows that Language Models Can Generalize from Indirect Evidence](https://aclanthology.org/2024.tacl-1.87/) (Patil et al., TACL 2024)
ACL
Abhinav Patil, Jaap Jumelet, Yu Ying Chiu, Andy Lapastora, Peter Shen, Lexie Wang, Clevis Willrich, and Shane Steinert-Threlkeld. 2024. Filtered Corpus Training (FiCT) Shows that Language Models Can Generalize from Indirect Evidence. Transactions of the Association for Computational Linguistics, 12:1597–1615.