@inproceedings{merlo-etal-2023-blackbird,
title = "Blackbird Language Matrices Tasks for Generalization",
author = "Merlo, Paola and
Jiang, Chunyang and
Samo, Giuseppe and
Nastase, Vivi",
editor = "Hupkes, Dieuwke and
Dankers, Verna and
Batsuren, Khuyagbaatar and
Sinha, Koustuv and
Kazemnejad, Amirhossein and
Christodoulopoulos, Christos and
Cotterell, Ryan and
Bruni, Elia",
booktitle = "Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP",
month = dec,
year = "2023",
address = "Singapore",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.genbench-1.13",
doi = "10.18653/v1/2023.genbench-1.13",
pages = "163--172",
abstract = "To develop a system with near-human language capabilities, we need to understand current systems{'} generalisation and compositional abilities. We approach this by generating compositional, structured data, inspired from visual intelligence tests, that depend on the problem-solvers being able to disentangle objects and their absolute and relative properties in a sequence of images. We design an analogous task and develop the corresponding datasets that capture specific linguistic phenomena and their properties. Solving each problem instance depends on detecting the relevant linguistic objects and generative rules of the problem. We propose two datasets modelling two linguistic phenomena {--} subject-verb agreement in French, and verb alternations in English. The datasets can be used to investigate how LLMs encode linguistic objects, such as phrases, their grammatical and semantic properties, such as number or semantic role, and how such information is combined to correctly solve each problem. Specifically generated error types help investigate the behaviour of the system, which important information it is able to detect, and which structures mislead it.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="merlo-etal-2023-blackbird">
<titleInfo>
<title>Blackbird Language Matrices Tasks for Generalization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Paola</namePart>
<namePart type="family">Merlo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chunyang</namePart>
<namePart type="family">Jiang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Giuseppe</namePart>
<namePart type="family">Samo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vivi</namePart>
<namePart type="family">Nastase</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Verna</namePart>
<namePart type="family">Dankers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khuyagbaatar</namePart>
<namePart type="family">Batsuren</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Koustuv</namePart>
<namePart type="family">Sinha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amirhossein</namePart>
<namePart type="family">Kazemnejad</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christos</namePart>
<namePart type="family">Christodoulopoulos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ryan</namePart>
<namePart type="family">Cotterell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Elia</namePart>
<namePart type="family">Bruni</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Singapore</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>To develop a system with near-human language capabilities, we need to understand current systems’ generalisation and compositional abilities. We approach this by generating compositional, structured data, inspired from visual intelligence tests, that depend on the problem-solvers being able to disentangle objects and their absolute and relative properties in a sequence of images. We design an analogous task and develop the corresponding datasets that capture specific linguistic phenomena and their properties. Solving each problem instance depends on detecting the relevant linguistic objects and generative rules of the problem. We propose two datasets modelling two linguistic phenomena – subject-verb agreement in French, and verb alternations in English. The datasets can be used to investigate how LLMs encode linguistic objects, such as phrases, their grammatical and semantic properties, such as number or semantic role, and how such information is combined to correctly solve each problem. Specifically generated error types help investigate the behaviour of the system, which important information it is able to detect, and which structures mislead it.</abstract>
<identifier type="citekey">merlo-etal-2023-blackbird</identifier>
<identifier type="doi">10.18653/v1/2023.genbench-1.13</identifier>
<location>
<url>https://aclanthology.org/2023.genbench-1.13</url>
</location>
<part>
<date>2023-12</date>
<extent unit="page">
<start>163</start>
<end>172</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Blackbird Language Matrices Tasks for Generalization
%A Merlo, Paola
%A Jiang, Chunyang
%A Samo, Giuseppe
%A Nastase, Vivi
%Y Hupkes, Dieuwke
%Y Dankers, Verna
%Y Batsuren, Khuyagbaatar
%Y Sinha, Koustuv
%Y Kazemnejad, Amirhossein
%Y Christodoulopoulos, Christos
%Y Cotterell, Ryan
%Y Bruni, Elia
%S Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F merlo-etal-2023-blackbird
%X To develop a system with near-human language capabilities, we need to understand current systems’ generalisation and compositional abilities. We approach this by generating compositional, structured data, inspired from visual intelligence tests, that depend on the problem-solvers being able to disentangle objects and their absolute and relative properties in a sequence of images. We design an analogous task and develop the corresponding datasets that capture specific linguistic phenomena and their properties. Solving each problem instance depends on detecting the relevant linguistic objects and generative rules of the problem. We propose two datasets modelling two linguistic phenomena – subject-verb agreement in French, and verb alternations in English. The datasets can be used to investigate how LLMs encode linguistic objects, such as phrases, their grammatical and semantic properties, such as number or semantic role, and how such information is combined to correctly solve each problem. Specifically generated error types help investigate the behaviour of the system, which important information it is able to detect, and which structures mislead it.
%R 10.18653/v1/2023.genbench-1.13
%U https://aclanthology.org/2023.genbench-1.13
%U https://doi.org/10.18653/v1/2023.genbench-1.13
%P 163-172
Markdown (Informal)
[Blackbird Language Matrices Tasks for Generalization](https://aclanthology.org/2023.genbench-1.13) (Merlo et al., GenBench-WS 2023)
ACL
- Paola Merlo, Chunyang Jiang, Giuseppe Samo, and Vivi Nastase. 2023. Blackbird Language Matrices Tasks for Generalization. In Proceedings of the 1st GenBench Workshop on (Benchmarking) Generalisation in NLP, pages 163–172, Singapore. Association for Computational Linguistics.