% Workshop paper record; see the url and doi fields for the canonical source.
% The braced A in the title keeps the subtitle capitalised: sentence-casing
% styles preserve a capital after a colon but not after a question mark.
% The braced NLP in the booktitle protects the acronym from any recasing.
@inproceedings{louvan-magnini-2020-far,
    author    = {Louvan, Samuel and
                 Magnini, Bernardo},
    title     = {How Far Can We Go with Data Selection? {A} Case Study on Semantic Sequence Tagging Tasks},
    editor    = {Rogers, Anna and
                 Sedoc, Jo{\~a}o and
                 Rumshisky, Anna},
    booktitle = {Proceedings of the First Workshop on Insights from Negative Results in {NLP}},
    month     = nov,
    year      = {2020},
    address   = {Online},
    publisher = {Association for Computational Linguistics},
    pages     = {15--21},
    doi       = {10.18653/v1/2020.insights-1.3},
    url       = {https://aclanthology.org/2020.insights-1.3},
    abstract  = {Although several works have addressed the role of data selection to improve transfer learning for various NLP tasks, there is no consensus about its real benefits and, more generally, there is a lack of shared practices on how it can be best applied. We propose a systematic approach aimed at evaluating data selection in scenarios of increasing complexity. Specifically, we compare the case in which source and target tasks are the same while source and target domains are different, against the more challenging scenario where both tasks and domains are different. We run a number of experiments on semantic sequence tagging tasks, which are relatively less investigated in data selection, and conclude that data selection has more benefit on the scenario when the tasks are the same, while in case of different (although related) tasks from distant domains, a combination of data selection and multi-task learning is ineffective for most cases.},
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="louvan-magnini-2020-far">
    <!-- Article-level description: title, authors, issue date -->
    <titleInfo>
      <title>How Far Can We Go with Data Selection? A Case Study on Semantic Sequence Tagging Tasks</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Samuel</namePart>
      <namePart type="family">Louvan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Bernardo</namePart>
      <namePart type="family">Magnini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2020-11</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the First Workshop on Insights from Negative Results in NLP</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rogers</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">João</namePart>
        <namePart type="family">Sedoc</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anna</namePart>
        <namePart type="family">Rumshisky</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Online</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Although several works have addressed the role of data selection to improve transfer learning for various NLP tasks, there is no consensus about its real benefits and, more generally, there is a lack of shared practices on how it can be best applied. We propose a systematic approach aimed at evaluating data selection in scenarios of increasing complexity. Specifically, we compare the case in which source and target tasks are the same while source and target domains are different, against the more challenging scenario where both tasks and domains are different. We run a number of experiments on semantic sequence tagging tasks, which are relatively less investigated in data selection, and conclude that data selection has more benefit on the scenario when the tasks are the same, while in case of different (although related) tasks from distant domains, a combination of data selection and multi-task learning is ineffective for most cases.</abstract>
    <!-- Identifiers and online location -->
    <identifier type="citekey">louvan-magnini-2020-far</identifier>
    <identifier type="doi">10.18653/v1/2020.insights-1.3</identifier>
    <location>
      <url>https://aclanthology.org/2020.insights-1.3</url>
    </location>
    <!-- Position within the host item: date and page range -->
    <part>
      <date>2020-11</date>
      <extent unit="page">
        <start>15</start>
        <end>21</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T How Far Can We Go with Data Selection? A Case Study on Semantic Sequence Tagging Tasks
%A Louvan, Samuel
%A Magnini, Bernardo
%Y Rogers, Anna
%Y Sedoc, João
%Y Rumshisky, Anna
%S Proceedings of the First Workshop on Insights from Negative Results in NLP
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F louvan-magnini-2020-far
%X Although several works have addressed the role of data selection to improve transfer learning for various NLP tasks, there is no consensus about its real benefits and, more generally, there is a lack of shared practices on how it can be best applied. We propose a systematic approach aimed at evaluating data selection in scenarios of increasing complexity. Specifically, we compare the case in which source and target tasks are the same while source and target domains are different, against the more challenging scenario where both tasks and domains are different. We run a number of experiments on semantic sequence tagging tasks, which are relatively less investigated in data selection, and conclude that data selection has more benefit on the scenario when the tasks are the same, while in case of different (although related) tasks from distant domains, a combination of data selection and multi-task learning is ineffective for most cases.
%R 10.18653/v1/2020.insights-1.3
%U https://aclanthology.org/2020.insights-1.3
%U https://doi.org/10.18653/v1/2020.insights-1.3
%P 15-21
Markdown (Informal)
[How Far Can We Go with Data Selection? A Case Study on Semantic Sequence Tagging Tasks](https://aclanthology.org/2020.insights-1.3) (Louvan & Magnini, insights 2020)
ACL