@inproceedings{mieskes-2017-quantitative,
title = "A Quantitative Study of Data in the {NLP} community",
author = "Mieskes, Margot",
editor = "Hovy, Dirk and
Spruit, Shannon and
Mitchell, Margaret and
Bender, Emily M. and
Strube, Michael and
Wallach, Hanna",
booktitle = "Proceedings of the First {ACL} Workshop on Ethics in Natural Language Processing",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-1603",
doi = "10.18653/v1/W17-1603",
pages = "23--29",
abstract = "We present results on a quantitative analysis of publications in the NLP domain on collecting, publishing and availability of research data. We find that a wide range of publications rely on data crawled from the web, but few give details on how potentially sensitive data was treated. Additionally, we find that while links to repositories of data are given, they often do not work even a short time after publication. We put together several suggestions on how to improve this situation based on publications from the NLP domain, but also other research areas.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="mieskes-2017-quantitative">
<titleInfo>
<title>A Quantitative Study of Data in the NLP community</title>
</titleInfo>
<name type="personal">
<namePart type="given">Margot</namePart>
<namePart type="family">Mieskes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the First ACL Workshop on Ethics in Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Shannon</namePart>
<namePart type="family">Spruit</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Margaret</namePart>
<namePart type="family">Mitchell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Emily</namePart>
<namePart type="given">M</namePart>
<namePart type="family">Bender</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Strube</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hanna</namePart>
<namePart type="family">Wallach</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present results on a quantitative analysis of publications in the NLP domain on collecting, publishing and availability of research data. We find that a wide range of publications rely on data crawled from the web, but few give details on how potentially sensitive data was treated. Additionally, we find that while links to repositories of data are given, they often do not work even a short time after publication. We put together several suggestions on how to improve this situation based on publications from the NLP domain, but also other research areas.</abstract>
<identifier type="citekey">mieskes-2017-quantitative</identifier>
<identifier type="doi">10.18653/v1/W17-1603</identifier>
<location>
<url>https://aclanthology.org/W17-1603</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>23</start>
<end>29</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Quantitative Study of Data in the NLP community
%A Mieskes, Margot
%Y Hovy, Dirk
%Y Spruit, Shannon
%Y Mitchell, Margaret
%Y Bender, Emily M.
%Y Strube, Michael
%Y Wallach, Hanna
%S Proceedings of the First ACL Workshop on Ethics in Natural Language Processing
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F mieskes-2017-quantitative
%X We present results on a quantitative analysis of publications in the NLP domain on collecting, publishing and availability of research data. We find that a wide range of publications rely on data crawled from the web, but few give details on how potentially sensitive data was treated. Additionally, we find that while links to repositories of data are given, they often do not work even a short time after publication. We put together several suggestions on how to improve this situation based on publications from the NLP domain, but also other research areas.
%R 10.18653/v1/W17-1603
%U https://aclanthology.org/W17-1603
%U https://doi.org/10.18653/v1/W17-1603
%P 23-29
Markdown (Informal)
[A Quantitative Study of Data in the NLP community](https://aclanthology.org/W17-1603) (Mieskes, EthNLP 2017)
ACL