@inproceedings{siu-etal-2024-atlas,
title = "{ATLAS}: A System for {PDF}-centric Human Interaction Data Collection",
author = "Siu, Alexa and
Wang, Zichao and
Hoeflich, Joshua and
Kapasi, Naman and
Nenkova, Ani and
Sun, Tong",
editor = "Chang, Kai-Wei and
Lee, Annie and
Rajani, Nazneen",
booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: System Demonstrations)",
month = jun,
year = "2024",
address = "Mexico City, Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2024.naacl-demo.9",
doi = "10.18653/v1/2024.naacl-demo.9",
pages = "87--96",
abstract = "The Portable Document Format (PDF) is a popular format for distributing digital documents. Datasets on PDF reading behaviors and interactions remain limited due to the challenges of instrumenting PDF readers for these data collection tasks. We present ATLAS, a data collection tool designed to better support researchers in collecting rich PDF-centric datasets from users. ATLAS supports researchers in programmatically creating a user interface for data collection that is ready to share with annotators. It includes a toolkit and an extensible schema to easily customize the data collection tasks for a variety of purposes, allowing collection of PDF annotations (e.g., highlights, drawings) as well as reading behavior analytics (e.g., page scroll, text selections). We open-source ATLAS1 to support future research efforts and review use cases of ATLAS that showcase our system{'}s broad applicability.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="siu-etal-2024-atlas">
<titleInfo>
<title>ATLAS: A System for PDF-centric Human Interaction Data Collection</title>
</titleInfo>
<name type="personal">
<namePart type="given">Alexa</namePart>
<namePart type="family">Siu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zichao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joshua</namePart>
<namePart type="family">Hoeflich</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naman</namePart>
<namePart type="family">Kapasi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ani</namePart>
<namePart type="family">Nenkova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tong</namePart>
<namePart type="family">Sun</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2024-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: System Demonstrations)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kai-Wei</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Annie</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nazneen</namePart>
<namePart type="family">Rajani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Mexico City, Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The Portable Document Format (PDF) is a popular format for distributing digital documents. Datasets on PDF reading behaviors and interactions remain limited due to the challenges of instrumenting PDF readers for these data collection tasks. We present ATLAS, a data collection tool designed to better support researchers in collecting rich PDF-centric datasets from users. ATLAS supports researchers in programmatically creating a user interface for data collection that is ready to share with annotators. It includes a toolkit and an extensible schema to easily customize the data collection tasks for a variety of purposes, allowing collection of PDF annotations (e.g., highlights, drawings) as well as reading behavior analytics (e.g., page scroll, text selections). We open-source ATLAS1 to support future research efforts and review use cases of ATLAS that showcase our system’s broad applicability.</abstract>
<identifier type="citekey">siu-etal-2024-atlas</identifier>
<identifier type="doi">10.18653/v1/2024.naacl-demo.9</identifier>
<location>
<url>https://aclanthology.org/2024.naacl-demo.9</url>
</location>
<part>
<date>2024-06</date>
<extent unit="page">
<start>87</start>
<end>96</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T ATLAS: A System for PDF-centric Human Interaction Data Collection
%A Siu, Alexa
%A Wang, Zichao
%A Hoeflich, Joshua
%A Kapasi, Naman
%A Nenkova, Ani
%A Sun, Tong
%Y Chang, Kai-Wei
%Y Lee, Annie
%Y Rajani, Nazneen
%S Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: System Demonstrations)
%D 2024
%8 June
%I Association for Computational Linguistics
%C Mexico City, Mexico
%F siu-etal-2024-atlas
%X The Portable Document Format (PDF) is a popular format for distributing digital documents. Datasets on PDF reading behaviors and interactions remain limited due to the challenges of instrumenting PDF readers for these data collection tasks. We present ATLAS, a data collection tool designed to better support researchers in collecting rich PDF-centric datasets from users. ATLAS supports researchers in programmatically creating a user interface for data collection that is ready to share with annotators. It includes a toolkit and an extensible schema to easily customize the data collection tasks for a variety of purposes, allowing collection of PDF annotations (e.g., highlights, drawings) as well as reading behavior analytics (e.g., page scroll, text selections). We open-source ATLAS1 to support future research efforts and review use cases of ATLAS that showcase our system’s broad applicability.
%R 10.18653/v1/2024.naacl-demo.9
%U https://aclanthology.org/2024.naacl-demo.9
%U https://doi.org/10.18653/v1/2024.naacl-demo.9
%P 87-96
Markdown (Informal)
[ATLAS: A System for PDF-centric Human Interaction Data Collection](https://aclanthology.org/2024.naacl-demo.9) (Siu et al., NAACL 2024)
ACL
- Alexa Siu, Zichao Wang, Joshua Hoeflich, Naman Kapasi, Ani Nenkova, and Tong Sun. 2024. ATLAS: A System for PDF-centric Human Interaction Data Collection. In Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Volume 3: System Demonstrations), pages 87–96, Mexico City, Mexico. Association for Computational Linguistics.