% Conference paper (system demonstration track of COLING 2016), pages 136--140.
% Inner braces in the title and booktitle protect capitalisation from style recasing.
@inproceedings{abekawa-aizawa-2016-sidenoter,
    author    = {Abekawa, Takeshi and
                 Aizawa, Akiko},
    title     = {{S}ide{N}oter: Scholarly Paper Browsing System based on {PDF} Restructuring and Text Annotation},
    editor    = {Watanabe, Hideo},
    booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: System Demonstrations},
    pages     = {136--140},
    month     = dec,
    year      = {2016},
    address   = {Osaka, Japan},
    publisher = {The COLING 2016 Organizing Committee},
    url       = {https://aclanthology.org/C16-2029},
    abstract  = {In this paper, we discuss our ongoing efforts to construct a scientific paper browsing system that helps users to read and understand advanced technical content distributed in PDF. Since PDF is a format specifically designed for printing, layout and logical structures of documents are indistinguishably embedded in the file. It requires much effort to extract natural language text from PDF files, and reversely, display semantic annotations produced by NLP tools on the original page layout. In our browsing system, we tackle these issues caused by the gap between printable document and plain text. Our system provides ways to extract natural language sentences from PDF files together with their logical structures, and also to map arbitrary textual spans to their corresponding regions on page images. We setup a demonstration system using papers published in ACL anthology and demonstrate the enhanced search and refined recommendation functions which we plan to make widely available to NLP researchers.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the citation key abekawa-aizawa-2016-sidenoter. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="abekawa-aizawa-2016-sidenoter">
    <titleInfo>
      <title>SideNoter: Scholarly Paper Browsing System based on PDF Restructuring and Text Annotation</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Takeshi</namePart>
      <namePart type="family">Abekawa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Akiko</namePart>
      <namePart type="family">Aizawa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2016-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume this paper appears in. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: System Demonstrations</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Hideo</namePart>
        <namePart type="family">Watanabe</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>The COLING 2016 Organizing Committee</publisher>
        <place>
          <placeTerm type="text">Osaka, Japan</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>In this paper, we discuss our ongoing efforts to construct a scientific paper browsing system that helps users to read and understand advanced technical content distributed in PDF. Since PDF is a format specifically designed for printing, layout and logical structures of documents are indistinguishably embedded in the file. It requires much effort to extract natural language text from PDF files, and reversely, display semantic annotations produced by NLP tools on the original page layout. In our browsing system, we tackle these issues caused by the gap between printable document and plain text. Our system provides ways to extract natural language sentences from PDF files together with their logical structures, and also to map arbitrary textual spans to their corresponding regions on page images. We setup a demonstration system using papers published in ACL anthology and demonstrate the enhanced search and refined recommendation functions which we plan to make widely available to NLP researchers.</abstract>
    <identifier type="citekey">abekawa-aizawa-2016-sidenoter</identifier>
    <location>
      <url>https://aclanthology.org/C16-2029</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2016-12</date>
      <extent unit="page">
        <start>136</start>
        <end>140</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T SideNoter: Scholarly Paper Browsing System based on PDF Restructuring and Text Annotation
%A Abekawa, Takeshi
%A Aizawa, Akiko
%Y Watanabe, Hideo
%S Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: System Demonstrations
%D 2016
%8 December
%I The COLING 2016 Organizing Committee
%C Osaka, Japan
%F abekawa-aizawa-2016-sidenoter
%X In this paper, we discuss our ongoing efforts to construct a scientific paper browsing system that helps users to read and understand advanced technical content distributed in PDF. Since PDF is a format specifically designed for printing, layout and logical structures of documents are indistinguishably embedded in the file. It requires much effort to extract natural language text from PDF files, and reversely, display semantic annotations produced by NLP tools on the original page layout. In our browsing system, we tackle these issues caused by the gap between printable document and plain text. Our system provides ways to extract natural language sentences from PDF files together with their logical structures, and also to map arbitrary textual spans to their corresponding regions on page images. We setup a demonstration system using papers published in ACL anthology and demonstrate the enhanced search and refined recommendation functions which we plan to make widely available to NLP researchers.
%U https://aclanthology.org/C16-2029
%P 136-140
Markdown (Informal)
[SideNoter: Scholarly Paper Browsing System based on PDF Restructuring and Text Annotation](https://aclanthology.org/C16-2029) (Abekawa & Aizawa, COLING 2016)
ACL