@inproceedings{cabrera-etal-2017-grawitas,
title = "{G}ra{W}i{T}as: a Grammar-based {W}ikipedia Talk Page Parser",
author = {Cabrera, Benjamin and
Steinert, Laura and
Ross, Bj{\"o}rn},
editor = "Martins, Andr{\'e} and
Pe{\~n}as, Anselmo",
booktitle = "Proceedings of the Software Demonstrations of the 15th Conference of the {E}uropean Chapter of the Association for Computational Linguistics",
month = apr,
year = "2017",
address = "Valencia, Spain",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/E17-3006",
pages = "21--24",
abstract = "Wikipedia offers researchers unique insights into the collaboration and communication patterns of a large self-regulating community of editors. The main medium of direct communication between editors of an article is the article{'}s talk page. However, a talk page file is unstructured and therefore difficult to analyse automatically. A few parsers exist that enable its transformation into a structured data format. However, they are rarely open source, support only a limited subset of the talk page syntax {--} resulting in the loss of content {--} and usually support only one export format. Together with this article we offer a very fast, lightweight, open source parser with support for various output formats. In a preliminary evaluation it achieved a high accuracy. The parser uses a grammar-based approach {--} offering a transparent implementation and easy extensibility.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="cabrera-etal-2017-grawitas">
<titleInfo>
<title>GraWiTas: a Grammar-based Wikipedia Talk Page Parser</title>
</titleInfo>
<name type="personal">
<namePart type="given">Benjamin</namePart>
<namePart type="family">Cabrera</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Steinert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Björn</namePart>
<namePart type="family">Ross</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">André</namePart>
<namePart type="family">Martins</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anselmo</namePart>
<namePart type="family">Peñas</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Valencia, Spain</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Wikipedia offers researchers unique insights into the collaboration and communication patterns of a large self-regulating community of editors. The main medium of direct communication between editors of an article is the article’s talk page. However, a talk page file is unstructured and therefore difficult to analyse automatically. A few parsers exist that enable its transformation into a structured data format. However, they are rarely open source, support only a limited subset of the talk page syntax – resulting in the loss of content – and usually support only one export format. Together with this article we offer a very fast, lightweight, open source parser with support for various output formats. In a preliminary evaluation it achieved a high accuracy. The parser uses a grammar-based approach – offering a transparent implementation and easy extensibility.</abstract>
<identifier type="citekey">cabrera-etal-2017-grawitas</identifier>
<location>
<url>https://aclanthology.org/E17-3006</url>
</location>
<part>
<date>2017-04</date>
<extent unit="page">
<start>21</start>
<end>24</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GraWiTas: a Grammar-based Wikipedia Talk Page Parser
%A Cabrera, Benjamin
%A Steinert, Laura
%A Ross, Björn
%Y Martins, André
%Y Peñas, Anselmo
%S Proceedings of the Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics
%D 2017
%8 April
%I Association for Computational Linguistics
%C Valencia, Spain
%F cabrera-etal-2017-grawitas
%X Wikipedia offers researchers unique insights into the collaboration and communication patterns of a large self-regulating community of editors. The main medium of direct communication between editors of an article is the article’s talk page. However, a talk page file is unstructured and therefore difficult to analyse automatically. A few parsers exist that enable its transformation into a structured data format. However, they are rarely open source, support only a limited subset of the talk page syntax – resulting in the loss of content – and usually support only one export format. Together with this article we offer a very fast, lightweight, open source parser with support for various output formats. In a preliminary evaluation it achieved a high accuracy. The parser uses a grammar-based approach – offering a transparent implementation and easy extensibility.
%U https://aclanthology.org/E17-3006
%P 21-24
Markdown (Informal)
[GraWiTas: a Grammar-based Wikipedia Talk Page Parser](https://aclanthology.org/E17-3006) (Cabrera et al., EACL 2017)
ACL
- Benjamin Cabrera, Laura Steinert, and Björn Ross. 2017. GraWiTas: a Grammar-based Wikipedia Talk Page Parser. In Proceedings of the Software Demonstrations of the 15th Conference of the European Chapter of the Association for Computational Linguistics, pages 21–24, Valencia, Spain. Association for Computational Linguistics.