@inproceedings{sadvilkar-neumann-2020-pysbd,
title = "{P}y{SBD}: Pragmatic Sentence Boundary Disambiguation",
author = "Sadvilkar, Nipun and
Neumann, Mark",
editor = "Park, Eunjeong L. and
Hagiwara, Masato and
Milajevs, Dmitrijs and
Liu, Nelson F. and
Chauhan, Geeticka and
Tan, Liling",
booktitle = "Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)",
month = nov,
year = "2020",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2020.nlposs-1.15",
doi = "10.18653/v1/2020.nlposs-1.15",
pages = "110--114",
abstract = "We present a rule-based sentence boundary disambiguation Python package that works out-of-the-box for 22 languages. We aim to provide a realistic segmenter which can provide logical sentences even when the format and domain of the input text are unknown. In our work, we adapt the Golden Rules Set (a language-specific set of sentence boundary exemplars) originally implemented as a Ruby gem pragmatic segmenter which we ported to Python with additional improvements and functionality. PySBD passes 97.92{\%} of the Golden Rule Set exemplars for English, an improvement of 25{\%} over the next best open-source Python tool.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="sadvilkar-neumann-2020-pysbd">
<titleInfo>
<title>PySBD: Pragmatic Sentence Boundary Disambiguation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nipun</namePart>
<namePart type="family">Sadvilkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Neumann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Eunjeong</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Park</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Masato</namePart>
<namePart type="family">Hagiwara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dmitrijs</namePart>
<namePart type="family">Milajevs</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nelson</namePart>
<namePart type="given">F</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Geeticka</namePart>
<namePart type="family">Chauhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Liling</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a rule-based sentence boundary disambiguation Python package that works out-of-the-box for 22 languages. We aim to provide a realistic segmenter which can provide logical sentences even when the format and domain of the input text are unknown. In our work, we adapt the Golden Rules Set (a language-specific set of sentence boundary exemplars) originally implemented as a Ruby gem pragmatic segmenter which we ported to Python with additional improvements and functionality. PySBD passes 97.92% of the Golden Rule Set exemplars for English, an improvement of 25% over the next best open-source Python tool.</abstract>
<identifier type="citekey">sadvilkar-neumann-2020-pysbd</identifier>
<identifier type="doi">10.18653/v1/2020.nlposs-1.15</identifier>
<location>
<url>https://aclanthology.org/2020.nlposs-1.15</url>
</location>
<part>
<date>2020-11</date>
<extent unit="page">
<start>110</start>
<end>114</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T PySBD: Pragmatic Sentence Boundary Disambiguation
%A Sadvilkar, Nipun
%A Neumann, Mark
%Y Park, Eunjeong L.
%Y Hagiwara, Masato
%Y Milajevs, Dmitrijs
%Y Liu, Nelson F.
%Y Chauhan, Geeticka
%Y Tan, Liling
%S Proceedings of Second Workshop for NLP Open Source Software (NLP-OSS)
%D 2020
%8 November
%I Association for Computational Linguistics
%C Online
%F sadvilkar-neumann-2020-pysbd
%X We present a rule-based sentence boundary disambiguation Python package that works out-of-the-box for 22 languages. We aim to provide a realistic segmenter which can provide logical sentences even when the format and domain of the input text are unknown. In our work, we adapt the Golden Rules Set (a language-specific set of sentence boundary exemplars) originally implemented as a Ruby gem pragmatic segmenter which we ported to Python with additional improvements and functionality. PySBD passes 97.92% of the Golden Rule Set exemplars for English, an improvement of 25% over the next best open-source Python tool.
%R 10.18653/v1/2020.nlposs-1.15
%U https://aclanthology.org/2020.nlposs-1.15
%U https://doi.org/10.18653/v1/2020.nlposs-1.15
%P 110-114
Markdown (Informal)
[PySBD: Pragmatic Sentence Boundary Disambiguation](https://aclanthology.org/2020.nlposs-1.15) (Sadvilkar & Neumann, NLPOSS 2020)
ACL