@inproceedings{elder-etal-2022-building,
title = "Building a Biomedical Full-Text Part-of-Speech Corpus Semi-Automatically",
author = "Elder, Nicholas and
Mercer, Robert E. and
Singha Roy, Sudipta",
editor = "Pradhan, Sameer and
Kuebler, Sandra",
booktitle = "Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022",
month = jun,
year = "2022",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2022.law-1.16",
pages = "129--138",
abstract = "This paper presents a method for semi-automatically building a corpus of full-text English-language biomedical articles annotated with part-of-speech tags. The outcomes are a semi-automatic procedure to create a large silver standard corpus of 5 million sentences drawn from a large corpus of full-text biomedical articles annotated for part-of-speech, and a robust, easy-to-use software tool that assists the investigation of differences in two tagged datasets. The method to build the corpus uses two part-of-speech taggers designed to tag biomedical abstracts followed by a human dispute settlement when the two taggers differ on the tagging of a token. The dispute resolution aspect is facilitated by the software tool which organizes and presents the disputed tags. The corpus and all of the software that has been implemented for this study are made publicly available.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="elder-etal-2022-building">
<titleInfo>
<title>Building a Biomedical Full-Text Part-of-Speech Corpus Semi-Automatically</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Elder</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="given">E</namePart>
<namePart type="family">Mercer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudipta</namePart>
<namePart type="family">Singha Roy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sameer</namePart>
<namePart type="family">Pradhan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sandra</namePart>
<namePart type="family">Kuebler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents a method for semi-automatically building a corpus of full-text English-language biomedical articles annotated with part-of-speech tags. The outcomes are a semi-automatic procedure to create a large silver standard corpus of 5 million sentences drawn from a large corpus of full-text biomedical articles annotated for part-of-speech, and a robust, easy-to-use software tool that assists the investigation of differences in two tagged datasets. The method to build the corpus uses two part-of-speech taggers designed to tag biomedical abstracts followed by a human dispute settlement when the two taggers differ on the tagging of a token. The dispute resolution aspect is facilitated by the software tool which organizes and presents the disputed tags. The corpus and all of the software that has been implemented for this study are made publicly available.</abstract>
<identifier type="citekey">elder-etal-2022-building</identifier>
<location>
<url>https://aclanthology.org/2022.law-1.16</url>
</location>
<part>
<date>2022-06</date>
<extent unit="page">
<start>129</start>
<end>138</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Building a Biomedical Full-Text Part-of-Speech Corpus Semi-Automatically
%A Elder, Nicholas
%A Mercer, Robert E.
%A Singha Roy, Sudipta
%Y Pradhan, Sameer
%Y Kuebler, Sandra
%S Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022
%D 2022
%8 June
%I European Language Resources Association
%C Marseille, France
%F elder-etal-2022-building
%X This paper presents a method for semi-automatically building a corpus of full-text English-language biomedical articles annotated with part-of-speech tags. The outcomes are a semi-automatic procedure to create a large silver standard corpus of 5 million sentences drawn from a large corpus of full-text biomedical articles annotated for part-of-speech, and a robust, easy-to-use software tool that assists the investigation of differences in two tagged datasets. The method to build the corpus uses two part-of-speech taggers designed to tag biomedical abstracts followed by a human dispute settlement when the two taggers differ on the tagging of a token. The dispute resolution aspect is facilitated by the software tool which organizes and presents the disputed tags. The corpus and all of the software that has been implemented for this study are made publicly available.
%U https://aclanthology.org/2022.law-1.16
%P 129-138
Markdown (Informal)
[Building a Biomedical Full-Text Part-of-Speech Corpus Semi-Automatically](https://aclanthology.org/2022.law-1.16) (Elder et al., LAW 2022)
ACL