@inproceedings{ganesan-etal-2017-protein,
title = "Protein Word Detection using Text Segmentation Techniques",
author = "Ganesan, Devi and
Tendulkar, Ashish V. and
Chakraborti, Sutanu",
editor = "Cohen, Kevin Bretonnel and
Demner-Fushman, Dina and
Ananiadou, Sophia and
Tsujii, Junichi",
booktitle = "{B}io{NLP} 2017",
month = aug,
year = "2017",
address = "Vancouver, Canada,",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-2330",
doi = "10.18653/v1/W17-2330",
pages = "238--246",
abstract = "Literature in Molecular Biology is abundant with linguistic metaphors. There have been works in the past that attempt to draw parallels between linguistics and biology, driven by the fundamental premise that proteins have a language of their own. Since word detection is crucial to the decipherment of any unknown language, we attempt to establish a problem mapping from natural language text to protein sequences at the level of words. Towards this end, we explore the use of an unsupervised text segmentation algorithm to the task of extracting {``}biological words{''} from protein sequences. In particular, we demonstrate the effectiveness of using domain knowledge to complement data driven approaches in the text segmentation task, as well as in its biological counterpart. We also propose a novel extrinsic evaluation measure for protein words through protein family classification.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ganesan-etal-2017-protein">
<titleInfo>
<title>Protein Word Detection using Text Segmentation Techniques</title>
</titleInfo>
<name type="personal">
<namePart type="given">Devi</namePart>
<namePart type="family">Ganesan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ashish</namePart>
<namePart type="given">V</namePart>
<namePart type="family">Tendulkar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sutanu</namePart>
<namePart type="family">Chakraborti</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>BioNLP 2017</title>
</titleInfo>
<name type="personal">
<namePart type="given">Kevin</namePart>
<namePart type="given">Bretonnel</namePart>
<namePart type="family">Cohen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dina</namePart>
<namePart type="family">Demner-Fushman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sophia</namePart>
<namePart type="family">Ananiadou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Junichi</namePart>
<namePart type="family">Tsujii</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Vancouver, Canada,</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Literature in Molecular Biology is abundant with linguistic metaphors. There have been works in the past that attempt to draw parallels between linguistics and biology, driven by the fundamental premise that proteins have a language of their own. Since word detection is crucial to the decipherment of any unknown language, we attempt to establish a problem mapping from natural language text to protein sequences at the level of words. Towards this end, we explore the use of an unsupervised text segmentation algorithm to the task of extracting “biological words” from protein sequences. In particular, we demonstrate the effectiveness of using domain knowledge to complement data driven approaches in the text segmentation task, as well as in its biological counterpart. We also propose a novel extrinsic evaluation measure for protein words through protein family classification.</abstract>
<identifier type="citekey">ganesan-etal-2017-protein</identifier>
<identifier type="doi">10.18653/v1/W17-2330</identifier>
<location>
<url>https://aclanthology.org/W17-2330</url>
</location>
<part>
<date>2017-08</date>
<extent unit="page">
<start>238</start>
<end>246</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Protein Word Detection using Text Segmentation Techniques
%A Ganesan, Devi
%A Tendulkar, Ashish V.
%A Chakraborti, Sutanu
%Y Cohen, Kevin Bretonnel
%Y Demner-Fushman, Dina
%Y Ananiadou, Sophia
%Y Tsujii, Junichi
%S BioNLP 2017
%D 2017
%8 August
%I Association for Computational Linguistics
%C Vancouver, Canada,
%F ganesan-etal-2017-protein
%X Literature in Molecular Biology is abundant with linguistic metaphors. There have been works in the past that attempt to draw parallels between linguistics and biology, driven by the fundamental premise that proteins have a language of their own. Since word detection is crucial to the decipherment of any unknown language, we attempt to establish a problem mapping from natural language text to protein sequences at the level of words. Towards this end, we explore the use of an unsupervised text segmentation algorithm to the task of extracting “biological words” from protein sequences. In particular, we demonstrate the effectiveness of using domain knowledge to complement data driven approaches in the text segmentation task, as well as in its biological counterpart. We also propose a novel extrinsic evaluation measure for protein words through protein family classification.
%R 10.18653/v1/W17-2330
%U https://aclanthology.org/W17-2330
%U https://doi.org/10.18653/v1/W17-2330
%P 238-246
Markdown (Informal)
[Protein Word Detection using Text Segmentation Techniques](https://aclanthology.org/W17-2330) (Ganesan et al., BioNLP 2017)
ACL