@article{bamman-smith-2014-unsupervised,
title = "Unsupervised Discovery of Biographical Structure from Text",
author = "Bamman, David and
Smith, Noah A.",
editor = "Lin, Dekang and
Collins, Michael and
Lee, Lillian",
journal = "Transactions of the Association for Computational Linguistics",
volume = "2",
year = "2014",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/Q14-1029",
doi = "10.1162/tacl_a_00189",
pages = "363--376",
abstract = "We present a method for discovering abstract event classes in biographies, based on a probabilistic latent-variable model. Taking as input timestamped text, we exploit latent correlations among events to learn a set of event classes (such as Born, Graduates High School, and Becomes Citizen), along with the typical times in a person{'}s life when those events occur. In a quantitative evaluation at the task of predicting a person{'}s age for a given event, we find that our generative model outperforms a strong linear regression baseline, along with simpler variants of the model that ablate some features. The abstract event classes that we learn allow us to perform a large-scale analysis of 242,970 Wikipedia biographies. Though it is known that women are greatly underrepresented on Wikipedia{---}not only as editors (Wikipedia, 2011) but also as subjects of articles (Reagle and Rhue, 2011){---}we find that there is a bias in their characterization as well, with biographies of women containing significantly more emphasis on events of marriage and divorce than biographies of men.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="bamman-smith-2014-unsupervised">
<titleInfo>
<title>Unsupervised Discovery of Biographical Structure from Text</title>
</titleInfo>
<name type="personal">
<namePart type="given">David</namePart>
<namePart type="family">Bamman</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Noah</namePart>
<namePart type="given">A</namePart>
<namePart type="family">Smith</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2014</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Transactions of the Association for Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>We present a method for discovering abstract event classes in biographies, based on a probabilistic latent-variable model. Taking as input timestamped text, we exploit latent correlations among events to learn a set of event classes (such as Born, Graduates High School, and Becomes Citizen), along with the typical times in a person’s life when those events occur. In a quantitative evaluation at the task of predicting a person’s age for a given event, we find that our generative model outperforms a strong linear regression baseline, along with simpler variants of the model that ablate some features. The abstract event classes that we learn allow us to perform a large-scale analysis of 242,970 Wikipedia biographies. Though it is known that women are greatly underrepresented on Wikipedia—not only as editors (Wikipedia, 2011) but also as subjects of articles (Reagle and Rhue, 2011)—we find that there is a bias in their characterization as well, with biographies of women containing significantly more emphasis on events of marriage and divorce than biographies of men.</abstract>
<identifier type="citekey">bamman-smith-2014-unsupervised</identifier>
<identifier type="doi">10.1162/tacl_a_00189</identifier>
<location>
<url>https://aclanthology.org/Q14-1029</url>
</location>
<part>
<date>2014</date>
<detail type="volume"><number>2</number></detail>
<extent unit="page">
<start>363</start>
<end>376</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Unsupervised Discovery of Biographical Structure from Text
%A Bamman, David
%A Smith, Noah A.
%J Transactions of the Association for Computational Linguistics
%D 2014
%V 2
%I MIT Press
%C Cambridge, MA
%F bamman-smith-2014-unsupervised
%X We present a method for discovering abstract event classes in biographies, based on a probabilistic latent-variable model. Taking as input timestamped text, we exploit latent correlations among events to learn a set of event classes (such as Born, Graduates High School, and Becomes Citizen), along with the typical times in a person’s life when those events occur. In a quantitative evaluation at the task of predicting a person’s age for a given event, we find that our generative model outperforms a strong linear regression baseline, along with simpler variants of the model that ablate some features. The abstract event classes that we learn allow us to perform a large-scale analysis of 242,970 Wikipedia biographies. Though it is known that women are greatly underrepresented on Wikipedia—not only as editors (Wikipedia, 2011) but also as subjects of articles (Reagle and Rhue, 2011)—we find that there is a bias in their characterization as well, with biographies of women containing significantly more emphasis on events of marriage and divorce than biographies of men.
%R 10.1162/tacl_a_00189
%U https://aclanthology.org/Q14-1029
%U https://doi.org/10.1162/tacl_a_00189
%P 363-376
Markdown (Informal)
[Unsupervised Discovery of Biographical Structure from Text](https://aclanthology.org/Q14-1029) (Bamman & Smith, TACL 2014)
ACL