% Conference paper (LREC 2016). The citation key matches the identifier at the end of the url field.
@inproceedings{L16-1049,
 abstract = {This project assesses the resources necessary to make oral history searchable by means of automatic speech recognition (ASR). There are many inherent challenges in applying ASR to conversational speech: smaller training set sizes and varying demographics, among others. We assess the impact of dataset size, word error rate and term-weighted value on human search capability through an information retrieval task on Mechanical Turk. We use English oral history data collected by StoryCorps, a national organization that provides all people with the opportunity to record, share and preserve their stories, and control for a variety of demographics including age, gender, birthplace, and dialect on four different training set sizes. We show comparable search performance using a standard speech recognition system as with hand-transcribed data, which is promising for increased accessibility of conversational speech and oral history archives.},
 address = {Portoro{\v{z}}, Slovenia},
 author = {Salesky, Elizabeth and Ray, Jessica and Shen, Wade},
 booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC} 2016)},
 month = may,
 pages = {317--321},
 publisher = {European Language Resources Association (ELRA)},
 title = {Operational Assessment of Keyword Search on Oral History},
 url = {https://www.aclweb.org/anthology/L16-1049},
 year = {2016},
}

