@inproceedings{herms-etal-2016-corpus,
title = "A Corpus of Read and Spontaneous {U}pper {S}axon {G}erman Speech for {ASR} Evaluation",
author = {Herms, Robert and
Seelig, Laura and
M{\"u}nch, Stefanie and
Eibl, Maximilian},
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1736",
pages = "4648--4651",
abstract = "In this Paper we present a corpus named SXUCorpus which contains read and spontaneous speech of the Upper Saxon German dialect. The data has been collected from eight archives of local television stations located in the Free State of Saxony. The recordings include broadcasted topics of news, economy, weather, sport, and documentation from the years 1992 to 1996 and have been manually transcribed and labeled. In the paper, we report the methodology of collecting and processing analog audiovisual material, constructing the corpus and describe the properties of the data. In its current version, the corpus is available to the scientific community and is designed for automatic speech recognition (ASR) evaluation with a development set and a test set. We performed ASR experiments with the open-source framework sphinx-4 including a configuration for Standard German on the dataset. Additionally, we show the influence of acoustic model and language model adaptation by the utilization of the development set.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="herms-etal-2016-corpus">
<titleInfo>
<title>A Corpus of Read and Spontaneous Upper Saxon German Speech for ASR Evaluation</title>
</titleInfo>
<name type="personal">
<namePart type="given">Robert</namePart>
<namePart type="family">Herms</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Laura</namePart>
<namePart type="family">Seelig</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefanie</namePart>
<namePart type="family">Münch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Maximilian</namePart>
<namePart type="family">Eibl</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this Paper we present a corpus named SXUCorpus which contains read and spontaneous speech of the Upper Saxon German dialect. The data has been collected from eight archives of local television stations located in the Free State of Saxony. The recordings include broadcasted topics of news, economy, weather, sport, and documentation from the years 1992 to 1996 and have been manually transcribed and labeled. In the paper, we report the methodology of collecting and processing analog audiovisual material, constructing the corpus and describe the properties of the data. In its current version, the corpus is available to the scientific community and is designed for automatic speech recognition (ASR) evaluation with a development set and a test set. We performed ASR experiments with the open-source framework sphinx-4 including a configuration for Standard German on the dataset. Additionally, we show the influence of acoustic model and language model adaptation by the utilization of the development set.</abstract>
<identifier type="citekey">herms-etal-2016-corpus</identifier>
<location>
<url>https://aclanthology.org/L16-1736</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>4648</start>
<end>4651</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T A Corpus of Read and Spontaneous Upper Saxon German Speech for ASR Evaluation
%A Herms, Robert
%A Seelig, Laura
%A Münch, Stefanie
%A Eibl, Maximilian
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F herms-etal-2016-corpus
%X In this Paper we present a corpus named SXUCorpus which contains read and spontaneous speech of the Upper Saxon German dialect. The data has been collected from eight archives of local television stations located in the Free State of Saxony. The recordings include broadcasted topics of news, economy, weather, sport, and documentation from the years 1992 to 1996 and have been manually transcribed and labeled. In the paper, we report the methodology of collecting and processing analog audiovisual material, constructing the corpus and describe the properties of the data. In its current version, the corpus is available to the scientific community and is designed for automatic speech recognition (ASR) evaluation with a development set and a test set. We performed ASR experiments with the open-source framework sphinx-4 including a configuration for Standard German on the dataset. Additionally, we show the influence of acoustic model and language model adaptation by the utilization of the development set.
%U https://aclanthology.org/L16-1736
%P 4648-4651
Markdown (Informal)
[A Corpus of Read and Spontaneous Upper Saxon German Speech for ASR Evaluation](https://aclanthology.org/L16-1736) (Herms et al., LREC 2016)
ACL