@inproceedings{kampe-etal-2020-allgemeine,
title = "Allgemeine Musikalische Zeitung as a Searchable Online Corpus",
author = "Kampe, Bernd and
Duan, Tinghui and
Hahn, Udo",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.122",
pages = "969--976",
abstract = "The massive digitization efforts related to historical newspapers over the past decades have focused on mass media sources and ordinary people as their primary recipients. Much less attention has been paid to newspapers published for a more specialized audience, e.g., those aiming at scholarly or cultural exchange within intellectual communities much narrower in scope, such as newspapers devoted to music criticism, arts or philosophy. Only some few of these specialized newspapers have been digitized up until now, but they are usually not well curated in terms of digitization quality, data formatting, completeness, redundancy (de-duplication), supply of metadata, and, hence, searchability. This paper describes our approach to eliminate these drawbacks for a major German-language newspaper resource of the Romantic Age, the Allgemeine Musikalische Zeitung (General Music Gazette). We here focus on a workflow that copes with a posteriori digitization problems, inconsistent OCRing and index building for searchability. In addition, we provide a user-friendly graphic interface to empower content-centric access to this (and other) digital resource(s) adopting open-source software for the purpose of Web presentation.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="kampe-etal-2020-allgemeine">
<titleInfo>
<title>Allgemeine Musikalische Zeitung as a Searchable Online Corpus</title>
</titleInfo>
<name type="personal">
<namePart type="given">Bernd</namePart>
<namePart type="family">Kampe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Tinghui</namePart>
<namePart type="family">Duan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Udo</namePart>
<namePart type="family">Hahn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>The massive digitization efforts related to historical newspapers over the past decades have focused on mass media sources and ordinary people as their primary recipients. Much less attention has been paid to newspapers published for a more specialized audience, e.g., those aiming at scholarly or cultural exchange within intellectual communities much narrower in scope, such as newspapers devoted to music criticism, arts or philosophy. Only some few of these specialized newspapers have been digitized up until now, but they are usually not well curated in terms of digitization quality, data formatting, completeness, redundancy (de-duplication), supply of metadata, and, hence, searchability. This paper describes our approach to eliminate these drawbacks for a major German-language newspaper resource of the Romantic Age, the Allgemeine Musikalische Zeitung (General Music Gazette). We here focus on a workflow that copes with a posteriori digitization problems, inconsistent OCRing and index building for searchability. In addition, we provide a user-friendly graphic interface to empower content-centric access to this (and other) digital resource(s) adopting open-source software for the purpose of Web presentation.</abstract>
<identifier type="citekey">kampe-etal-2020-allgemeine</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.122</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>969</start>
<end>976</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Allgemeine Musikalische Zeitung as a Searchable Online Corpus
%A Kampe, Bernd
%A Duan, Tinghui
%A Hahn, Udo
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F kampe-etal-2020-allgemeine
%X The massive digitization efforts related to historical newspapers over the past decades have focused on mass media sources and ordinary people as their primary recipients. Much less attention has been paid to newspapers published for a more specialized audience, e.g., those aiming at scholarly or cultural exchange within intellectual communities much narrower in scope, such as newspapers devoted to music criticism, arts or philosophy. Only some few of these specialized newspapers have been digitized up until now, but they are usually not well curated in terms of digitization quality, data formatting, completeness, redundancy (de-duplication), supply of metadata, and, hence, searchability. This paper describes our approach to eliminate these drawbacks for a major German-language newspaper resource of the Romantic Age, the Allgemeine Musikalische Zeitung (General Music Gazette). We here focus on a workflow that copes with a posteriori digitization problems, inconsistent OCRing and index building for searchability. In addition, we provide a user-friendly graphic interface to empower content-centric access to this (and other) digital resource(s) adopting open-source software for the purpose of Web presentation.
%U https://aclanthology.org/2020.lrec-1.122
%P 969-976
Markdown (Informal)
[Allgemeine Musikalische Zeitung as a Searchable Online Corpus](https://aclanthology.org/2020.lrec-1.122) (Kampe et al., LREC 2020)
ACL