@inproceedings{haaf-2016-corpus,
title = "Corpus Analysis based on Structural Phenomena in Texts: Exploiting {TEI} Encoding for Linguistic Research",
author = "Haaf, Susanne",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1692",
pages = "4365--4372",
abstract = "This paper poses the question, how linguistic corpus-based research may be enriched by the exploitation of conceptual text structures and layout as provided via TEI annotation. Examples for possible areas of research and usage scenarios are provided based on the German historical corpus of the Deutsches Textarchiv (DTA) project, which has been consistently tagged accordant to the TEI Guidelines, more specifically to the DTA ›Base Format‹ (DTABf). The paper shows that by including TEI-XML structuring in corpus-based analyses significances can be observed for different linguistic phenomena, as e.g. the development of conceptual text structures themselves, the syntactic embedding of terms in certain conceptual text structures, and phenomena of language change which become obvious via the layout of a text. The exemplary study carried out here shows some of the potential for the exploitation of TEI annotation for linguistic research, which might be kept in mind when making design decisions for new corpora as well as when working with existing TEI corpora.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="haaf-2016-corpus">
<titleInfo>
<title>Corpus Analysis based on Structural Phenomena in Texts: Exploiting TEI Encoding for Linguistic Research</title>
</titleInfo>
<name type="personal">
<namePart type="given">Susanne</namePart>
<namePart type="family">Haaf</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper poses the question, how linguistic corpus-based research may be enriched by the exploitation of conceptual text structures and layout as provided via TEI annotation. Examples for possible areas of research and usage scenarios are provided based on the German historical corpus of the Deutsches Textarchiv (DTA) project, which has been consistently tagged accordant to the TEI Guidelines, more specifically to the DTA ›Base Format‹ (DTABf). The paper shows that by including TEI-XML structuring in corpus-based analyses significances can be observed for different linguistic phenomena, as e.g. the development of conceptual text structures themselves, the syntactic embedding of terms in certain conceptual text structures, and phenomena of language change which become obvious via the layout of a text. The exemplary study carried out here shows some of the potential for the exploitation of TEI annotation for linguistic research, which might be kept in mind when making design decisions for new corpora as well as when working with existing TEI corpora.</abstract>
<identifier type="citekey">haaf-2016-corpus</identifier>
<location>
<url>https://aclanthology.org/L16-1692</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>4365</start>
<end>4372</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus Analysis based on Structural Phenomena in Texts: Exploiting TEI Encoding for Linguistic Research
%A Haaf, Susanne
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F haaf-2016-corpus
%X This paper poses the question, how linguistic corpus-based research may be enriched by the exploitation of conceptual text structures and layout as provided via TEI annotation. Examples for possible areas of research and usage scenarios are provided based on the German historical corpus of the Deutsches Textarchiv (DTA) project, which has been consistently tagged accordant to the TEI Guidelines, more specifically to the DTA ›Base Format‹ (DTABf). The paper shows that by including TEI-XML structuring in corpus-based analyses significances can be observed for different linguistic phenomena, as e.g. the development of conceptual text structures themselves, the syntactic embedding of terms in certain conceptual text structures, and phenomena of language change which become obvious via the layout of a text. The exemplary study carried out here shows some of the potential for the exploitation of TEI annotation for linguistic research, which might be kept in mind when making design decisions for new corpora as well as when working with existing TEI corpora.
%U https://aclanthology.org/L16-1692
%P 4365-4372
Markdown (Informal)
[Corpus Analysis based on Structural Phenomena in Texts: Exploiting TEI Encoding for Linguistic Research](https://aclanthology.org/L16-1692) (Haaf, LREC 2016)
ACL