@inproceedings{hutchinson-2004-mining,
  title     = {Mining the Web for Discourse Markers},
  author    = {Hutchinson, Ben},
  editor    = {Lino, Maria Teresa and Xavier, Maria Francisca and Ferreira, F{\'a}tima and Costa, Rute and Silva, Raquel},
  booktitle = {Proceedings of the Fourth International Conference on Language Resources and Evaluation ({LREC}{'}04)},
  month     = may,
  year      = {2004},
  address   = {Lisbon, Portugal},
  publisher = {European Language Resources Association (ELRA)},
  url       = {http://www.lrec-conf.org/proceedings/lrec2004/pdf/333.pdf},
  abstract  = {This paper proposes a methodology for obtaining sentences containing discourse markers from the World Wide Web. The proposed methodology is particularly suitable for collecting large numbers of discourse marker tokens. It relies on the automatic identification of discourse markers, and we show that this can be done with an accuracy within 9{\%} of that of human performance. We also show that the distribution of discourse markers on the web correlates highly with those in a conventional balanced corpus.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding a single record, ID "hutchinson-2004-mining". -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="hutchinson-2004-mining">
    <!-- The paper itself: title, author, issue date. -->
    <titleInfo>
      <title>Mining the Web for Discourse Markers</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Ben</namePart>
      <namePart type="family">Hutchinson</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2004-05</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="given">Teresa</namePart>
        <namePart type="family">Lino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Maria</namePart>
        <namePart type="given">Francisca</namePart>
        <namePart type="family">Xavier</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Fátima</namePart>
        <namePart type="family">Ferreira</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Rute</namePart>
        <namePart type="family">Costa</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Raquel</namePart>
        <namePart type="family">Silva</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>European Language Resources Association (ELRA)</publisher>
        <place>
          <placeTerm type="text">Lisbon, Portugal</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This paper proposes a methodology for obtaining sentences containing discourse markers from the World Wide Web. The proposed methodology is particularly suitable for collecting large numbers of discourse marker tokens. It relies on the automatic identification of discourse markers, and we show that this can be done with an accuracy within 9% of that of human performance. We also show that the distribution of discourse markers on the web correlates highly with those in a conventional balanced corpus.</abstract>
    <!-- Citation key and the URL of the paper's PDF. -->
    <identifier type="citekey">hutchinson-2004-mining</identifier>
    <location>
      <url>http://www.lrec-conf.org/proceedings/lrec2004/pdf/333.pdf</url>
    </location>
    <part>
      <date>2004-05</date>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Mining the Web for Discourse Markers
%A Hutchinson, Ben
%Y Lino, Maria Teresa
%Y Xavier, Maria Francisca
%Y Ferreira, Fátima
%Y Costa, Rute
%Y Silva, Raquel
%S Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04)
%D 2004
%8 May
%I European Language Resources Association (ELRA)
%C Lisbon, Portugal
%F hutchinson-2004-mining
%X This paper proposes a methodology for obtaining sentences containing discourse markers from the World Wide Web. The proposed methodology is particularly suitable for collecting large numbers of discourse marker tokens. It relies on the automatic identification of discourse markers, and we show that this can be done with an accuracy within 9% of that of human performance. We also show that the distribution of discourse markers on the web correlates highly with those in a conventional balanced corpus.
%U http://www.lrec-conf.org/proceedings/lrec2004/pdf/333.pdf
Markdown (Informal)
[Mining the Web for Discourse Markers](http://www.lrec-conf.org/proceedings/lrec2004/pdf/333.pdf) (Hutchinson, LREC 2004)
ACL
- Ben Hutchinson. 2004. Mining the Web for Discourse Markers. In Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC’04), Lisbon, Portugal. European Language Resources Association (ELRA).