@inproceedings{fairon-etal-2008-glossanet,
title = "{G}lossa{N}et 2: a linguistic search engine for {RSS}-based corpora",
author = "Fairon, C{\'e}drick and
Mac{\'e}, K{\'e}vin and
Naets, Hubert",
editor = "Evert, Stefan and
Kilgarriff, Adam and
Sharoff, Serge",
booktitle = "Proceedings of the 4th Web as Corpus Workshop",
month = jun,
year = "2008",
address = "Marrakech, Morocco",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2008.wac-1.6/",
pages = "34--39",
abstract = "This paper presents GlossaNet 2, a free online concordance service that enables users to search into dynamic Web corpora. Two steps are involved in using GlossaNet. At first, users define a corpus by selecting RSS feeds in a preselected pool of sources (they can also add their own RSS feeds). These sources will be visited on a regular basis by a crawler in order to generate a dynamic corpus. Secondly, the user can register one or more search queries on his / her dynamic corpus. Search queries will be re-applied on the corpus every time it is updated, and new concordances will be recorded for the user (results can be emailed, published for the user in a privative RSS feed, or they can be viewed online). This service integrates two preexisting software: Corporator (Fairon, 2006), a program that creates corpora by downloading and filtering RSS feeds, and Unitex (Paumier, 2003), an open source corpus processor that relies on linguistic resources. After a short introduction, we will briefly present the concept of {\textquotedblleft}RSS corpora{\textquotedblright} and the assets of this approach to corpus development. We will then give an overview of the GlossaNet architecture and present various cases of use."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="fairon-etal-2008-glossanet">
<titleInfo>
<title>GlossaNet 2: a linguistic search engine for RSS-based corpora</title>
</titleInfo>
<name type="personal">
<namePart type="given">Cédrick</namePart>
<namePart type="family">Fairon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kévin</namePart>
<namePart type="family">Macé</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hubert</namePart>
<namePart type="family">Naets</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2008-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 4th Web as Corpus Workshop</title>
</titleInfo>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Evert</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Adam</namePart>
<namePart type="family">Kilgarriff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Serge</namePart>
<namePart type="family">Sharoff</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marrakech, Morocco</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper presents GlossaNet 2, a free online concordance service that enables users to search into dynamic Web corpora. Two steps are involved in using GlossaNet. At first, users define a corpus by selecting RSS feeds in a preselected pool of sources (they can also add their own RSS feeds). These sources will be visited on a regular basis by a crawler in order to generate a dynamic corpus. Secondly, the user can register one or more search queries on his / her dynamic corpus. Search queries will be re-applied on the corpus every time it is updated, and new concordances will be recorded for the user (results can be emailed, published for the user in a privative RSS feed, or they can be viewed online). This service integrates two preexisting software: Corporator (Fairon, 2006), a program that creates corpora by downloading and filtering RSS feeds, and Unitex (Paumier, 2003), an open source corpus processor that relies on linguistic resources. After a short introduction, we will briefly present the concept of “RSS corpora” and the assets of this approach to corpus development. We will then give an overview of the GlossaNet architecture and present various cases of use.</abstract>
<identifier type="citekey">fairon-etal-2008-glossanet</identifier>
<location>
<url>https://aclanthology.org/2008.wac-1.6/</url>
</location>
<part>
<date>2008-06</date>
<extent unit="page">
<start>34</start>
<end>39</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T GlossaNet 2: a linguistic search engine for RSS-based corpora
%A Fairon, Cédrick
%A Macé, Kévin
%A Naets, Hubert
%Y Evert, Stefan
%Y Kilgarriff, Adam
%Y Sharoff, Serge
%S Proceedings of the 4th Web as Corpus Workshop
%D 2008
%8 June
%I European Language Resources Association
%C Marrakech, Morocco
%F fairon-etal-2008-glossanet
%X This paper presents GlossaNet 2, a free online concordance service that enables users to search into dynamic Web corpora. Two steps are involved in using GlossaNet. At first, users define a corpus by selecting RSS feeds in a preselected pool of sources (they can also add their own RSS feeds). These sources will be visited on a regular basis by a crawler in order to generate a dynamic corpus. Secondly, the user can register one or more search queries on his / her dynamic corpus. Search queries will be re-applied on the corpus every time it is updated, and new concordances will be recorded for the user (results can be emailed, published for the user in a privative RSS feed, or they can be viewed online). This service integrates two preexisting software: Corporator (Fairon, 2006), a program that creates corpora by downloading and filtering RSS feeds, and Unitex (Paumier, 2003), an open source corpus processor that relies on linguistic resources. After a short introduction, we will briefly present the concept of “RSS corpora” and the assets of this approach to corpus development. We will then give an overview of the GlossaNet architecture and present various cases of use.
%U https://aclanthology.org/2008.wac-1.6/
%P 34-39
Markdown (Informal)
[GlossaNet 2: a linguistic search engine for RSS-based corpora](https://aclanthology.org/2008.wac-1.6/) (Fairon et al., WAC 2008)
ACL