@inproceedings{santini-2006-identifying,
title = "Identifying Genres of Web Pages",
author = "Santini, Marina",
editor = "Mertens, Piet and
Fairon, C{\'e}drick and
Dister, Anne and
Watrin, Patrick",
booktitle = "Actes de la 13{\`e}me conf{\'e}rence sur le Traitement Automatique des Langues Naturelles. Articles longs",
month = apr,
year = "2006",
address = "Leuven, Belgique",
publisher = "ATALA",
url = "https://aclanthology.org/2006.jeptalnrecital-long.28",
pages = "308--317",
abstract = "In this paper, we present an inferential model for text type and genre identification of Web pages, where text types are inferred using a modified form of Bayes{'} theorem, and genres are derived using a few simple if-then rules. As the genre system on the Web is a complex phenomenon, and Web pages are usually more unpredictable and individualized than paper documents, we propose this approach as an alternative to unsupervised and supervised techniques. The inferential model allows a classification that can accommodate genres that are not entirely standardized, and is more capable of reading a Web page, which is mixed, rarely corresponding to an ideal type and often showing a mixture of genres or no genre at all. A proper evaluation of such a model remains an open issue.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="santini-2006-identifying">
<titleInfo>
<title>Identifying Genres of Web Pages</title>
</titleInfo>
<name type="personal">
<namePart type="given">Marina</namePart>
<namePart type="family">Santini</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2006-04</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs</title>
</titleInfo>
<name type="personal">
<namePart type="given">Piet</namePart>
<namePart type="family">Mertens</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cédrick</namePart>
<namePart type="family">Fairon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anne</namePart>
<namePart type="family">Dister</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Watrin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>ATALA</publisher>
<place>
<placeTerm type="text">Leuven, Belgique</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this paper, we present an inferential model for text type and genre identification of Web pages, where text types are inferred using a modified form of Bayes’ theorem, and genres are derived using a few simple if-then rules. As the genre system on the Web is a complex phenomenon, and Web pages are usually more unpredictable and individualized than paper documents, we propose this approach as an alternative to unsupervised and supervised techniques. The inferential model allows a classification that can accommodate genres that are not entirely standardized, and is more capable of reading a Web page, which is mixed, rarely corresponding to an ideal type and often showing a mixture of genres or no genre at all. A proper evaluation of such a model remains an open issue.</abstract>
<identifier type="citekey">santini-2006-identifying</identifier>
<location>
<url>https://aclanthology.org/2006.jeptalnrecital-long.28</url>
</location>
<part>
<date>2006-04</date>
<extent unit="page">
<start>308</start>
<end>317</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Identifying Genres of Web Pages
%A Santini, Marina
%Y Mertens, Piet
%Y Fairon, Cédrick
%Y Dister, Anne
%Y Watrin, Patrick
%S Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs
%D 2006
%8 April
%I ATALA
%C Leuven, Belgique
%F santini-2006-identifying
%X In this paper, we present an inferential model for text type and genre identification of Web pages, where text types are inferred using a modified form of Bayes’ theorem, and genres are derived using a few simple if-then rules. As the genre system on the Web is a complex phenomenon, and Web pages are usually more unpredictable and individualized than paper documents, we propose this approach as an alternative to unsupervised and supervised techniques. The inferential model allows a classification that can accommodate genres that are not entirely standardized, and is more capable of reading a Web page, which is mixed, rarely corresponding to an ideal type and often showing a mixture of genres or no genre at all. A proper evaluation of such a model remains an open issue.
%U https://aclanthology.org/2006.jeptalnrecital-long.28
%P 308-317
Markdown (Informal)
[Identifying Genres of Web Pages](https://aclanthology.org/2006.jeptalnrecital-long.28) (Santini, JEP/TALN/RECITAL 2006)
ACL
- Marina Santini. 2006. Identifying Genres of Web Pages. In Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Articles longs, pages 308–317, Leuven, Belgique. ATALA.