% Poster paper in the TALN 2006 proceedings (ACL Anthology ID 2006.jeptalnrecital-poster.2).
% Names use the "Last, First" form; the compound surname containing the lowercase
% particle "do" is braced so BibTeX keeps it as a single last-name token.
@inproceedings{antunes-etal-2006-corpus,
    title = "Corpus-based extraction and identification of {Portuguese} Multiword Expressions",
    author = "Antunes, Sandra and
      {Bacelar do Nascimento}, Maria Fernanda and
      Casteleiro, Jo{\~a}o Miguel and
      Mendes, Am{\'a}lia and
      Pereira, Lu{\'\i}sa and
      S{\'a}, Tiago",
    editor = "Mertens, Piet and
      Fairon, C{\'e}drick and
      Dister, Anne and
      Watrin, Patrick",
    booktitle = "Actes de la 13{\`e}me conf{\'e}rence sur le Traitement Automatique des Langues Naturelles. Posters",
    month = apr,
    year = "2006",
    address = "Leuven, Belgique",
    publisher = "ATALA",
    url = "https://aclanthology.org/2006.jeptalnrecital-poster.2",
    pages = "389--397",
    abstract = "This presentation reports on an on-going project aimed at building a large lexical database of corpus-extracted multiword (MW) expressions for the Portuguese language. MW expressions were automatically extracted from a balanced 50 million word corpus compiled for this project, furthermore these were statistically interpreted using lexical association measures, followed by a manual validation process. The lexical database covers different types of MW expressions, from named entities to lexical associations with different degrees of cohesion, ranging from totally frozen idioms to favoured co-occurring forms, such as collocations. We aim to achieve two main objectives with this resource. Firstly to build on the large set of data of different types of MW expressions, thus revising existing typologies of collocations and integrating them in a larger theory of MW units. Secondly, to use the extensive hand-checked data as training data to evaluate existing statistical lexical association measures.",
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for citekey antunes-etal-2006-corpus: one <mods> item (the paper)
     with a host <relatedItem> describing the proceedings volume it appeared in. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="antunes-etal-2006-corpus">
    <titleInfo>
      <title>Corpus-based extraction and identification of Portuguese Multiword Expressions</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Sandra</namePart>
      <namePart type="family">Antunes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- Compound surname: "Fernanda" is a second given name, not part of the family name. -->
      <namePart type="given">Maria Fernanda</namePart>
      <namePart type="family">Bacelar do Nascimento</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <!-- "Miguel" is a second given name, not part of the family name. -->
      <namePart type="given">João Miguel</namePart>
      <namePart type="family">Casteleiro</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Amália</namePart>
      <namePart type="family">Mendes</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Luísa</namePart>
      <namePart type="family">Pereira</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Tiago</namePart>
      <namePart type="family">Sá</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2006-04</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Posters</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Piet</namePart>
        <namePart type="family">Mertens</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Cédrick</namePart>
        <namePart type="family">Fairon</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Anne</namePart>
        <namePart type="family">Dister</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Patrick</namePart>
        <namePart type="family">Watrin</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>ATALA</publisher>
        <place>
          <placeTerm type="text">Leuven, Belgique</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>This presentation reports on an on-going project aimed at building a large lexical database of corpus-extracted multiword (MW) expressions for the Portuguese language. MW expressions were automatically extracted from a balanced 50 million word corpus compiled for this project, furthermore these were statistically interpreted using lexical association measures, followed by a manual validation process. The lexical database covers different types of MW expressions, from named entities to lexical associations with different degrees of cohesion, ranging from totally frozen idioms to favoured co-occurring forms, such as collocations. We aim to achieve two main objectives with this resource. Firstly to build on the large set of data of different types of MW expressions, thus revising existing typologies of collocations and integrating them in a larger theory of MW units. Secondly, to use the extensive hand-checked data as training data to evaluate existing statistical lexical association measures.</abstract>
    <identifier type="citekey">antunes-etal-2006-corpus</identifier>
    <location>
      <url>https://aclanthology.org/2006.jeptalnrecital-poster.2</url>
    </location>
    <!-- Position of the paper within the host volume. -->
    <part>
      <date>2006-04</date>
      <extent unit="page">
        <start>389</start>
        <end>397</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Corpus-based extraction and identification of Portuguese Multiword Expressions
%A Antunes, Sandra
%A Bacelar do Nascimento, Maria Fernanda
%A Casteleiro, João Miguel
%A Mendes, Amália
%A Pereira, Luísa
%A Sá, Tiago
%Y Mertens, Piet
%Y Fairon, Cédrick
%Y Dister, Anne
%Y Watrin, Patrick
%S Actes de la 13ème conférence sur le Traitement Automatique des Langues Naturelles. Posters
%D 2006
%8 April
%I ATALA
%C Leuven, Belgique
%F antunes-etal-2006-corpus
%X This presentation reports on an on-going project aimed at building a large lexical database of corpus-extracted multiword (MW) expressions for the Portuguese language. MW expressions were automatically extracted from a balanced 50 million word corpus compiled for this project, furthermore these were statistically interpreted using lexical association measures, followed by a manual validation process. The lexical database covers different types of MW expressions, from named entities to lexical associations with different degrees of cohesion, ranging from totally frozen idioms to favoured co-occurring forms, such as collocations. We aim to achieve two main objectives with this resource. Firstly to build on the large set of data of different types of MW expressions, thus revising existing typologies of collocations and integrating them in a larger theory of MW units. Secondly, to use the extensive hand-checked data as training data to evaluate existing statistical lexical association measures.
%U https://aclanthology.org/2006.jeptalnrecital-poster.2
%P 389-397
Markdown (Informal)
[Corpus-based extraction and identification of Portuguese Multiword Expressions](https://aclanthology.org/2006.jeptalnrecital-poster.2) (Antunes et al., JEP/TALN/RECITAL 2006)
ACL