@inproceedings{drouin-etal-2020-automatic,
title = "Automatic Term Extraction from Newspaper Corpora: Making the Most of Specificity and Common Features",
author = "Drouin, Patrick and
Morel, Jean-Beno{\^i}t and
L{'}Homme, Marie-Claude",
editor = "Daille, B{\'e}atrice and
Kageura, Kyo and
Terryn, Ayla Rigouts",
booktitle = "Proceedings of the 6th International Workshop on Computational Terminology",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.computerm-1.1/",
pages = "1--7",
language = "eng",
ISBN = "979-10-95546-57-3",
abstract = "The first step of any terminological work is to setup a reliable, specialized corpus composed of documents written by specialists and then to apply automatic term extraction (ATE) methods to this corpus in order to retrieve a first list of potential terms. In this paper, the experiment we describe differs quite drastically from this usual process since we are applying ATE to unspecialized corpora. The corpus used for this study was built from newspaper articles retrieved from the Web using a short list of keywords. The general intuition on which this research is based is that ATE based corpus comparison techniques can be used to capture both similarities and dissimilarities between corpora. The former are exploited through a termhood measure and the latter through word embeddings. Our initial results were validated manually and show that combining a traditional ATE method that focuses on dissimilarities between corpora to newer methods that exploit similarities (more specifically distributional features of candidates) leads to promising results."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="drouin-etal-2020-automatic">
<titleInfo>
<title>Automatic Term Extraction from Newspaper Corpora: Making the Most of Specificity and Common Features</title>
</titleInfo>
<name type="personal">
<namePart type="given">Patrick</namePart>
<namePart type="family">Drouin</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jean-Benoît</namePart>
<namePart type="family">Morel</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marie-Claude</namePart>
<namePart type="family">L’Homme</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 6th International Workshop on Computational Terminology</title>
</titleInfo>
<name type="personal">
<namePart type="given">Béatrice</namePart>
<namePart type="family">Daille</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Kyo</namePart>
<namePart type="family">Kageura</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ayla</namePart>
<namePart type="given">Rigouts</namePart>
<namePart type="family">Terryn</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-57-3</identifier>
</relatedItem>
<abstract>The first step of any terminological work is to setup a reliable, specialized corpus composed of documents written by specialists and then to apply automatic term extraction (ATE) methods to this corpus in order to retrieve a first list of potential terms. In this paper, the experiment we describe differs quite drastically from this usual process since we are applying ATE to unspecialized corpora. The corpus used for this study was built from newspaper articles retrieved from the Web using a short list of keywords. The general intuition on which this research is based is that ATE based corpus comparison techniques can be used to capture both similarities and dissimilarities between corpora. The former are exploited through a termhood measure and the latter through word embeddings. Our initial results were validated manually and show that combining a traditional ATE method that focuses on dissimilarities between corpora to newer methods that exploit similarities (more specifically distributional features of candidates) leads to promising results.</abstract>
<identifier type="citekey">drouin-etal-2020-automatic</identifier>
<location>
<url>https://aclanthology.org/2020.computerm-1.1/</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>1</start>
<end>7</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Automatic Term Extraction from Newspaper Corpora: Making the Most of Specificity and Common Features
%A Drouin, Patrick
%A Morel, Jean-Benoît
%A L’Homme, Marie-Claude
%Y Daille, Béatrice
%Y Kageura, Kyo
%Y Terryn, Ayla Rigouts
%S Proceedings of the 6th International Workshop on Computational Terminology
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-57-3
%G eng
%F drouin-etal-2020-automatic
%X The first step of any terminological work is to setup a reliable, specialized corpus composed of documents written by specialists and then to apply automatic term extraction (ATE) methods to this corpus in order to retrieve a first list of potential terms. In this paper, the experiment we describe differs quite drastically from this usual process since we are applying ATE to unspecialized corpora. The corpus used for this study was built from newspaper articles retrieved from the Web using a short list of keywords. The general intuition on which this research is based is that ATE based corpus comparison techniques can be used to capture both similarities and dissimilarities between corpora. The former are exploited through a termhood measure and the latter through word embeddings. Our initial results were validated manually and show that combining a traditional ATE method that focuses on dissimilarities between corpora to newer methods that exploit similarities (more specifically distributional features of candidates) leads to promising results.
%U https://aclanthology.org/2020.computerm-1.1/
%P 1-7
Markdown (Informal)
[Automatic Term Extraction from Newspaper Corpora: Making the Most of Specificity and Common Features](https://aclanthology.org/2020.computerm-1.1/) (Drouin et al., CompuTerm 2020)
ACL