@inproceedings{alghamdi-etal-2016-empirical,
title = "An Empirical Study of {A}rabic Formulaic Sequence Extraction Methods",
author = "Alghamdi, Ayman and
Atwell, Eric and
Brierley, Claire",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Declerck, Thierry and
Goggi, Sara and
Grobelnik, Marko and
Maegaard, Bente and
Mariani, Joseph and
Mazo, Helene and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Tenth International Conference on Language Resources and Evaluation ({LREC}'16)",
month = may,
year = "2016",
address = "Portoro{\v{z}}, Slovenia",
publisher = "European Language Resources Association (ELRA)",
url = "https://aclanthology.org/L16-1080",
pages = "502--506",
abstract = "This paper aims to implement what is referred to as the collocation of the Arabic keywords approach for extracting formulaic sequences (FSs) in the form of high frequency but semantically regular formulas that are not restricted to any syntactic construction or semantic domain. The study applies several distributional semantic models in order to automatically extract relevant FSs related to Arabic keywords. The data sets used in this experiment are rendered from a new developed corpus-based Arabic wordlist consisting of 5,189 lexical items which represent a variety of modern standard Arabic (MSA) genres and regions, the new wordlist being based on an overlapping frequency based on a comprehensive comparison of four large Arabic corpora with a total size of over 8 billion running words. Empirical n-best precision evaluation methods are used to determine the best association measures (AMs) for extracting high frequency and meaningful FSs. The gold standard reference FSs list was developed in previous studies and manually evaluated against well-established quantitative and qualitative criteria. The results demonstrate that the MI.log{\_}f AM achieved the highest results in extracting significant FSs from the large MSA corpus, while the T-score association measure achieved the worst results.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="alghamdi-etal-2016-empirical">
<titleInfo>
<title>An Empirical Study of Arabic Formulaic Sequence Extraction Methods</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ayman</namePart>
<namePart type="family">Alghamdi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Eric</namePart>
<namePart type="family">Atwell</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claire</namePart>
<namePart type="family">Brierley</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2016-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Marko</namePart>
<namePart type="family">Grobelnik</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Helene</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Portorož, Slovenia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>This paper aims to implement what is referred to as the collocation of the Arabic keywords approach for extracting formulaic sequences (FSs) in the form of high frequency but semantically regular formulas that are not restricted to any syntactic construction or semantic domain. The study applies several distributional semantic models in order to automatically extract relevant FSs related to Arabic keywords. The data sets used in this experiment are rendered from a new developed corpus-based Arabic wordlist consisting of 5,189 lexical items which represent a variety of modern standard Arabic (MSA) genres and regions, the new wordlist being based on an overlapping frequency based on a comprehensive comparison of four large Arabic corpora with a total size of over 8 billion running words. Empirical n-best precision evaluation methods are used to determine the best association measures (AMs) for extracting high frequency and meaningful FSs. The gold standard reference FSs list was developed in previous studies and manually evaluated against well-established quantitative and qualitative criteria. The results demonstrate that the MI.log_f AM achieved the highest results in extracting significant FSs from the large MSA corpus, while the T-score association measure achieved the worst results.</abstract>
<identifier type="citekey">alghamdi-etal-2016-empirical</identifier>
<location>
<url>https://aclanthology.org/L16-1080</url>
</location>
<part>
<date>2016-05</date>
<extent unit="page">
<start>502</start>
<end>506</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T An Empirical Study of Arabic Formulaic Sequence Extraction Methods
%A Alghamdi, Ayman
%A Atwell, Eric
%A Brierley, Claire
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Grobelnik, Marko
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Helene
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC’16)
%D 2016
%8 May
%I European Language Resources Association (ELRA)
%C Portorož, Slovenia
%F alghamdi-etal-2016-empirical
%X This paper aims to implement what is referred to as the collocation of the Arabic keywords approach for extracting formulaic sequences (FSs) in the form of high frequency but semantically regular formulas that are not restricted to any syntactic construction or semantic domain. The study applies several distributional semantic models in order to automatically extract relevant FSs related to Arabic keywords. The data sets used in this experiment are rendered from a new developed corpus-based Arabic wordlist consisting of 5,189 lexical items which represent a variety of modern standard Arabic (MSA) genres and regions, the new wordlist being based on an overlapping frequency based on a comprehensive comparison of four large Arabic corpora with a total size of over 8 billion running words. Empirical n-best precision evaluation methods are used to determine the best association measures (AMs) for extracting high frequency and meaningful FSs. The gold standard reference FSs list was developed in previous studies and manually evaluated against well-established quantitative and qualitative criteria. The results demonstrate that the MI.log_f AM achieved the highest results in extracting significant FSs from the large MSA corpus, while the T-score association measure achieved the worst results.
%U https://aclanthology.org/L16-1080
%P 502-506
Markdown (Informal)
[An Empirical Study of Arabic Formulaic Sequence Extraction Methods](https://aclanthology.org/L16-1080) (Alghamdi et al., LREC 2016)
ACL