@inproceedings{zaghouani-etal-2010-adapting,
title = "Adapting a resource-light highly multilingual Named Entity Recognition system to {A}rabic",
author = "Zaghouani, Wajdi and
Pouliquen, Bruno and
Ebrahim, Mohamed and
Steinberger, Ralf",
editor = "Calzolari, Nicoletta and
Choukri, Khalid and
Maegaard, Bente and
Mariani, Joseph and
Odijk, Jan and
Piperidis, Stelios and
Rosner, Mike and
Tapias, Daniel",
booktitle = "Proceedings of the Seventh International Conference on Language Resources and Evaluation ({LREC}'10)",
month = may,
year = "2010",
address = "Valletta, Malta",
publisher = "European Language Resources Association (ELRA)",
url = "http://www.lrec-conf.org/proceedings/lrec2010/pdf/669_Paper.pdf",
abstract = "We present a fully functional Arabic information extraction (IE) system that is used to analyze large volumes of news texts every day to extract the named entity (NE) types person, organization, location, date and number, as well as quotations (direct reported speech) by and about people. The Named Entity Recognition (NER) system was not developed for Arabic, but - instead - a highly multilingual, almost language-independent NER system was adapted to also cover Arabic. The Semitic language Arabic substantially differs from the Indo-European and Finno-Ugric languages currently covered. This paper thus describes what Arabic language-specific resources had to be developed and what changes needed to be made to the otherwise language-independent rule set in order to be applicable to the Arabic language. The achieved evaluation results are generally satisfactory, but could be improved for certain entity types. The results of the IE tools can be seen on the Arabic pages of the freely accessible Europe Media Monitor (EMM) application NewsExplorer, which can be found at \url{http://press.jrc.it/overview.html}.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="zaghouani-etal-2010-adapting">
<titleInfo>
<title>Adapting a resource-light highly multilingual Named Entity Recognition system to Arabic</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wajdi</namePart>
<namePart type="family">Zaghouani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bruno</namePart>
<namePart type="family">Pouliquen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mohamed</namePart>
<namePart type="family">Ebrahim</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ralf</namePart>
<namePart type="family">Steinberger</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2010-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Rosner</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Tapias</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association (ELRA)</publisher>
<place>
<placeTerm type="text">Valletta, Malta</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We present a fully functional Arabic information extraction (IE) system that is used to analyze large volumes of news texts every day to extract the named entity (NE) types person, organization, location, date and number, as well as quotations (direct reported speech) by and about people. The Named Entity Recognition (NER) system was not developed for Arabic, but - instead - a highly multilingual, almost language-independent NER system was adapted to also cover Arabic. The Semitic language Arabic substantially differs from the Indo-European and Finno-Ugric languages currently covered. This paper thus describes what Arabic language-specific resources had to be developed and what changes needed to be made to the otherwise language-independent rule set in order to be applicable to the Arabic language. The achieved evaluation results are generally satisfactory, but could be improved for certain entity types. The results of the IE tools can be seen on the Arabic pages of the freely accessible Europe Media Monitor (EMM) application NewsExplorer, which can be found at http://press.jrc.it/overview.html.</abstract>
<identifier type="citekey">zaghouani-etal-2010-adapting</identifier>
<location>
<url>http://www.lrec-conf.org/proceedings/lrec2010/pdf/669_Paper.pdf</url>
</location>
<part>
<date>2010-05</date>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Adapting a resource-light highly multilingual Named Entity Recognition system to Arabic
%A Zaghouani, Wajdi
%A Pouliquen, Bruno
%A Ebrahim, Mohamed
%A Steinberger, Ralf
%Y Calzolari, Nicoletta
%Y Choukri, Khalid
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Odijk, Jan
%Y Piperidis, Stelios
%Y Rosner, Mike
%Y Tapias, Daniel
%S Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC’10)
%D 2010
%8 May
%I European Language Resources Association (ELRA)
%C Valletta, Malta
%F zaghouani-etal-2010-adapting
%X We present a fully functional Arabic information extraction (IE) system that is used to analyze large volumes of news texts every day to extract the named entity (NE) types person, organization, location, date and number, as well as quotations (direct reported speech) by and about people. The Named Entity Recognition (NER) system was not developed for Arabic, but - instead - a highly multilingual, almost language-independent NER system was adapted to also cover Arabic. The Semitic language Arabic substantially differs from the Indo-European and Finno-Ugric languages currently covered. This paper thus describes what Arabic language-specific resources had to be developed and what changes needed to be made to the otherwise language-independent rule set in order to be applicable to the Arabic language. The achieved evaluation results are generally satisfactory, but could be improved for certain entity types. The results of the IE tools can be seen on the Arabic pages of the freely accessible Europe Media Monitor (EMM) application NewsExplorer, which can be found at http://press.jrc.it/overview.html.
%U http://www.lrec-conf.org/proceedings/lrec2010/pdf/669_Paper.pdf
Markdown (Informal)
[Adapting a resource-light highly multilingual Named Entity Recognition system to Arabic](http://www.lrec-conf.org/proceedings/lrec2010/pdf/669_Paper.pdf) (Zaghouani et al., LREC 2010)
ACL