% Conference paper from the ICON 2020 TechDOfication shared-task proceedings
% (the url field points at the ACL Anthology record for this paper).
% Mixed-case title words are brace-protected as whole words so that
% sentence-casing bibliography styles keep their capitalisation.
% The address field holds the place given in the source record (Patna, India).
@inproceedings{balouchzahi-etal-2020-mucs,
    title = "{MUCS}@{TechDOfication} using {FineTuned} Vectors and n-grams",
    author = "Balouchzahi, Fazlourrahman and
      Anusha, M D and
      Shashirekha, H L",
    editor = "Sharma, Dipti Misra and
      Ekbal, Asif and
      Arora, Karunesh and
      Naskar, Sudip Kumar and
      Ganguly, Dipankar and
      L, Sobha and
      Mamidi, Radhika and
      Arora, Sunita and
      Mishra, Pruthwik and
      Mujadia, Vandan",
    booktitle = "Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task",
    month = dec,
    year = "2020",
    address = "Patna, India",
    publisher = "NLP Association of India (NLPAI)",
    url = "https://aclanthology.org/2020.icon-techdofication.1/",
    pages = "1--5",
    abstract = "The increase in domain specific text processing applications are demanding tools and techniques for domain specific Text Classification (TC) which may be helpful in many downstream applications like Machine Translation, Summarization, Question Answering etc. Further, many TC algorithms are applied on globally recognized languages like English giving less importance for local languages particularly Indian languages. To boost the research for technical domains and text processing activities in Indian languages, a shared task named {\textquotedblleft}TechDOfication2020{\textquotedblright} is organized by ICON'20. The objective of this shared task is to automatically identify the technical domain of a given text which provides information about coarse grained technical domains and fine grained subdomains in eight languages. To tackle this challenge we, team MUCS have proposed three models, namely, DL-FineTuned model applied for all subtasks, and VC-FineTuned and VC-ngrams models applied only for some subtasks. n-grams and word embedding with a step of fine-tuning are used as features and machine learning and deep learning algorithms are used as classifiers in the proposed models. The proposed models outperformed in most of subtasks and also obtained first rank in subTask1b (Bangla) and subTask1e (Malayalam) with f1 score of 0.8353 and 0.3851 respectively using DL-FineTuned model for both the subtasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="balouchzahi-etal-2020-mucs">
<titleInfo>
<title>MUCS@TechDOfication using FineTuned Vectors and n-grams</title>
</titleInfo>
<name type="personal">
<namePart type="given">Fazlourrahman</namePart>
<namePart type="family">Balouchzahi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">M</namePart>
<namePart type="given">D</namePart>
<namePart type="family">Anusha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">H</namePart>
<namePart type="given">L</namePart>
<namePart type="family">Shashirekha</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task</title>
</titleInfo>
<name type="personal">
<namePart type="given">Dipti</namePart>
<namePart type="given">Misra</namePart>
<namePart type="family">Sharma</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asif</namePart>
<namePart type="family">Ekbal</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Karunesh</namePart>
<namePart type="family">Arora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sudip</namePart>
<namePart type="given">Kumar</namePart>
<namePart type="family">Naskar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dipankar</namePart>
<namePart type="family">Ganguly</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sobha</namePart>
<namePart type="family">L</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Radhika</namePart>
<namePart type="family">Mamidi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sunita</namePart>
<namePart type="family">Arora</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Pruthwik</namePart>
<namePart type="family">Mishra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Vandan</namePart>
<namePart type="family">Mujadia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>NLP Association of India (NLPAI)</publisher>
<place>
<placeTerm type="text">Patna, India</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>The increase in domain specific text processing applications are demanding tools and techniques for domain specific Text Classification (TC) which may be helpful in many downstream applications like Machine Translation, Summarization, Question Answering etc. Further, many TC algorithms are applied on globally recognized languages like English giving less importance for local languages particularly Indian languages. To boost the research for technical domains and text processing activities in Indian languages, a shared task named “TechDOfication2020” is organized by ICON’20. The objective of this shared task is to automatically identify the technical domain of a given text which provides information about coarse grained technical domains and fine grained subdomains in eight languages. To tackle this challenge we, team MUCS have proposed three models, namely, DL-FineTuned model applied for all subtasks, and VC-FineTuned and VC-ngrams models applied only for some subtasks. n-grams and word embedding with a step of fine-tuning are used as features and machine learning and deep learning algorithms are used as classifiers in the proposed models. The proposed models outperformed in most of subtasks and also obtained first rank in subTask1b (Bangla) and subTask1e (Malayalam) with f1 score of 0.8353 and 0.3851 respectively using DL-FineTuned model for both the subtasks.</abstract>
<identifier type="citekey">balouchzahi-etal-2020-mucs</identifier>
<location>
<url>https://aclanthology.org/2020.icon-techdofication.1/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>1</start>
<end>5</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T MUCS@TechDOfication using FineTuned Vectors and n-grams
%A Balouchzahi, Fazlourrahman
%A Anusha, M. D.
%A Shashirekha, H. L.
%Y Sharma, Dipti Misra
%Y Ekbal, Asif
%Y Arora, Karunesh
%Y Naskar, Sudip Kumar
%Y Ganguly, Dipankar
%Y L, Sobha
%Y Mamidi, Radhika
%Y Arora, Sunita
%Y Mishra, Pruthwik
%Y Mujadia, Vandan
%S Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task
%D 2020
%8 December
%I NLP Association of India (NLPAI)
%C Patna, India
%F balouchzahi-etal-2020-mucs
%X The increase in domain specific text processing applications are demanding tools and techniques for domain specific Text Classification (TC) which may be helpful in many downstream applications like Machine Translation, Summarization, Question Answering etc. Further, many TC algorithms are applied on globally recognized languages like English giving less importance for local languages particularly Indian languages. To boost the research for technical domains and text processing activities in Indian languages, a shared task named “TechDOfication2020” is organized by ICON’20. The objective of this shared task is to automatically identify the technical domain of a given text which provides information about coarse grained technical domains and fine grained subdomains in eight languages. To tackle this challenge we, team MUCS have proposed three models, namely, DL-FineTuned model applied for all subtasks, and VC-FineTuned and VC-ngrams models applied only for some subtasks. n-grams and word embedding with a step of fine-tuning are used as features and machine learning and deep learning algorithms are used as classifiers in the proposed models. The proposed models outperformed in most of subtasks and also obtained first rank in subTask1b (Bangla) and subTask1e (Malayalam) with f1 score of 0.8353 and 0.3851 respectively using DL-FineTuned model for both the subtasks.
%U https://aclanthology.org/2020.icon-techdofication.1/
%P 1-5
Markdown (Informal)
[MUCS@TechDOfication using FineTuned Vectors and n-grams](https://aclanthology.org/2020.icon-techdofication.1/) (Balouchzahi et al., ICON 2020)
ACL
- Fazlourrahman Balouchzahi, M D Anusha, and H L Shashirekha. 2020. MUCS@TechDOfication using FineTuned Vectors and n-grams. In Proceedings of the 17th International Conference on Natural Language Processing (ICON): TechDOfication 2020 Shared Task, pages 1–5, Patna, India. NLP Association of India (NLPAI).