@inproceedings{woldemariam-2020-assessing,
title = "Assessing Users{'} Reputation from Syntactic and Semantic Information in Community Question Answering",
author = "Woldemariam, Yonas",
editor = "Calzolari, Nicoletta and
B{\'e}chet, Fr{\'e}d{\'e}ric and
Blache, Philippe and
Choukri, Khalid and
Cieri, Christopher and
Declerck, Thierry and
Goggi, Sara and
Isahara, Hitoshi and
Maegaard, Bente and
Mariani, Joseph and
Mazo, H{\'e}l{\`e}ne and
Moreno, Asuncion and
Odijk, Jan and
Piperidis, Stelios",
booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
month = may,
year = "2020",
address = "Marseille, France",
publisher = "European Language Resources Association",
url = "https://aclanthology.org/2020.lrec-1.662",
pages = "5383--5391",
abstract = "Textual content is the most significant as well as substantially the big part of CQA (Community Question Answering) forums. Users gain reputation for contributing such content. Although linguistic quality is the very essence of textual information, that does not seem to be considered in estimating users{'} reputation. As existing users{'} reputation systems seem to solely rely on vote counting, adding that bit of linguistic information surely improves their quality. In this study, we investigate the relationship between users{'} reputation and linguistic features extracted from their associated answers content. And we build statistical models on a Stack Overflow dataset that learn reputation from complex syntactic and semantic structures of such content. The resulting models reveal how users{'} writing styles in answering questions play important roles in building reputation points. In our experiments, extracting answers from systematically selected users followed by linguistic features annotation and models building. The models are evaluated on in-domain (e.g., Server Fault, Super User) and out-domain (e.g., English, Maths) datasets. We found out that the selected linguistic features have quite significant influences over reputation scores. In the best case scenario, the selected linguistic feature set could explain 80{\%} variation in reputation scores with the prediction error of 3{\%}. The performance results obtained from the baseline models have been significantly improved by adding syntactic and punctuation marks features.",
language = "English",
ISBN = "979-10-95546-34-4",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="woldemariam-2020-assessing">
<titleInfo>
<title>Assessing Users’ Reputation from Syntactic and Semantic Information in Community Question Answering</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yonas</namePart>
<namePart type="family">Woldemariam</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<language>
<languageTerm type="text">English</languageTerm>
<languageTerm type="code" authority="iso639-2b">eng</languageTerm>
</language>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Twelfth Language Resources and Evaluation Conference</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicoletta</namePart>
<namePart type="family">Calzolari</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Frédéric</namePart>
<namePart type="family">Béchet</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Philippe</namePart>
<namePart type="family">Blache</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Khalid</namePart>
<namePart type="family">Choukri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Christopher</namePart>
<namePart type="family">Cieri</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sara</namePart>
<namePart type="family">Goggi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hitoshi</namePart>
<namePart type="family">Isahara</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Bente</namePart>
<namePart type="family">Maegaard</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Joseph</namePart>
<namePart type="family">Mariani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Hélène</namePart>
<namePart type="family">Mazo</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Asuncion</namePart>
<namePart type="family">Moreno</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan</namePart>
<namePart type="family">Odijk</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stelios</namePart>
<namePart type="family">Piperidis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>European Language Resources Association</publisher>
<place>
<placeTerm type="text">Marseille, France</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
<identifier type="isbn">979-10-95546-34-4</identifier>
</relatedItem>
<abstract>Textual content is the most significant as well as substantially the big part of CQA (Community Question Answering) forums. Users gain reputation for contributing such content. Although linguistic quality is the very essence of textual information, that does not seem to be considered in estimating users’ reputation. As existing users’ reputation systems seem to solely rely on vote counting, adding that bit of linguistic information surely improves their quality. In this study, we investigate the relationship between users’ reputation and linguistic features extracted from their associated answers content. And we build statistical models on a Stack Overflow dataset that learn reputation from complex syntactic and semantic structures of such content. The resulting models reveal how users’ writing styles in answering questions play important roles in building reputation points. In our experiments, extracting answers from systematically selected users followed by linguistic features annotation and models building. The models are evaluated on in-domain (e.g., Server Fault, Super User) and out-domain (e.g., English, Maths) datasets. We found out that the selected linguistic features have quite significant influences over reputation scores. In the best case scenario, the selected linguistic feature set could explain 80% variation in reputation scores with the prediction error of 3%. The performance results obtained from the baseline models have been significantly improved by adding syntactic and punctuation marks features.</abstract>
<identifier type="citekey">woldemariam-2020-assessing</identifier>
<location>
<url>https://aclanthology.org/2020.lrec-1.662</url>
</location>
<part>
<date>2020-05</date>
<extent unit="page">
<start>5383</start>
<end>5391</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Assessing Users’ Reputation from Syntactic and Semantic Information in Community Question Answering
%A Woldemariam, Yonas
%Y Calzolari, Nicoletta
%Y Béchet, Frédéric
%Y Blache, Philippe
%Y Choukri, Khalid
%Y Cieri, Christopher
%Y Declerck, Thierry
%Y Goggi, Sara
%Y Isahara, Hitoshi
%Y Maegaard, Bente
%Y Mariani, Joseph
%Y Mazo, Hélène
%Y Moreno, Asuncion
%Y Odijk, Jan
%Y Piperidis, Stelios
%S Proceedings of the Twelfth Language Resources and Evaluation Conference
%D 2020
%8 May
%I European Language Resources Association
%C Marseille, France
%@ 979-10-95546-34-4
%G English
%F woldemariam-2020-assessing
%X Textual content is the most significant as well as substantially the big part of CQA (Community Question Answering) forums. Users gain reputation for contributing such content. Although linguistic quality is the very essence of textual information, that does not seem to be considered in estimating users’ reputation. As existing users’ reputation systems seem to solely rely on vote counting, adding that bit of linguistic information surely improves their quality. In this study, we investigate the relationship between users’ reputation and linguistic features extracted from their associated answers content. And we build statistical models on a Stack Overflow dataset that learn reputation from complex syntactic and semantic structures of such content. The resulting models reveal how users’ writing styles in answering questions play important roles in building reputation points. In our experiments, extracting answers from systematically selected users followed by linguistic features annotation and models building. The models are evaluated on in-domain (e.g., Server Fault, Super User) and out-domain (e.g., English, Maths) datasets. We found out that the selected linguistic features have quite significant influences over reputation scores. In the best case scenario, the selected linguistic feature set could explain 80% variation in reputation scores with the prediction error of 3%. The performance results obtained from the baseline models have been significantly improved by adding syntactic and punctuation marks features.
%U https://aclanthology.org/2020.lrec-1.662
%P 5383-5391
Markdown (Informal)
[Assessing Users’ Reputation from Syntactic and Semantic Information in Community Question Answering](https://aclanthology.org/2020.lrec-1.662) (Woldemariam, LREC 2020)
ACL