@inproceedings{yoon-etal-2018-word,
title = "Word-Embedding based Content Features for Automated Oral Proficiency Scoring",
author = "Yoon, Su-Youn and
Loukina, Anastassia and
Lee, Chong Min and
Mulholland, Matthew and
Wang, Xinhao and
Choi, Ikkyu",
editor = "Anke, Luis Espinosa and
Gromann, Dagmar and
Declerck, Thierry",
booktitle = "Proceedings of the Third Workshop on Semantic Deep Learning",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W18-4002",
pages = "12--22",
abstract = "In this study, we develop content features for an automated scoring system of non-native English speakers{'} spontaneous speech. The features calculate the lexical similarity between the question text and the ASR word hypothesis of the spoken response, based on traditional word vector models or word embeddings. The proposed features do not require any sample training responses for each question, and this is a strong advantage since collecting question-specific data is an expensive task, and sometimes even impossible due to concerns about question exposure. We explore the impact of these new features on the automated scoring of two different question types: (a) providing opinions on familiar topics and (b) answering a question about a stimulus material. The proposed features showed statistically significant correlations with the oral proficiency scores, and the combination of new features with the speech-driven features achieved a small but significant further improvement for the latter question type. Further analyses suggested that the new features were effective in assigning more accurate scores for responses with serious content issues.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yoon-etal-2018-word">
<titleInfo>
<title>Word-Embedding based Content Features for Automated Oral Proficiency Scoring</title>
</titleInfo>
<name type="personal">
<namePart type="given">Su-Youn</namePart>
<namePart type="family">Yoon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anastassia</namePart>
<namePart type="family">Loukina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chong</namePart>
<namePart type="given">Min</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Matthew</namePart>
<namePart type="family">Mulholland</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Xinhao</namePart>
<namePart type="family">Wang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ikkyu</namePart>
<namePart type="family">Choi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2018-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Third Workshop on Semantic Deep Learning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Luis</namePart>
<namePart type="given">Espinosa</namePart>
<namePart type="family">Anke</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dagmar</namePart>
<namePart type="family">Gromann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Thierry</namePart>
<namePart type="family">Declerck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Santa Fe, New Mexico</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>In this study, we develop content features for an automated scoring system of non-native English speakers’ spontaneous speech. The features calculate the lexical similarity between the question text and the ASR word hypothesis of the spoken response, based on traditional word vector models or word embeddings. The proposed features do not require any sample training responses for each question, and this is a strong advantage since collecting question-specific data is an expensive task, and sometimes even impossible due to concerns about question exposure. We explore the impact of these new features on the automated scoring of two different question types: (a) providing opinions on familiar topics and (b) answering a question about a stimulus material. The proposed features showed statistically significant correlations with the oral proficiency scores, and the combination of new features with the speech-driven features achieved a small but significant further improvement for the latter question type. Further analyses suggested that the new features were effective in assigning more accurate scores for responses with serious content issues.</abstract>
<identifier type="citekey">yoon-etal-2018-word</identifier>
<location>
<url>https://aclanthology.org/W18-4002</url>
</location>
<part>
<date>2018-08</date>
<extent unit="page">
<start>12</start>
<end>22</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Word-Embedding based Content Features for Automated Oral Proficiency Scoring
%A Yoon, Su-Youn
%A Loukina, Anastassia
%A Lee, Chong Min
%A Mulholland, Matthew
%A Wang, Xinhao
%A Choi, Ikkyu
%Y Anke, Luis Espinosa
%Y Gromann, Dagmar
%Y Declerck, Thierry
%S Proceedings of the Third Workshop on Semantic Deep Learning
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico
%F yoon-etal-2018-word
%X In this study, we develop content features for an automated scoring system of non-native English speakers’ spontaneous speech. The features calculate the lexical similarity between the question text and the ASR word hypothesis of the spoken response, based on traditional word vector models or word embeddings. The proposed features do not require any sample training responses for each question, and this is a strong advantage since collecting question-specific data is an expensive task, and sometimes even impossible due to concerns about question exposure. We explore the impact of these new features on the automated scoring of two different question types: (a) providing opinions on familiar topics and (b) answering a question about a stimulus material. The proposed features showed statistically significant correlations with the oral proficiency scores, and the combination of new features with the speech-driven features achieved a small but significant further improvement for the latter question type. Further analyses suggested that the new features were effective in assigning more accurate scores for responses with serious content issues.
%U https://aclanthology.org/W18-4002
%P 12-22
Markdown (Informal)
[Word-Embedding based Content Features for Automated Oral Proficiency Scoring](https://aclanthology.org/W18-4002) (Yoon et al., SemDeep 2018)
ACL