@inproceedings{yoon-lee-2019-content,
title = "Content Modeling for Automated Oral Proficiency Scoring System",
author = "Yoon, Su-Youn and
Lee, Chong Min",
editor = "Yannakoudakis, Helen and
Kochmar, Ekaterina and
Leacock, Claudia and
Madnani, Nitin and
Pil{\'a}n, Ildik{\'o} and
Zesch, Torsten",
booktitle = "Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications",
month = aug,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W19-4441",
doi = "10.18653/v1/W19-4441",
pages = "394--401",
abstract = "We developed an automated oral proficiency scoring system for non-native English speakers{'} spontaneous speech. Automated systems that score holistic proficiency are expected to assess a wide range of performance categories, and the content is one of the core performance categories. In order to assess the quality of the content, we trained a Siamese convolutional neural network (CNN) to model the semantic relationship between key points generated by experts and a test response. The correlation between human scores and Siamese CNN scores was comparable to human-human agreement (r=0.63), and it was higher than the baseline content features. The inclusion of Siamese CNN-based feature to the existing state-of-the-art automated scoring model achieved a small but statistically significant improvement. However, the new model suffered from score inflation for long atypical responses with serious content issues. We investigated the reasons of this score inflation by analyzing the associations with linguistic features and identifying areas strongly associated with the score errors.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="yoon-lee-2019-content">
<titleInfo>
<title>Content Modeling for Automated Oral Proficiency Scoring System</title>
</titleInfo>
<name type="personal">
<namePart type="given">Su-Youn</namePart>
<namePart type="family">Yoon</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chong</namePart>
<namePart type="given">Min</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2019-08</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications</title>
</titleInfo>
<name type="personal">
<namePart type="given">Helen</namePart>
<namePart type="family">Yannakoudakis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Kochmar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Claudia</namePart>
<namePart type="family">Leacock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Nitin</namePart>
<namePart type="family">Madnani</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ildikó</namePart>
<namePart type="family">Pilán</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Torsten</namePart>
<namePart type="family">Zesch</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Florence, Italy</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We developed an automated oral proficiency scoring system for non-native English speakers’ spontaneous speech. Automated systems that score holistic proficiency are expected to assess a wide range of performance categories, and the content is one of the core performance categories. In order to assess the quality of the content, we trained a Siamese convolutional neural network (CNN) to model the semantic relationship between key points generated by experts and a test response. The correlation between human scores and Siamese CNN scores was comparable to human-human agreement (r=0.63), and it was higher than the baseline content features. The inclusion of Siamese CNN-based feature to the existing state-of-the-art automated scoring model achieved a small but statistically significant improvement. However, the new model suffered from score inflation for long atypical responses with serious content issues. We investigated the reasons of this score inflation by analyzing the associations with linguistic features and identifying areas strongly associated with the score errors.</abstract>
<identifier type="citekey">yoon-lee-2019-content</identifier>
<identifier type="doi">10.18653/v1/W19-4441</identifier>
<location>
<url>https://aclanthology.org/W19-4441</url>
</location>
<part>
<date>2019-08</date>
<extent unit="page">
<start>394</start>
<end>401</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Content Modeling for Automated Oral Proficiency Scoring System
%A Yoon, Su-Youn
%A Lee, Chong Min
%Y Yannakoudakis, Helen
%Y Kochmar, Ekaterina
%Y Leacock, Claudia
%Y Madnani, Nitin
%Y Pilán, Ildikó
%Y Zesch, Torsten
%S Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F yoon-lee-2019-content
%X We developed an automated oral proficiency scoring system for non-native English speakers’ spontaneous speech. Automated systems that score holistic proficiency are expected to assess a wide range of performance categories, and the content is one of the core performance categories. In order to assess the quality of the content, we trained a Siamese convolutional neural network (CNN) to model the semantic relationship between key points generated by experts and a test response. The correlation between human scores and Siamese CNN scores was comparable to human-human agreement (r=0.63), and it was higher than the baseline content features. The inclusion of Siamese CNN-based feature to the existing state-of-the-art automated scoring model achieved a small but statistically significant improvement. However, the new model suffered from score inflation for long atypical responses with serious content issues. We investigated the reasons of this score inflation by analyzing the associations with linguistic features and identifying areas strongly associated with the score errors.
%R 10.18653/v1/W19-4441
%U https://aclanthology.org/W19-4441
%U https://doi.org/10.18653/v1/W19-4441
%P 394-401
Markdown (Informal)
[Content Modeling for Automated Oral Proficiency Scoring System](https://aclanthology.org/W19-4441) (Yoon & Lee, BEA 2019)
ACL