% Workshop paper (Third Workshop on Semantic Deep Learning, 2018). `month` uses the standard BibTeX macro so the style controls its rendering.
@InProceedings{yoon-EtAl:2018:W18-40,
  author    = {Yoon, Su-Youn and Loukina, Anastassia and Lee, Chong Min and Mulholland, Matthew and Wang, Xinhao and Choi, Ikkyu},
  title     = {Word-Embedding based Content Features for Automated Oral Proficiency Scoring},
  booktitle = {Proceedings of the Third Workshop on Semantic Deep Learning},
  month     = aug,
  year      = {2018},
  address   = {Santa Fe, New Mexico},
  publisher = {Association for Computational Linguistics},
  pages     = {12--22},
  abstract  = {In this study, we develop content features for an automated scoring system of non-native English speakers' spontaneous speech. The features calculate the lexical similarity between the question text and the ASR word hypothesis of the spoken response, based on traditional word vector models or word embeddings. The proposed features do not require any sample training responses for each question, and this is a strong advantage since collecting question-specific data is an expensive task, and sometimes even impossible due to concerns about question exposure. We explore the impact of these new features on the automated scoring of two different question types: (a) providing opinions on familiar topics and (b) answering a question about a stimulus material. The proposed features showed statistically significant correlations with the oral proficiency scores, and the combination of new features with the speech-driven features achieved a small but significant further improvement for the latter question type. Further analyses suggested that the new features were effective in assigning more accurate scores for responses with serious content issues.},
  url       = {http://www.aclweb.org/anthology/W18-4002},
}

