% ACL Anthology record W19-4448: workshop paper in the BEA 2019 proceedings.
% Acronyms are brace-protected so styles that recase titles keep them uppercase.
@inproceedings{sakakini-etal-2019-equipping,
  title     = {Equipping Educational Applications with Domain Knowledge},
  author    = {Sakakini, Tarek and
               Gong, Hongyu and
               Lee, Jong Yoon and
               Schloss, Robert and
               Xiong, JinJun and
               Bhat, Suma},
  editor    = {Yannakoudakis, Helen and
               Kochmar, Ekaterina and
               Leacock, Claudia and
               Madnani, Nitin and
               Pil{\'a}n, Ildik{\'o} and
               Zesch, Torsten},
  booktitle = {Proceedings of the Fourteenth Workshop on Innovative Use of {NLP} for Building Educational Applications},
  month     = aug,
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/W19-4448},
  doi       = {10.18653/v1/W19-4448},
  pages     = {472--477},
  abstract  = {One of the challenges of building natural language processing (NLP) applications for education is finding a large domain-specific corpus for the subject of interest (e.g., history or science). To address this challenge, we propose a tool, Dexter, that extracts a subject-specific corpus from a heterogeneous corpus, such as Wikipedia, by relying on a small seed corpus and distributed document representations. We empirically show the impact of the generated corpus on language modeling, estimating word embeddings, and consequently, distractor generation, resulting in better performances than while using a general domain corpus, a heuristically constructed domain-specific corpus, and a corpus generated by a popular system: BootCaT.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS record for the paper with citekey sakakini-etal-2019-equipping. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="sakakini-etal-2019-equipping">
    <titleInfo>
      <title>Equipping Educational Applications with Domain Knowledge</title>
    </titleInfo>
    <!-- Authors of the paper -->
    <name type="personal">
      <namePart type="given">Tarek</namePart>
      <namePart type="family">Sakakini</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Hongyu</namePart>
      <namePart type="family">Gong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Jong</namePart>
      <namePart type="given">Yoon</namePart>
      <namePart type="family">Lee</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Robert</namePart>
      <namePart type="family">Schloss</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">JinJun</namePart>
      <namePart type="family">Xiong</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Suma</namePart>
      <namePart type="family">Bhat</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2019-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Helen</namePart>
        <namePart type="family">Yannakoudakis</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ekaterina</namePart>
        <namePart type="family">Kochmar</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Claudia</namePart>
        <namePart type="family">Leacock</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nitin</namePart>
        <namePart type="family">Madnani</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Ildikó</namePart>
        <namePart type="family">Pilán</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Torsten</namePart>
        <namePart type="family">Zesch</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Florence, Italy</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>One of the challenges of building natural language processing (NLP) applications for education is finding a large domain-specific corpus for the subject of interest (e.g., history or science). To address this challenge, we propose a tool, Dexter, that extracts a subject-specific corpus from a heterogeneous corpus, such as Wikipedia, by relying on a small seed corpus and distributed document representations. We empirically show the impact of the generated corpus on language modeling, estimating word embeddings, and consequently, distractor generation, resulting in better performances than while using a general domain corpus, a heuristically constructed domain-specific corpus, and a corpus generated by a popular system: BootCaT.</abstract>
    <!-- Identifiers and URL -->
    <identifier type="citekey">sakakini-etal-2019-equipping</identifier>
    <identifier type="doi">10.18653/v1/W19-4448</identifier>
    <location>
      <url>https://aclanthology.org/W19-4448</url>
    </location>
    <!-- Issue date and page extent -->
    <part>
      <date>2019-08</date>
      <extent unit="page">
        <start>472</start>
        <end>477</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Equipping Educational Applications with Domain Knowledge
%A Sakakini, Tarek
%A Gong, Hongyu
%A Lee, Jong Yoon
%A Schloss, Robert
%A Xiong, JinJun
%A Bhat, Suma
%Y Yannakoudakis, Helen
%Y Kochmar, Ekaterina
%Y Leacock, Claudia
%Y Madnani, Nitin
%Y Pilán, Ildikó
%Y Zesch, Torsten
%S Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications
%D 2019
%8 August
%I Association for Computational Linguistics
%C Florence, Italy
%F sakakini-etal-2019-equipping
%X One of the challenges of building natural language processing (NLP) applications for education is finding a large domain-specific corpus for the subject of interest (e.g., history or science). To address this challenge, we propose a tool, Dexter, that extracts a subject-specific corpus from a heterogeneous corpus, such as Wikipedia, by relying on a small seed corpus and distributed document representations. We empirically show the impact of the generated corpus on language modeling, estimating word embeddings, and consequently, distractor generation, resulting in better performances than while using a general domain corpus, a heuristically constructed domain-specific corpus, and a corpus generated by a popular system: BootCaT.
%R 10.18653/v1/W19-4448
%U https://aclanthology.org/W19-4448
%U https://doi.org/10.18653/v1/W19-4448
%P 472-477
Markdown (Informal)
[Equipping Educational Applications with Domain Knowledge](https://aclanthology.org/W19-4448) (Sakakini et al., BEA 2019)
ACL
- Tarek Sakakini, Hongyu Gong, Jong Yoon Lee, Robert Schloss, JinJun Xiong, and Suma Bhat. 2019. Equipping Educational Applications with Domain Knowledge. In Proceedings of the Fourteenth Workshop on Innovative Use of NLP for Building Educational Applications, pages 472–477, Florence, Italy. Association for Computational Linguistics.