% ACL Anthology record W18-4903: single-page contribution (page 5) to the LAW-MWE-CxG-2018 joint workshop proceedings.
@inproceedings{schneider-2018-leaving,
    title     = {Leaving no token behind: comprehensive (and delicious) annotation of {MWE}s and supersenses},
    author    = {Schneider, Nathan},
    editor    = {Savary, Agata and
                 Ramisch, Carlos and
                 Hwang, Jena D. and
                 Schneider, Nathan and
                 Andresen, Melanie and
                 Pradhan, Sameer and
                 Petruck, Miriam R. L.},
    booktitle = {Proceedings of the Joint Workshop on Linguistic Annotation, Multiword Expressions and Constructions ({LAW}-{MWE}-{C}x{G}-2018)},
    month     = aug,
    year      = {2018},
    address   = {Santa Fe, New Mexico, USA},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/W18-4903},
    pages     = {5},
    abstract  = {I will describe an unorthodox approach to lexical semantic annotation that prioritizes corpus coverage, democratizing analysis of a wide range of expression types. I argue that a lexicon-free lexical semantics{---}defined in terms of units and supersense tags{---}is an appetizing direction for NLP, as it is robust, cost-effective, easily understood, not too language-specific, and can serve as a foundation for richer semantic structure. Linguistic delicacies from the STREUSLE and DiMSUM corpora, which have been multiword- and supersense-annotated, attest to the veritable sm{\"o}rg{\aa}sbord of noncanonical constructions in English, including various flavors of prepositions, MWEs, and other curiosities. Bio: Nathan Schneider is an annotation schemer and computational modeler for natural language. As Assistant Professor of Linguistics and Computer Science at Georgetown University, he looks for synergies between practical language technologies and the scientific study of language. He specializes in broad-coverage semantic analysis: designing linguistic meaning representations, annotating them in corpora, and automating them with statistical natural language processing techniques. A central focus in this research is the nexus between grammar and lexicon as manifested in multiword expressions and adpositions/case markers. He has inhabited UC Berkeley (BA in Computer Science and Linguistics), Carnegie Mellon University (Ph.D. in Language Technologies), and the University of Edinburgh (postdoc). Now a Hoya and leader of NERT, he continues to play with data and algorithms for linguistic meaning.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 collection holding one record, keyed by the citekey schneider-2018-leaving. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="schneider-2018-leaving">
    <!-- The paper itself: title, sole author, issue date. -->
    <titleInfo>
      <title>Leaving no token behind: comprehensive (and delicious) annotation of MWEs and supersenses</title>
    </titleInfo>
    <name type="personal">
      <namePart type="given">Nathan</namePart>
      <namePart type="family">Schneider</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2018-08</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, its seven editors, publisher and place. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the Joint Workshop on Linguistic Annotation, Multiword Expressions and Constructions (LAW-MWE-CxG-2018)</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Agata</namePart>
        <namePart type="family">Savary</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Carlos</namePart>
        <namePart type="family">Ramisch</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Jena</namePart>
        <namePart type="given">D</namePart>
        <namePart type="family">Hwang</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Nathan</namePart>
        <namePart type="family">Schneider</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Melanie</namePart>
        <namePart type="family">Andresen</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Sameer</namePart>
        <namePart type="family">Pradhan</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Miriam</namePart>
        <namePart type="given">R</namePart>
        <namePart type="given">L</namePart>
        <namePart type="family">Petruck</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Santa Fe, New Mexico, USA</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>I will describe an unorthodox approach to lexical semantic annotation that prioritizes corpus coverage, democratizing analysis of a wide range of expression types. I argue that a lexicon-free lexical semantics—defined in terms of units and supersense tags—is an appetizing direction for NLP, as it is robust, cost-effective, easily understood, not too language-specific, and can serve as a foundation for richer semantic structure. Linguistic delicacies from the STREUSLE and DiMSUM corpora, which have been multiword- and supersense-annotated, attest to the veritable smörgåsbord of noncanonical constructions in English, including various flavors of prepositions, MWEs, and other curiosities. Bio: Nathan Schneider is an annotation schemer and computational modeler for natural language. As Assistant Professor of Linguistics and Computer Science at Georgetown University, he looks for synergies between practical language technologies and the scientific study of language. He specializes in broad-coverage semantic analysis: designing linguistic meaning representations, annotating them in corpora, and automating them with statistical natural language processing techniques. A central focus in this research is the nexus between grammar and lexicon as manifested in multiword expressions and adpositions/case markers. He has inhabited UC Berkeley (BA in Computer Science and Linguistics), Carnegie Mellon University (Ph.D. in Language Technologies), and the University of Edinburgh (postdoc). Now a Hoya and leader of NERT, he continues to play with data and algorithms for linguistic meaning.</abstract>
    <identifier type="citekey">schneider-2018-leaving</identifier>
    <location>
      <url>https://aclanthology.org/W18-4903</url>
    </location>
    <!-- Position within the host volume: issue date and the single page number. -->
    <part>
      <date>2018-08</date>
      <detail type="page">
        <number>5</number>
      </detail>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T Leaving no token behind: comprehensive (and delicious) annotation of MWEs and supersenses
%A Schneider, Nathan
%Y Savary, Agata
%Y Ramisch, Carlos
%Y Hwang, Jena D.
%Y Schneider, Nathan
%Y Andresen, Melanie
%Y Pradhan, Sameer
%Y Petruck, Miriam R. L.
%S Proceedings of the Joint Workshop on Linguistic Annotation, Multiword Expressions and Constructions (LAW-MWE-CxG-2018)
%D 2018
%8 August
%I Association for Computational Linguistics
%C Santa Fe, New Mexico, USA
%F schneider-2018-leaving
%X I will describe an unorthodox approach to lexical semantic annotation that prioritizes corpus coverage, democratizing analysis of a wide range of expression types. I argue that a lexicon-free lexical semantics—defined in terms of units and supersense tags—is an appetizing direction for NLP, as it is robust, cost-effective, easily understood, not too language-specific, and can serve as a foundation for richer semantic structure. Linguistic delicacies from the STREUSLE and DiMSUM corpora, which have been multiword- and supersense-annotated, attest to the veritable smörgåsbord of noncanonical constructions in English, including various flavors of prepositions, MWEs, and other curiosities. Bio: Nathan Schneider is an annotation schemer and computational modeler for natural language. As Assistant Professor of Linguistics and Computer Science at Georgetown University, he looks for synergies between practical language technologies and the scientific study of language. He specializes in broad-coverage semantic analysis: designing linguistic meaning representations, annotating them in corpora, and automating them with statistical natural language processing techniques. A central focus in this research is the nexus between grammar and lexicon as manifested in multiword expressions and adpositions/case markers. He has inhabited UC Berkeley (BA in Computer Science and Linguistics), Carnegie Mellon University (Ph.D. in Language Technologies), and the University of Edinburgh (postdoc). Now a Hoya and leader of NERT, he continues to play with data and algorithms for linguistic meaning.
%U https://aclanthology.org/W18-4903
%P 5
Markdown (Informal)
[Leaving no token behind: comprehensive (and delicious) annotation of MWEs and supersenses](https://aclanthology.org/W18-4903) (Schneider, LAW-MWE 2018)
ACL