@inproceedings{almodaresi-etal-2017-distribution,
    title = "On the Distribution of Lexical Features at Multiple Levels of Analysis",
    author = "Almodaresi, Fatemeh  and
      Ungar, Lyle  and
      Kulkarni, Vivek  and
      Zakeri, Mohsen  and
      Giorgi, Salvatore  and
      Schwartz, H. Andrew",
    editor = "Barzilay, Regina  and
      Kan, Min-Yen",
    booktitle = "Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
    month = jul,
    year = "2017",
    address = "Vancouver, Canada",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P17-2013/",
    doi = "10.18653/v1/P17-2013",
    pages = "79--84",
    abstract = "Natural language processing has increasingly moved from modeling documents and words toward studying the people behind the language. This move to working with data at the user or community level has presented the field with different characteristics of linguistic data. In this paper, we empirically characterize various lexical distributions at different levels of analysis, showing that, while most features are decidedly sparse and non-normal at the message-level (as with traditional NLP), they follow the central limit theorem to become much more Log-normal or even Normal at the user- and county-levels. Finally, we demonstrate that modeling lexical features for the correct level of analysis leads to marked improvements in common social scientific prediction tasks."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="almodaresi-etal-2017-distribution">
    <titleInfo>
        <title>On the Distribution of Lexical Features at Multiple Levels of Analysis</title>
    </titleInfo>
    <name type="personal">
        <namePart type="given">Fatemeh</namePart>
        <namePart type="family">Almodaresi</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Lyle</namePart>
        <namePart type="family">Ungar</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Vivek</namePart>
        <namePart type="family">Kulkarni</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Mohsen</namePart>
        <namePart type="family">Zakeri</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">Salvatore</namePart>
        <namePart type="family">Giorgi</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <name type="personal">
        <namePart type="given">H.</namePart>
        <namePart type="given">Andrew</namePart>
        <namePart type="family">Schwartz</namePart>
        <role>
            <roleTerm authority="marcrelator" type="text">author</roleTerm>
        </role>
    </name>
    <originInfo>
        <dateIssued>2017-07</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <relatedItem type="host">
        <titleInfo>
            <title>Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)</title>
        </titleInfo>
        <name type="personal">
            <namePart type="given">Regina</namePart>
            <namePart type="family">Barzilay</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <name type="personal">
            <namePart type="given">Min-Yen</namePart>
            <namePart type="family">Kan</namePart>
            <role>
                <roleTerm authority="marcrelator" type="text">editor</roleTerm>
            </role>
        </name>
        <originInfo>
            <publisher>Association for Computational Linguistics</publisher>
            <place>
                <placeTerm type="text">Vancouver, Canada</placeTerm>
            </place>
        </originInfo>
        <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Natural language processing has increasingly moved from modeling documents and words toward studying the people behind the language. This move to working with data at the user or community level has presented the field with different characteristics of linguistic data. In this paper, we empirically characterize various lexical distributions at different levels of analysis, showing that, while most features are decidedly sparse and non-normal at the message-level (as with traditional NLP), they follow the central limit theorem to become much more Log-normal or even Normal at the user- and county-levels. Finally, we demonstrate that modeling lexical features for the correct level of analysis leads to marked improvements in common social scientific prediction tasks.</abstract>
    <identifier type="citekey">almodaresi-etal-2017-distribution</identifier>
    <identifier type="doi">10.18653/v1/P17-2013</identifier>
    <location>
        <url>https://aclanthology.org/P17-2013/</url>
    </location>
    <part>
        <date>2017-07</date>
        <extent unit="page">
            <start>79</start>
            <end>84</end>
        </extent>
    </part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T On the Distribution of Lexical Features at Multiple Levels of Analysis
%A Almodaresi, Fatemeh
%A Ungar, Lyle
%A Kulkarni, Vivek
%A Zakeri, Mohsen
%A Giorgi, Salvatore
%A Schwartz, H. Andrew
%Y Barzilay, Regina
%Y Kan, Min-Yen
%S Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)
%D 2017
%8 July
%I Association for Computational Linguistics
%C Vancouver, Canada
%F almodaresi-etal-2017-distribution
%X Natural language processing has increasingly moved from modeling documents and words toward studying the people behind the language. This move to working with data at the user or community level has presented the field with different characteristics of linguistic data. In this paper, we empirically characterize various lexical distributions at different levels of analysis, showing that, while most features are decidedly sparse and non-normal at the message-level (as with traditional NLP), they follow the central limit theorem to become much more Log-normal or even Normal at the user- and county-levels. Finally, we demonstrate that modeling lexical features for the correct level of analysis leads to marked improvements in common social scientific prediction tasks.
%R 10.18653/v1/P17-2013
%U https://aclanthology.org/P17-2013/
%U https://doi.org/10.18653/v1/P17-2013
%P 79-84
Markdown (Informal)
[On the Distribution of Lexical Features at Multiple Levels of Analysis](https://aclanthology.org/P17-2013/) (Almodaresi et al., ACL 2017)
ACL
- Fatemeh Almodaresi, Lyle Ungar, Vivek Kulkarni, Mohsen Zakeri, Salvatore Giorgi, and H. Andrew Schwartz. 2017. On the Distribution of Lexical Features at Multiple Levels of Analysis. In Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pages 79–84, Vancouver, Canada. Association for Computational Linguistics.