# -*- coding: utf-8 -*- """ Reproduces the analyses for 2019 NAACL submission titled "What just happened? Evaluating retrofitted distributional vectors" Dmetri Hayes 2019-04-04 @author: dmetri """ import os import gzip import math import numpy as np import sys from scipy.spatial.distance import cosine from scipy.stats import spearmanr, pearsonr, hmean from sklearn.metrics import mean_squared_error as mse import re import io import pandas as pd from copy import deepcopy import seaborn as sns import matplotlib.pyplot as plt from gensim.models import KeyedVectors SYS_VERSION = sys.version_info[0] print('Using Python %d' % SYS_VERSION) # set the open function if SYS_VERSION == 3: open_file = open elif SYS_VERSION == 2: open_file = io.open ROOT = os.getcwd() # global paths VEC_ROOT = ROOT + '/vectors/' #VEC_ROOT = ROOT + '/../../data/word_vectors/' PATH_LEX = ROOT + '/lexicons/' PATH_SIM = ROOT + '/similarity/' PATH_OUT_FIG = ROOT + '/output/' # path VEC_PATH = os.path.join(os.getcwd(), VEC_ROOT) # vector keys VEC_KEYS = ['SG', 'GloVe', 'NB', 'GC', 'Multi'] SG_KEY = 'SG_FULL' # current hack to allow this code to be stand-alone #SG_KEY = 'SG' # XXX: uncomment this to use the original word2vec vectors # word2vec/skipGram constants SG_PATH_FULL = VEC_PATH + 'GoogleNews-vectors-negative300.bin' SG_PATH_EVAL = VEC_PATH + 'sg_eval.txt' SG_CHAR_MAP = {'-':''} SG_WORD_MAP = {'harbour':'harbor', 'colour':'color', 'grey':'gray', 'theatre':'theater'} SG_LOWER = False # GloVe constants GLOVE_PATH_FULL = VEC_PATH + 'glove.6B.300d.txt' GLOVE_PATH_EVAL = VEC_PATH + 'glove_eval.txt' GLOVE_CHAR_MAP = {} GLOVE_WORD_MAP = {} GLOVE_LOWER = True # NumberBatch constants NB_PATH_FULL = VEC_PATH + 'numberbatch-en-17.06.txt.gz' NB_PATH_EVAL = VEC_PATH + 'nb_eval.txt' NB_CHAR_MAP = {'-':'_', ' ':'_'} NB_WORD_MAP = {} NB_LOWER = True # GC constants GC_PATH_FULL = VEC_PATH + 'globalContext.txt' GC_PATH_EVAL = VEC_PATH + 'gc_eval.txt' GC_CHAR_MAP = {} GC_WORD_MAP = {} GC_LOWER = True # Multi constants MULTI_PATH_FULL = VEC_PATH + 'de-projected-en-512.txt.gz' MULTI_PATH_EVAL = VEC_PATH + 'multi_eval.txt' MULTI_CHAR_MAP = {} MULTI_WORD_MAP = {} MULTI_LOWER = True #### DO NOT ALTER BELOW WORD2VEC_KEY = SG_KEY # alias for SG_KEY # combine the full paths PATH_FULL_DICT = {'NB': NB_PATH_FULL, 'GloVe': GLOVE_PATH_FULL, 'SG': SG_PATH_FULL, 'GC': GC_PATH_FULL, 'Multi': MULTI_PATH_FULL} # combine the eval paths PATH_EVAL_DICT = {'NB': NB_PATH_EVAL, 'GloVe': GLOVE_PATH_EVAL, 'SG': SG_PATH_EVAL, 'GC': GC_PATH_EVAL, 'Multi': MULTI_PATH_EVAL} # combine the character maps CHAR_MAP_DICT = {'NB': NB_CHAR_MAP, 'GloVe': GLOVE_CHAR_MAP, 'SG': SG_CHAR_MAP, 'GC': GC_CHAR_MAP, 'Multi': MULTI_CHAR_MAP} # combine the word maps WORD_MAP_DICT = {'NB': NB_WORD_MAP, 'GloVe': GLOVE_WORD_MAP, 'SG': SG_WORD_MAP, 'GC': GC_WORD_MAP, 'Multi': MULTI_WORD_MAP} # combine the lower flags VEC_LOWER_DICT = {'NB': NB_LOWER, 'GloVe': GLOVE_LOWER, 'SG': SG_LOWER, 'GC': GC_LOWER, 'Multi': MULTI_LOWER} # compile numbers IS_NUMBER = re.compile(r'\d+.*') SPECIAL_WORDS = ['---num---', '---punc---'] def get_word_from_label(label): ''' Retrieves the word from a label of the form "PREFIX@WORD". 
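    An illustrative example (the label shown here is hypothetical):
        >>> get_word_from_label('1@revenge')
        'revenge'
        >>> get_word_from_label('revenge')
        'revenge'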
Args: label (str): the label of the grouping Returns: the word, if the label is of the form PREFIX.WORD if no such word is found, the label is returned ''' items = re.findall('.*[@](.*)', label) if len(items) == 1: return items[0] elif len(items) > 1: raise ValueError('Labels must be of the form "PREFIX@WORD" or "WORD"') else: return label def read_lines(filename): ''' Reads in the lines of a file. Args: filename (str): name of the file to be loaded Returns: a list of strings, the lines of the text ''' with open_file(filename, 'r', encoding='utf8') as f: lines = f.read().strip().split('\n') return lines def processAnnoText(words, version): ''' Preprocesses the annotation text from FrameNet lexical units. Args: words (list of str): a list of words to be processed version (float): the version of FrameNet Returns: a list of strings, the words without parentheses, underscores, brackets or spaces ''' if version == 1.5: # remove parentheses post = [re.sub('\(.*\)','', w).strip() for w in words] # if an underscore is at the end of a word, just remove it post = [re.sub('_$','', p).strip() for p in post] # skip words which have underscores or spaces (i.e. multiword compound) post = [p for p in post if not any(x in p for x in ['_', ' '])] post = list(set(post)) elif version in [1.7, 1.71]: # remove parentheses post = [re.sub('\(.*\)','', w).strip() for w in words] # remove brackets post = [re.sub('\[.*\]','', p).strip() for p in post] # skip words which have spaces (i.e. multiword compound) post = [p.strip() for p in post if ' ' not in p] post = list(set(post)) else: raise ValueError('Version %s not supported' % version) return post def norm_word(word_or_label): ''' Normalizes a word or label by neutralizing punctuation and numbers, as well as lowering the word. Args: word_or_label (str): a preprocessed string Returns: a processed word ''' # check if the item is a label (as opposed to a regular word) items = re.findall('.*[@](.*)', word_or_label) if len(items) == 1: prefix = re.findall('(.*@).*', word_or_label) word = items[0] elif len(items) > 1: raise ValueError('Labels must be of the form "PREFIX@WORD" or "WORD"') else: prefix = '' word = word_or_label if IS_NUMBER.search(word.lower()): return '---num---' elif re.sub(r'\W+', '', word) == '': return '---punc---' else: # return the possibly recovered label return prefix + word.lower() def get_unique_vals(d, include_keys=True): ''' Returns the unique values from a dictionary mapping a string to a list of strings. Args: d (dict from str to list of str): the dictionary include_keys (bool, optional): if True, the keys of the dictionary are included in the values Returns: a list of str, the unique vals from the dictionary ''' temp = set() for v in d.values(): temp.update(v) if include_keys: temp.update(d.keys()) vals = list(temp) return vals ## VECTORS def read_word_vecs_legacy(filename): ''' Reads a set of word vectors and normalizes them. (the original code from Faruqui et al. 
2015) ''' wordVectors = {} if filename.endswith('.gz'): fileObject = gzip.open(filename, 'r') else: fileObject = open(filename, 'r') for line in fileObject: line = line.strip().lower() word = line.split()[0] wordVectors[word] = np.zeros(len(line.split())-1, dtype=float) for index, vecVal in enumerate(line.split()[1:]): wordVectors[word][index] = float(vecVal) ''' normalize weight vector ''' wordVectors[word] /= math.sqrt((wordVectors[word]**2).sum() + 1e-6) sys.stderr.write("Vectors read from: "+filename+" \n") return wordVectors def read_word_vecs(filename, doLower=False, normalize=False): ''' Reads a set of word vectors. Args: filename (str): the file name of the word vectors to be loaded normalize (bool, optional): if True, normalizes the word vectors Returns: a dictionary of strings to their Numpy arrays ''' wordVectors = {} if filename.endswith('.gz'): fileObject = gzip.open(filename, 'r') isBinary = True else: fileObject = open_file(filename, 'r', encoding='utf8') isBinary = False for line in fileObject: line = line.strip() # redid this so it doesn't lower the values if doLower: line = line.lower() items = line.split() if isBinary: # decode into ascii word = items[0].decode() else: word = items[0] if normalize: wordVectors[word] = np.zeros(len(line.split())-1, dtype=float) for index, vecVal in enumerate(line.split()[1:]): wordVectors[word][index] = float(vecVal) # normalize the weight vector wordVectors[word] /= math.sqrt((wordVectors[word]**2).sum() + 1e-6) else: wordVectors[word] = np.array([float(i) for i in items[1:]]) return wordVectors def get_word2vec(path, **kwargs): ''' A helper function which loads word2vec using the `gensim` module Args: path (str): path to the word vectors kwargs: Additional arguments that get passed to `load_word2vec_format` Returns: a `` ''' vecs = KeyedVectors.load_word2vec_format(path, **kwargs) return vecs def load_word_vecs(filenames, keys=None, legacy=False, lowerMap=None, normalize=False, word2vec_key=None, verbose=True): ''' Loads word vector files as a dictionary. Args: filenames (list of str): list of file names to load keys (list of str, optional): list of dictionary keys legacy (bool, optional): if True, uses the original retrofitting code lowerMap (dict from str to bool, optional): a dictionary mapping vector names to whether or not the words should be lowered before the evaluation normalize (bool, optional): if True, normalizes the word vectors word2vec_key (str, optional): key of the word2vec word vectors, which are loaded with `get_word2vec` verbose (bool, optional): if True, prints the names of the word vectors as they are loaded Returns: a dictionary mapping the name of each set of word vectors to those word vectors ''' if keys == None: keys = filenames vectors = {} for i in range(len(filenames)): f = filenames[i] if legacy: vecs = read_word_vecs_legacy(f+'.txt') else: if lowerMap is not None: doLower = lowerMap[keys[i]] else: doLower = False if keys[i] == word2vec_key: vecs = get_word2vec(f, binary=True) else: vecs = read_word_vecs(f, doLower=doLower, normalize=normalize) vectors[keys[i]] = vecs if verbose: print('%s loaded' % keys[i]) return vectors def read_lexicon_legacy(filename): ''' Reads word relations as a dictionary. (the original code from Faruqui et al. 
2015) ''' lexicon = {} for line in open(filename, 'r'): words = line.lower().strip().split() lexicon[norm_word(words[0])] = [norm_word(word) for word in words[1:]] return lexicon def read_lexicon(filename_or_lines, labeled=False, overwriteVals=False, doLabel=True): ''' Reads word relations as a dictionary. Args: filename_or_lines (str or list of str): if a string, the file path of the lines to be loaded. if a list, the lines defining the word relations labeled (bool, optional): specifies whether the word relations begin with a group label. By default the lines of the semantic resource are assumed to begin with a word, not a label. overwriteVals (bool, optional): if True, allows the entries of the dictionary to be overwritten. Only relevant if doLabel=False, otherwise overwriteVals is set to True doLabel (bool, optional): if True, labels the relations as PREFIX@WORD. Previous labels (i.e. REVENGE) are ignored. Returns: a dictionary mapping each word to its neighbors according to the semantic resource ''' if type(filename_or_lines) == str: lines = read_lines(filename_or_lines) elif type(filename_or_lines) == list: lines = filename_or_lines # if there were already group labels, get rid of them if labeled: lines = [' '.join(l.split()[1:]) for l in lines] # label lines? if doLabel: lines = label_lines(lines) if overwriteVals == False: print('WARNING: overwriteVals overwritten to True') overwriteVals = True lexicon = {} for line in lines: words = line.lower().strip().split() if len(words) > 0: # ensure compatibility with lines of the form PREFIX@WORD key = words[0] first_word = get_word_from_label(key) key_clean = norm_word(first_word) if not overwriteVals: if (key_clean in lexicon) & (key_clean not in SPECIAL_WORDS): msg = '"%s" already included in the dictionary' % key_clean raise KeyError(msg) lexicon[key] = [norm_word(word) for word in words[1:]] return lexicon def label_lines(filename_or_lines): ''' Returns labeled lines of a semantic resource of the form PREFIX@WORD. Args: filename_or_lines (str or list of str): if a string, the file path to the unlabeled semantic resource. if a list, the lines of the unlabeled semantic resource Returns: the lines of the semantic resource with the first word of each line replaced by PREFIX@WORD ''' if type(filename_or_lines) == str: lines = read_lines(filename_or_lines) elif type(filename_or_lines) == list: lines = filename_or_lines else: raise ValueError('filename_or_lines must be of type str or list') # keep count of each of the words count = {} new_lines = [] # loop through the lines for l in lines: words = l.split() if len(words) > 0: first_word = words[0] other_words = ' '.join(words[1:]) if first_word not in count: myCount = 1 count[first_word] = myCount # set count to 1 else: myCount = count[first_word] + 1 # increment count count[first_word] = myCount new = u'{}@{}'.format(myCount, first_word) + ' ' + other_words new_lines.append(new) return new_lines def read_compact_lexicon(filename_or_lines, sep='\t'): ''' Reads groupings of word relations as a dictionary. Args: filename_or_lines (str or list of str): if a string, the file path of the lines to be loaded. 
if a list, the lines defining the word relations Returns: a dictionary mapping each group label to its neighbors according to the semantic resource ''' if type(filename_or_lines) == str: lines = read_lines(filename_or_lines) elif type(filename_or_lines) == list: lines = filename_or_lines label_to_items = {l.split(sep)[0] : list(set(l.split(sep)[1:])) \ for l in lines} return label_to_items def load_lexicons(filenames, labeled=None, keys=None, legacy=False, overwriteVals=True, doLabel=True, verbose=True): ''' Loads the retrofitting semantic lexicons as a dictionary. Args: filenames (list of str): list of file names to load keys (list of str, optional): list of dictionary keys labeled (list of bool, optional): specifies for each semantic resource, whether the word relations begin with a group label. By default the lines of a semantic resource are assumed to begin with a word, not a label. legacy (bool, optional): if True, uses the original retrofitting code. This is a global option. overwriteVals (bool, optional): if True, allows the entries of the dictionary to be overwritten. Only relevant if doLabel=False, otherwise overwriteVals is to True doLabel (bool, optional): if True, labels the relations as PREFIX@WORD. Previous labels (i.e. REVENGE) are ignored. verbose (bool, optional): if True, prints the names of the lexicons as they are loaded Returns: a dictionary mapping the name of each semantic lexicon to that semantic lexicon ''' if keys == None: keys = filenames else: if len(keys) != len(filenames): raise ValueError('keys must be the same length as filenames') if labeled == None: labeled = np.full(len(filenames), False) else: if len(labeled) != len(filenames): raise ValueError('labeled must be the same length as filenames') lexicons = {} for i in range(len(filenames)): f = filenames[i] if legacy: lex = read_lexicon_legacy(f+'.txt') else: lex = read_lexicon(f+'.txt', labeled=labeled[i], overwriteVals=overwriteVals, doLabel=doLabel) lexicons[keys[i]] = lex if verbose: print('%s loaded' % keys[i]) return lexicons def load_compact_lexicons(filenames, keys=None, verbose=True, sep='\t'): ''' Loads multiple compact retrofitting lexicons as a dictionary. Args: filenames (list of str): list of file names to load keys (list of str, optional): list of dictionary keys verbose (bool, optional): if True, prints the names of the lexicons as they are loaded sep (str, optional): separator for the split operation Returns: a dictionary mapping the name of each semantic lexicon to that semantic lexicon ''' if keys == None: keys = filenames else: if len(keys) != len(filenames): raise ValueError('keys must be the same length as filenames') lexicons = {} for i in range(len(filenames)): f = filenames[i] lex = read_compact_lexicon(f + '.txt', sep=sep) lexicons[keys[i]] = lex if verbose: print('%s loaded' % keys[i]) return lexicons def get_sub_lexicon(lex, words): ''' Returns the subset of the semantic resource which contains the target words. Args: lex (dict from str to list of str): a dictionary mapping each word to its neighbors in a semantic resource words (array-like of str): the target words Returns: the subset of the semantic resource containing the target words ''' # create a smaller version of the semantic resource lex_sub = {k : v for k, v in list(lex.items()) \ if any((True for x in v if x in words))} return lex_sub def check_if_paired(lex, *words, grouped=None): ''' Checks whether a set of words are paired together within a semantic resource. 
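    Note that `*words` is a variadic positional parameter, so the words must be
    passed positionally, e.g. (with a hypothetical lexicon)
    check_if_paired(lexicons['FrameNet'], 'cat', 'dog'); passing `lex` by
    keyword before the words would be a syntax error.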
Args: lex (dict from str to list of str): a semantic resource grouped (bool, optional): indicates whether the two words are both in the resource *words: variable length list of words Returns: a boolean, whether or not the words are paired in the same group of words in the semantic resource Examples: >>> isPaired = check_if_paired(lex=lexicons['FrameNet'], 'cat', 'dog') ''' if grouped == False: return False # check if both words are in the resource isPaired = False for k, v in lex.items(): # combine the key with the values items = [get_word_from_label(k)] items.extend(v) # check to see if both words are present if all(x in items for x in words): isPaired = True return isPaired def check_if_grouped(lex, sims, lexName, words=None, getPaired=False): ''' Adds to an existing dataframe, indicating if words are grouped together in a particular semantic resource. Args: lex (dict): a dictionary mapping each word to its neighbors sims (`DataFrame`): a word similarity dataframe lexName (str): name of the semantic resource words (list, optional): list of unique words in the semantic resource. if not supplied, it is generated from the lexicon getPaired (bool, optional): checks whether the two words are paired in the same resource (warning: this may take a long time) Returns: a Pandas DataFrame derived from `sims` with columns indicating whether each pair of words in the semantic resource are present or grouped within the lexicon Examples: >>> df = check_if_grouped(fn, sims, lexName='FrameNet', words=words) ''' # copy the original similarity dataframe df = sims.copy() if words == None: words = get_unique_vals(lex) # check if both words are in the resource col_both = '{}_Both'.format(lexName) df[col_both] = df['Word1'].isin(words) & df['Word2'].isin(words) # restrict attention to the words which appeared in both df_both = df[df[col_both] == True] # get the sub-lexicon for the relevant words simWords = set(df_both['Word1']).union(df_both['Word2']) lex_sub = get_sub_lexicon(lex, simWords) if getPaired: # check if both words are paired in the resource col_paired = '{}_Paired'.format(lexName) paired = df_both.apply(lambda x: check_if_paired(lex_sub, x['Word1'], x['Word2'], grouped=x[col_both]), axis=1) # assign the new column df_both = df_both.assign(**{col_paired : paired}) # merge with the old dataframe df = pd.merge(df, df_both, how='left') # fill the resulting na values with False df = df.fillna(False) return df def check_if_grouped_many(lexicons, evalData, wordData=None, getPaired=False, verbose=True): ''' Adds to the existing dataframe objects in a dictionary, indicating if words are grouped together in a particular semantic resource. Args: lexicons (dict from str to dict): a dictionary mapping each lexicon name to a dictionary mapping each word to its neighbors evalData (dict from str to `DataFrame`): a dictionary mapping each word similarity dataset name to its dataframe wordData (dict, optional): a dictionary mapping each resource name to a list of unique words in the semantic resource. 
if not supplied, this information is generated from the lexicons getPaired (bool, optional): checks whether the two words are paired in the same resource (warning: this may take a long time) verbose (bool, optional): if True, the names of the lexicon-similarity dataset pairing are printed as they are processed Returns: a dictionary mapping each resource to a Pandas DataFrame indicating whether each pair of words in the semantic resource are present or grouped within the lexicon Examples: >>> evalData = check_if_grouped_many(lexicons, evalData) ... >>> evalData = check_if_grouped_many(lexicons, evalData, wordData) ''' newData = {} # loop through the evaluation data for simName, sims_new in evalData.items(): # loop through the lexicons for lexName, lex in lexicons.items(): # get the words in the lexicon if wordData is not None: words = wordData[lexName] else: words=None sims_new = check_if_grouped(lex=lex, sims=sims_new, lexName=lexName, words=words, getPaired=getPaired) if verbose: msg = '%s-%s is finished' % (simName, lexName) print(msg) # now that the similarity dataset is done, add to the dictionary newData[simName] = sims_new return newData def retrofit_legacy(wordVecs, lexicon, numIters=10): ''' Retrofits word vectors given a semantic lexicon. (the original code from Faruqui et al. 2015) ''' newWordVecs = deepcopy(wordVecs) wvVocab = set(newWordVecs.keys()) loopVocab = wvVocab.intersection(set(lexicon.keys())) for it in range(numIters): # loop through every node also in ontology (else just use data estimate) for word in loopVocab: wordNeighbours = set(lexicon[word]).intersection(wvVocab) numNeighbours = len(wordNeighbours) #no neighbours, pass - use data estimate if numNeighbours == 0: continue # the weight of the data estimate if the number of neighbours newVec = numNeighbours * wordVecs[word] # loop over neighbours and add to new vector (currently with weight 1) for ppWord in wordNeighbours: newVec += newWordVecs[ppWord] newWordVecs[word] = newVec/(2*numNeighbours) return newWordVecs def retrofit(wordVecs, lexicon, num_iters=10, alpha=1.0, beta=1.0, word2vec=False, keep_doubles=False): ''' Retrofits word vectors given a semantic lexicon. Args: wordVecs (list of `Numpy array`): the word vectors to be retrofitted lexicon (dict from str to `Numpy array`): a dictionary mapping each word to its neighbors in a semantic resource num_iters (int, optional): number of iterations alpha (float, optional): controls the strength of the fidelity to the original word vector beta (float, optional): controls the strength of the closeness of a word vector to its neighbors. By default it will be the inverse of the number of neighbors. word2vec (bool, optional): indicates that the vectors are `KeyedVectors` rather than ordinary Python dictionaries keep_doubles (bool, optional): if True, then repeated words are kept. if False, only one instance of each word is kept. 
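    Note (restating what the loop below computes): each iteration replaces a
    word's vector by
        q[w] <- (alpha * n * q0[w] + sum of q[u] over neighbours u) / (2 * n / beta)
    where q0[w] is the original (pre-retrofitting) vector, q[u] are the current
    vectors of the in-vocabulary neighbours, and n is the number of such
    neighbours (with repeats if keep_doubles=True). With alpha = beta = 1 this
    reduces to the update used in `retrofit_legacy`.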
Returns: the retrofitted word vectors Examples: >>> glove_retro = retrofit(glove, frameNet, numIters=10) ''' # get the words in the word vectors if word2vec: wvVocab = set(wordVecs.vocab.keys()) newWordVecs = {k : wordVecs[k] for k in wvVocab} else: wvVocab = set(wordVecs.keys()) newWordVecs = deepcopy(wordVecs) # create a dictionary which maps the words to their labels # (in the case of most lexicons, this will map onto the same word) word_to_label = {get_word_from_label(label) : label for label in lexicon.keys()} # get the words from the word vector vocabulary lexVocab = set(word_to_label.keys()) # find the intersection between vocab and lexicon loopVocab = wvVocab.intersection(set(lexVocab)) for it in range(num_iters): # loop through every node also in ontology (else just use data estimate) for word in loopVocab: # use the unique label to get to the neighbors label = word_to_label[word] good = set(lexicon[label]).intersection(wvVocab) # get the words which are in the if keep_doubles: bors = [w for w in lexicon[label] if w in good] else: bors = good # wordNeighbours = set(lexicon[label]).intersection(wvVocab) # numNeighbours = len(wordNeighbours) numBors = len(bors) # use the inverse of beta for algebraic simplicity # betaInv = numNeighbours/beta betaInv = numBors/beta #no neighbours, pass - use data estimate if numBors == 0: continue # the weight of the data estimate if the number of neighbors newVec = alpha * numBors * wordVecs[word] # loop over neighbors and add to new vector for ppWord in bors: newVec += newWordVecs[ppWord] newWordVecs[word] = newVec/(2.0*betaInv) return newWordVecs def retrofit_online(wordVecs, label_to_items, version, numIters=10, alpha=1.0, beta=1.0, word2vec=False): ''' Retrofits word vectors given a dictionary from labels to items comprising a semantic lexicon. Args:wo wordVecs (:obj:`list' of :obj:`Numpy array`): the word vectors to be retrofitted. label_to_items (dict from str to list of str): a dictionary mapping each group label to its items version (float): version of FrameNet numIters (:obj:`int`, optional): number of iterations (default=10) alpha (:obj:`float`, optional): controls the strength of the fidelity to the original word vector (default = 1) beta (:obj:`float`, optional): controls the strength of the closeness of a word vector to its neighbors. By default it will be the inverse of the number of neighbors. 
(default = 1) word2vec (bool, optional): indicates that the vectors are `KeyedVectors` rather than ordinary Python dictionaries Returns: the retrofitted word vectors Examples: retrofit(glove, label_to_items, numIters=10) ''' if word2vec: wvVocab = set(wordVecs.vocab.keys()) newWordVecs = {k : wordVecs[k] for k in wvVocab} else: wvVocab = set(wordVecs.keys()) newWordVecs = deepcopy(wordVecs) # get all of the unique words in the lexicon # lexWords = set([y for x in label_to_items.values() for y in x]) # loopVocab = wvVocab.intersection(lexWords) for it in range(numIters): # loop through all of the groupings in the ontology for label, items in label_to_items.items(): # preprocess the words post = processAnnoText(items, version) # find all of the words that are in the ontology groupVocab = wvVocab.intersection(post) numNeighbours = len(groupVocab) - 1 for word in groupVocab: wordNeighbours = groupVocab.copy() wordNeighbours.remove(word) # use the inverse of beta for algebraic simplicity betaInv = numNeighbours/beta #no neighbours, pass - use data estimate if numNeighbours == 0: continue # the weight of the data estimate if the number of neighbours newVec = alpha * numNeighbours * wordVecs[word] # loop over neighbours and add to new vector for ppWord in wordNeighbours: newVec += newWordVecs[ppWord] newWordVecs[word] = newVec/(2.0*betaInv) return newWordVecs def retrofit_lexicons(lexicons, vectors, legacy=False, verbose=True, word2vec_key=None, **kwargs): ''' Exhaustively retrofits a dictionary of word vectors given a dictionary of lexicons. Args: lexicons (dict from str to dict): a dictionary mapping the name of each lexicon to that lexicon vectors (dict from str to dict): a dictionary mapping the name of each set of word vectors to those vectors legacy (bool, optional): if True, uses the original retrofitting code verbose (bool, optional): if True, prints the names of the lexicons as they are retrofitted word2vec_key (str, optional): the name of the word2vec vectors kwargs: Additional arguments that get passed to `retrofit` or `retrofit_legacy` Returns: a dictionary mapping the name of each set of word vectors to those word vectors Examples: >>> retroVecs = retrofit_lexicons(lexicons, vectors) >>> retroVecs_legacy = retrofit_lexicons(lexicons, vectors, legacy=True) ''' if word2vec_key == None: word2vec_key = '' retroVecs = {} for lexName, lex in lexicons.items(): retroVecs[lexName] = {} # cycle through the word vectors for vecName, vec in vectors.items(): if vecName == word2vec_key: word2vec = True else: word2vec = False if legacy == False: retroVecs[lexName][vecName] = retrofit(vec, lex, word2vec=word2vec, **kwargs) elif legacy == True: retroVecs[lexName][vecName] = retrofit_legacy(vec, lex, **kwargs) if verbose: print('%s has been retrofitted with %s' % (vecName, lexName)) return retroVecs def retrofit_online_many(filenames, lexNames, lexicons, retroVecs, baseVecs, vecNames, version, verbose=False, sep='\t', **kwargs): ''' Helper function to retrofit multiple lexicons 'online' rather than using standard retrofit lexicons which list each word to its neighbors. 
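    A sketch of the intended call pattern (the file name, lexicon name and
    vector name below are hypothetical):
        >>> retroVecs, lexicons = retrofit_online_many(
        ...     filenames=[PATH_LEX + 'fn_compact'], lexNames=['FN_online'],
        ...     lexicons=lexicons, retroVecs=retroVecs, baseVecs=vectors,
        ...     vecNames='GloVe', version=1.7, verbose=True)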
    Args:
        filenames (list): list of file paths to the compact retrofitting
            lexicons
        lexNames (list): list of names for the retrofitted lexicons
        lexicons (dict from str to dict): a dictionary mapping the name of
            each lexicon to that lexicon
        retroVecs (dict from str to dict): a dictionary mapping the name of
            each set of word vectors to its retrofitted vectors
        baseVecs (dict from str to dict): a dictionary mapping the name of
            each set of word vectors to its baseline vectors
        vecNames (str or list): the name of the vector to retrofit or a list
            of names of vectors to retrofit
        version (float): the version of FrameNet used
        verbose (bool, optional): if True, the names of the lexicons are
            printed when they are completed
        sep (str, optional): separator passed on to `read_compact_lexicon`
        kwargs: additional keyword arguments passed to `retrofit_online`
    Returns:
        the updated `retroVecs` and `lexicons` variables
    '''
    if type(vecNames) == str:
        vecNames = [vecNames]
    elif type(vecNames) != list:
        raise ValueError('vecNames must be of type str or list')
    # loop through the vectors
    for vecName in vecNames:
        print('\nStarting vector %s' % vecName)
        # use a new set of vectors
        vectors = baseVecs[vecName]
        # loop through the filenames and lexNames
        n = len(filenames)
        for i in range(n):
            fn = filenames[i]
            lexName = lexNames[i]
            # load the compact lexicon
            label_to_items = read_compact_lexicon(fn + '.txt', sep=sep)
            results = retrofit_online(vectors, label_to_items, version,
                                      **kwargs)
            # add to the retrofitting dictionary
            if lexName not in retroVecs:
                # allows saving the previous results
                retroVecs[lexName] = {}
            retroVecs[lexName][vecName] = results
            # add the dictionary to the list of lexicons
            lexicons[lexName] = label_to_items
            if verbose:
                print('%s is done' % lexName)
        if verbose:
            print('Vector %s is done' % vecName)
    return retroVecs, lexicons


## EVALUATION

def cosine_sim(w1, w2, vectors, doLower=False, word2vec=False):
    ''' Finds the cosine similarity of the vector representations of two words.
    Args:
        w1 (str): the first word
        w2 (str): the second word
        vectors (dict from str to `np.array`): the word embeddings
        doLower (bool, optional): if True, lowers the words
        word2vec (bool, optional): indicates that the vectors are
            `KeyedVectors` rather than ordinary Python dictionaries
    Returns:
        the cosine similarity of the two word vectors
    '''
    if doLower:
        w1 = w1.lower()
        w2 = w2.lower()
    if word2vec:
        if (w1 not in vectors):
            return np.nan
        if (w2 not in vectors):
            return np.nan
    else:
        if (w1 not in vectors.keys()):
            return np.nan
        if (w2 not in vectors.keys()):
            return np.nan
    return 1 - cosine(vectors[w1], vectors[w2])


def cosine_sim_data(wordSim, vectors, columns=None, label='CosineSim',
                    doLower=False, fillZeros=False, getFrame=False,
                    wordMap=None, charMap=None, word2vec=False):
    ''' Finds the cosine similarity values across a word similarity dataset.
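    An illustrative call (the vector name and the column layout of `sims` are
    assumptions; `sims` is expected to have 'Word1' and 'Word2' columns):
        >>> df = cosine_sim_data(sims, vectors['GloVe'], label='Base',
        ...                      getFrame=True)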
Args: wordSim (`DataFrame`): the word similarity dataset vectors (dict from str to `np.array`): the word embeddings columns (list of str, optional): the column names for the first word and second word label (str, optional): label of the cosine similarity column doLower (bool, optional): if True, lowers the words fillZeros (bool, optional): if True, then any pair which contains an out-of-vocabulary word is set to zero getFrame (bool, optional): if True, returns a new DataFrame with the cosine similarity values, if False, returns the similarity data as an array wordMap (dict, optional): a dictionary mapping word forms in `wordSim` to their corresponding form in `vectors` charMap (dict, optional): if supplied, a mapping to apply to the words before attempting to extract them from the word vectors word2vec (bool, optional): indicates that the vectors are `KeyedVectors` rather than ordinary Python dictionaries Returns: the cosine similarity values across the words in the word similarity dataset, and possibly a new Pandas DataFrame with this information ''' if columns is None: columns = ['Word1', 'Word2'] # get the names of the columns with the words name_one, name_two = columns temp = wordSim[columns] if wordMap is not None: temp = temp.replace(wordMap) if charMap is not None: table = str.maketrans(charMap) for c in columns: temp[c] = temp[c].str.translate(table) sims = temp.apply(lambda x: cosine_sim(x[name_one], x[name_two], vectors, doLower=doLower, word2vec=word2vec), axis=1) if fillZeros: sims = sims.fillna(0) if getFrame: df = wordSim.copy() df[label] = sims return df else: return sims def get_metric(df, metric='spearman', col_gold='Score', col_base='Base', col_retro=None): ''' Gets the performance of a set of word vectors before and after retrofitting base on a given metric. Args: df (`DataFrame`): a DataFrame with the cosine similarity values of word vectors before and after retrofitting. metric (str or callable): if a string, the name of the performance metric. 
            Valid values:
                'spearman': Spearman's rank correlation coefficient [default]
                'pearson': Pearson product-moment correlation
                'hmean': harmonic mean of the Spearman and Pearson correlations
                'mse': mean squared error
                'msd': mean squared deviation, an alias of MSE
                'rmse': root-mean-square error
                'rmsd': root-mean-square deviation, an alias of RMSE
                'me': mean of the error
                'mae': mean absolute error
            if a function, the performance metric to be used
        col_gold (str, optional): name of the column with the gold
            standard values
        col_base (str, optional): name of the column with the similarities
            before retrofitting
        col_retro (str, optional): name of the column with the similarities
            after retrofitting
    Returns:
        two floats, the performance of the cosine similarity values of the
        word vectors against the word similarity judgments before and after
        retrofitting
    Examples:
        >>> rho = get_metric(df)
        >>> (rho_base, rho_retro) = get_metric(df, col_retro='Retro')
        >>> (r_base, r_retro) = get_metric(df, metric='pearson', col_retro='Retro')
        >>> (rmse_base, rmse_retro) = get_metric(df, metric='rmse')
    '''
    if type(metric) == str:
        if metric == 'spearman':
            func = lambda x, y: spearmanr(x, y)[0]
        elif metric == 'pearson':
            func = lambda x, y: pearsonr(x, y)[0]
        elif metric == 'hmean':
            func = lambda x, y: hmean([spearmanr(x, y)[0], pearsonr(x, y)[0]])
        elif metric in ['mse', 'msd']:
            func = mse
        elif metric in ['rmse', 'rmsd']:
            func = lambda x, y: math.sqrt(mse(x, y))
        elif metric == 'me':
            func = lambda x, y: np.average(y - x)
        elif metric == 'mae':
            func = lambda x, y: np.average(np.absolute(y - x))
        else:
            raise ValueError('Metric "%s" not supported; pass a callable '
                             'instead' % metric)
    elif callable(metric):
        func = metric
    # x is the prediction, y is the gold standard
    metric_base = func(df[col_base], df[col_gold])
    if col_retro is not None:
        metric_retro = func(df[col_retro], df[col_gold])
        return metric_base, metric_retro
    else:
        return metric_base


def load_frames(filenames, keys=None):
    ''' Loads csv files as a dictionary of Pandas DataFrames.
    Args:
        filenames (list of str): list of file names to load
        keys (list, optional): list of dictionary keys
    Returns:
        a dictionary mapping the name of a csv file to its corresponding
        Pandas DataFrame
    '''
    if keys == None:
        keys = filenames
    frames = {}
    for i in range(len(filenames)):
        f = filenames[i]
        d = pd.read_csv(f+'.csv')
        frames[keys[i]] = d
    return frames


def get_base_df(wordSim, vecs, col_base='Base', col_resid_base='Resid_base',
                col_score='Score', **kwargs):
    ''' Returns a DataFrame with the baseline performance of a set of word
    vectors against a word similarity dataset.
    Args:
        wordSim (`DataFrame`): word similarity dataframe
        vecs (dict from str to `np.array`): a set of word vectors
        col_base (str, optional): name of the column to store the word
            vectors' baseline cosine similarity values
        col_resid_base (str, optional): name of the column to store the
            residuals from the baseline cosine similarity values
        col_score (str, optional): name of the column with the human
            similarity judgment scores
        kwargs: additional keyword arguments passed on to `cosine_sim_data`
    Returns:
        a Pandas DataFrame with the cosine similarity measures between each
        pair of words present in the word similarity dataset.
Examples: >>> df = get_base_df(sims, vecs) ''' # add color rows df = wordSim.copy() # add base and retro predictions df[col_base] = cosine_sim_data(wordSim, vecs, **kwargs) df[col_resid_base] = df[col_base] - df[col_score] # # get the grouping information # if checkGrouping: # df = check_if_grouped(lex, df, lexName, words, getPaired) return df def get_change_df(wordSim, lexName, baseVecs, retroVecs, col_score='Score', **kwargs): ''' Returns a DataFrame with the performance of word vectors before and after retrofitting with a semantic resource against a word similarity dataset. Args: wordSim (`DataFrame`): word similarity dataframe lexName (str): name of the semantic resource baseVecs (dict from str to `np.array`): the original word vectors retroVecs (dict from str to `np.array`): the retrofitted word vectors col_score (str, optional): name of the column with the human similarity judgment scores kwargs: additional key-worded arguments passed on to `cosine_sim_data` Returns: a Pandas DataFrame with the cosine similarity measures between each pair of words present in the word similarity dataset. Examples: >>> df = get_change_df(sims, 'FrameNet', vectors_base, vectors_retro, fn) >>> df = get_change_df(sims, 'FrameNet', vectors_base, vectors_retro, fn, words) ''' # ''' # lex (dict from str to list of str, optional): a dictionary mapping # each word to its neighbors in a semantic resource # words (list of str, optional): if supplied, indicates whether # both words were present in the semantic lexicon # checkGrouping (bool, optional): if True, adds grouping # information (warning: this may be slow for large datasets) # getPaired (bool, optional): if True, adds pairing information # (warning: this may be slow for large datasets) # ''' # if checkGrouping: # if lex is None: # raise ValueError('lex must be provided if checkGrouping is True') # add rows df = wordSim.copy() # add base and retro predictions col_base = '{}_Base'.format(lexName) col_retro = '{}_Retro'.format(lexName) df[col_base] = cosine_sim_data(wordSim, baseVecs, **kwargs) df[col_retro] = cosine_sim_data(wordSim, retroVecs, **kwargs) # add residuals col_resid_base = '{}_Resid_base'.format(lexName) col_resid_retro = '{}_Resid_retro'.format(lexName) col_err_change = '{}_Err_change'.format(lexName) df[col_resid_base] = df[col_base] - df[col_score] df[col_resid_retro] = df[col_retro] - df[col_score] df[col_err_change] = abs(df[col_resid_retro]) - abs(df[col_resid_base]) # add whether the change in residual from the baseline col_change = '{}_Change'.format(lexName) df[col_change] = ['None' if v == 0 else 'Better' if v < 0 \ else 'Worse' for v in df[col_err_change].values] # # get the grouping information # if checkGrouping: # df = check_if_grouped(lex, df, lexName, words, getPaired) return df def get_change_data(evalData, baseVecs, retroVecs, lexicons, wordMaps=None, charMaps=None, lowerMap=None, word2vec_key=None, **kwargs): ''' Returns a dictionary indicating the trial-level performance of many word vectors retrofitting against many word similarity datasets. 
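    The result is keyed as changeData[vecName][simName]; per lexicon, each
    DataFrame gains the columns LEX_Base, LEX_Retro, LEX_Resid_base,
    LEX_Resid_retro, LEX_Err_change and LEX_Change (see `get_change_df`).
    For example (the vector, dataset and lexicon names are hypothetical):
        >>> changeData['GloVe']['MEN3K'][['Word1', 'Word2', 'Score',
        ...                               'FrameNet_Base', 'FrameNet_Retro']]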
Args: evalData (dict from str to dict): a dictionary containing the word similarity datasets baseVecs (dict from str to dict): a dictionary containing the word vectors prior to retrofitting retroVecs (dict from str to dict): a dictionary containing the word vectors after retrofitting lexicons (dict from str to dict): a dictionary mapping each name to the associated semantic resource wordMaps (dict from str to dict,, optional): a dictionary mapping vector names to dictionaries which map word forms in the similarity datasets to their corresponding vectors charMaps (dict from str to dict, optional): a dictionary mapping vector names to a mapping to apply to the words before attempting to extract them from the word vectors lowerMap (dict from str to bool, optional): a dictionary mapping vector names to whether or not the words should be lowered before the evaluation word2vec_key (str, optional): the name of the word2vec vectors kwargs: additional arguments passed to `get_change_df` Returns: a dictionary containing the trial-level performance of the word vectors against the word similarity datasets Examples: >>> changeData = get_change_data(evalData, baseVecs, retroVecs, lexicons) >>> changeData = get_change_data(evalData, baseVecs, retroVecs, lexicons, wordData) >>> changeData = get_change_data(evalData, baseVecs, retroVecs, lexicons, wordData, checkGrouping=True) ''' # if 'checkGrouping' in kwargs: # raise ValueError('use of checkGrouping in `get_change_data` ' + # 'no longer supported. please load the paired evalData') # if lexicons is None: # # expect that the evalData contains the lexicon information # tmp = list(evalData.values())[0].columns # (rathery hacky) # lexNames = tmp.str.extract('(.*)_Both', expand=True).dropna()[0].tolist() # lexicons = {lexName : None for lexName in lexNames} changeData = {} # cycle through the word vectors for vecName, vectors_base in baseVecs.items(): if vecName == word2vec_key: word2vec = True else: word2vec = False if wordMaps is not None: if vecName in wordMaps: vecWordMap = wordMaps[vecName] else: vecWordMap = None else: vecWordMap = None if charMaps is not None: if vecName in charMaps: vecCharMap = charMaps[vecName] else: vecCharMap = None else: vecCharMap = None if lowerMap is not None: if vecName in lowerMap: lower = lowerMap[vecName] else: lower = False else: lower = False changeData[vecName] = {} # cycle through the human data for simName, sims in evalData.items(): sims_vec = sims.copy() # cycle through the lexicons for lexName, lex in lexicons.items(): # get the retrofitted vectors vectors_retro = retroVecs[lexName][vecName] sims_vec = get_change_df(wordSim=sims_vec, lexName=lexName, baseVecs=vectors_base, retroVecs=vectors_retro, wordMap=vecWordMap, charMap=vecCharMap, doLower=lower, word2vec=word2vec, **kwargs) print('Finished with {}-{}-{}'.format(simName, vecName, lexName)) # once finished looping through the lexicons, store the result changeData[vecName][simName] = sims_vec return changeData def get_change_subset(df, lexName): ''' Returns the subset of a change dataframe to use to plot results. Args: df (:obj:`Pandas DataFrame`): a DataFrame with columns prefixed by LEXICON_[Measure] lexName (str): the name of the semantic resource Returns: the section of the table dealing with information pertaining to the semantic resource, with the prefix LEXICON_ removed. Examples: >>> changeData = get_change_data(...) 
        >>> df_fn = get_change_subset(changeData['GloVe']['MEN3K'], 'FN_v17')
    '''
    # copy the dataframe
    df_lex = df.copy()
    # create the regex command
    pat = '{}_'.format(lexName)
    # find the relevant columns
    lex_cols = df_lex.columns[df_lex.columns.str.contains(pat)]
    # add to the other columns (this is messy)
    # TODO: make sure that this isn't hard-coded
    lex_cols = pd.Index(['Word1', 'Word2', 'Score']).append(lex_cols)
    # get the subset
    df_lex = df_lex[lex_cols]
    # replace the lexicon prefix with an empty string
    df_lex.columns = df_lex.columns.str.replace(pat, '')
    return df_lex


# `get_metric` moved to `vector_utils.py`

def get_metric_data(data, metric='spearman', base=False, subset='all',
                    family=None, vecName=None, simName=None, **kwargs):
    ''' Gets the metric values before and possibly after retrofitting.
    Args:
        data (dict or `DataFrame`): a dictionary containing the trial-level
            performance of the word vectors against the word similarity
            datasets, or a Pandas DataFrame containing the trial-level
            performance for a particular set of word vectors evaluated
            against a particular human similarity dataset
        metric (str or function): if a string, the name of the performance
            metric. Valid values include 'spearman', 'pearson', 'mse' and
            'rmse' (see `get_metric` for the full list).
            if a function, the performance metric to be used
        base (bool, optional): if True, then only the baseline metrics
            are returned
        subset (`str`, optional): if 'all', uses all of the points,
            if 'both', uses only the points which were affected by
            retrofitting, if 'paired', uses only the points which were
            neighbors in the semantic resource
            Values: 'all', 'both', 'paired', 'better', 'worse', 'none'
        family (`str`, optional): if supplied, the name of the lexicon
            family to be added as a column name. Useful with calls to
            `retrofit_iter`.
        vecName (str, optional): the name or a list of names of the
            word vectors.
        simName (str, optional): the name or a list of the human similarity
            dataset names.
        kwargs: Additional arguments that get passed to `get_metric`
    Returns:
        a Pandas DataFrame containing the metric values computed between the
        cosine similarity values of the word vectors and the word similarity
        judgments before and after retrofitting
    Examples:
        >>> metricData = get_metric_data(changeData, metric='spearman')
        ...
>>> metricData = get_metric_data(changeData, metric='rmse') ''' if type(data) == pd.DataFrame: if vecName is None: print('vecName must be specified if changeData is of type DataFrame') if (base == False) & (simName is None): print('simName must be specified if changeData is of type DataFrame') # create the lists for the loops vecNames = [vecName] simNames = [simName] # change changeData to fit the schema below tmp = data.copy() data = {} data[vecName] = {} if base == False: data[vecName][simName] = tmp elif type(data) == dict: if vecName is not None: if type(vecName) == str: vecNames = [vecName] elif type(vecName) == list: vecNames = vecName else: raise ValueError('vecName must be of type str or list') else: # get the names of the word vectors vecNames = list(data.keys()) if (base == False) & (simName is not None): if type(simName) == str: simNames = [simName] elif type(simName) == list: simNames = simName else: raise ValueError('simName must be of type str or list') else: # get the names of the similarity datasets simNames = list(data[vecNames[0]].keys()) # assumes symmetry in the sim data # create a dictionary with the information metricData = {} metricData['Human Judgment'] = [] metricData['Word vector'] = [] metricData['Base'] = [] if base == False: metricData['Lexicon'] = [] metricData['Retro'] = [] # get the columns of the change dataframe if base == False: columns = data[vecNames[0]][simNames[0]].columns # extract the lexical resource names lexNames = columns.str.extract('(.*)_Retro', expand=False).dropna().values # cycle through the word vectors for vecName in vecNames: # cycle through the human data for simName in simNames: if base == False: # cycle through the lexicons for lexName in lexNames: metricData['Human Judgment'].append(simName) metricData['Lexicon'].append(lexName) metricData['Word vector'].append(vecName) # get the subset df = get_change_subset(data[vecName][simName], lexName) if subset == 'all': df_sub = df elif subset in ['both', 'paired']: df_sub = df.loc[df[subset.capitalize()] == True] elif subset in ['better', 'worse', 'none']: df_sub = df.loc[df['Change'] == subset.capitalize()] else: raise ValueError('subset value "%s" not supported' % subset) (metric_base, metric_retro) = get_metric(df_sub, metric=metric, col_retro='Retro', **kwargs) metricData['Base'].append(metric_base) metricData['Retro'].append(metric_retro) else: metricData['Human Judgment'].append(simName) metricData['Word vector'].append(vecName) df = data[vecName][simName] metric_base = get_metric(df, metric=metric, **kwargs) metricData['Base'].append(metric_base) # turn this into a Pandas DataFrame metricData = pd.DataFrame(metricData) if base == False: # add a change column metricData['Change'] = metricData['Retro'] - metricData['Base'] # add the family name if family != None: metricData = metricData.assign(Family=family) return metricData def get_base_data(evalData, baseVecs, wordMaps=None, charMaps=None, lowerMap=None, word2vec_key=None, **kwargs): ''' Returns a dictionary indicating the trial-level performance of many word vectors against many word similarity datasets. 
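    Each entry baseData[vecName][simName] is a copy of the similarity
    DataFrame with added 'Base' and 'Resid_base' columns (see `get_base_df`).
    For example (the vector and dataset names are hypothetical):
        >>> baseData['GloVe']['MEN3K'][['Word1', 'Word2', 'Score', 'Base']]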
Args: evalData (dict from str to dict): a dictionary containing the word similarity datasets baseVecs (dict from str to dict): a dictionary containing the word vectors prior to retrofitting wordMaps (dict from str to dict,, optional): a dictionary mapping vector names to dictionaries which map word forms in the similarity datasets to their corresponding vectors charMaps (dict from str to dict, optional): a dictionary mapping vector names to a mapping to apply to the words before attempting to extract them from the word vectors lowerMap (dict from str to bool, optional): a dictionary mapping vector names to whether or not the words should be lowered before the evaluation word2vec_key (str, optional): the name of the word2vec vectors kwargs: additional arguments passed to `get_base_df` Returns: a dictionary containing the trial-level performance of the word vectors against the word similarity datasets Examples: >>> baseData = get_base_data(evalData, baseVecs) ''' if word2vec_key == None: word2vec_key = '' baseData = {} # cycle through the word vectors for vecName, vectors_base in baseVecs.items(): if vecName == word2vec_key: word2vec = True else: word2vec = False if wordMaps is not None: if vecName in wordMaps: vecWordMap = wordMaps[vecName] else: vecWordMap = None else: vecWordMap = None if charMaps is not None: if vecName in charMaps: vecCharMap = charMaps[vecName] else: vecCharMap = None else: vecCharMap = None if lowerMap is not None: if vecName in lowerMap: lower = lowerMap[vecName] else: lower = False else: lower = False baseData[vecName] = {} # cycle through the human data for simName, sims in evalData.items(): sims_vec = sims.copy() sims_vec = get_base_df(wordSim=sims_vec, vecs=vectors_base, doLower=lower, wordMap=vecWordMap, charMap=vecCharMap, word2vec=word2vec, **kwargs) print('Finished with {}-{}'.format(simName, vecName)) # once finished looping through the lexicons, store the result baseData[vecName][simName] = sims_vec return baseData ## LOAD RESULTS def load_change_data(path, post=None): ''' Loads a directory as a dictionary containing the trial-level performance of the word vectors against the word similarity datasets. This functions as the reverse of `save_change_data`, but it simply loads all of the files with the compliant format into a single dictionary. ''' files = os.listdir(path) files = [f for f in files if f.endswith('.csv')] changeData = {} for f in files: items = f.split('_') if len(items) == 3: vecName, simName, postName = items if post is None: # discard the "postname" d = pd.read_csv(path + f) if vecName not in changeData: changeData[vecName] = {} changeData[vecName][simName] = d else: if post == postName[:-4]: d = pd.read_csv(path + f) if vecName not in changeData: changeData[vecName] = {} changeData[vecName][simName] = d else: continue # if post is None: # if len(items) == 3: # # discard the "postname" # vecName, simName, _ = items # else: # continue # d = pd.read_csv(path + f) # else: # raise NotImplementedError('loading by postfix not implemented yet') # # load the data # if vecName not in changeData: # changeData[vecName] = {} # changeData[vecName][simName] = d return changeData ## SAVING def save_change_data(path, changeData, post=None): ''' Saves a dictionary containing the trial-level performance of the word vectors against the word similarity datasets, as computed via `get_change_data`. `post` is a post-fix to distinguish results. 
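    For example (the output path and post-fix are hypothetical):
        >>> save_change_data(PATH_OUT_FIG, changeData, post='iter10')
    writes one csv per vector/dataset pair, e.g. 'GloVe_MEN3K_iter10.csv',
    which `load_change_data(PATH_OUT_FIG, post='iter10')` reads back into a
    nested dictionary of the same shape.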
''' for vecName, vecDict in changeData.items(): for simName, d in vecDict.items(): if post is not None: filename = '%s_%s_%s.csv' % (vecName, simName, post) else: filename = '%s_%s.csv' % (vecName, simName) d.to_csv(path + filename, index=False) ## PLOTTING def get_plot_metrics(df, metrics, df_plot=None, showChange=False): ''' Returns the metrics before and after retrofitting. Args: df (`DataFrame`): an evaluation dataframe containing the human scores, and the base and retrofitted values from the word vectors metrics (str, callable or list, optional): a string, a function, or a list of strings or functions to be passed on to `get_metric` to be used as performance metrics df_plot (`DataFrame`, optional): the dataframe used in `plot_results`. if supplied, the metrics are added to `df_plot` under the "Type" column, otherwise the metrics are returned as strings showChange (bool, optional): if True, the change in metric is computed rather than the raw values before and after Returns: if `df_plot` is supplied, the dataframe with the metrics added, otherwise two strings indicating the metrics (intended to be used as labels in a plot) ''' # copy the original dataframe if df_plot is not None: df_plot = df_plot.copy() # create a metric symbol dictionary (temp) metric_to_symbol = {'spearman':'rho', 'pearson':'r', 'rmse':'RMSE'} def get_one_metric(df, metric): '''another utility function, to avoid repeating in the loop''' val = get_metric(df, metric=metric, col_gold='Score', col_base='Base', col_retro='Retro') val_base = np.round(val[0], 3) val_retro = np.round(val[1], 3) return val_base, val_retro # if a string or function, simply run as normal if type(metrics) == str or callable(metrics): val_base, val_retro = get_one_metric(df, metrics) if metrics in metric_to_symbol: symbol = metric_to_symbol[metrics] else: symbol = 'metric' pat_retro = 'Retro ({}=%.3f)'.format(symbol) if showChange: # get the change val_change = val_retro - val_base text_base = 'Base' text_retro = pat_retro % val_change else: pat_base = 'Base ({}=%.3f)'.format(symbol) text_base = pat_base % val_base text_retro = pat_retro % val_retro elif type(metrics) == list: if showChange: text_base = 'Base' else: text_base = 'Base ({})' text_retro = 'Retro ({})' rep_base = '' rep_retro = '' for m in metrics: val_base, val_retro = get_one_metric(df, m) if m in metric_to_symbol: symbol = metric_to_symbol[m] else: symbol = 'metric' # add to the text if showChange: val_change = val_retro - val_base rep_retro += ('{}=%.3f, ' % val_change).format(symbol) else: rep_base += ('{}=%.3f, ' % val_base).format(symbol) rep_retro += ('{}=%.3f, ' % val_retro).format(symbol) # strip the extra comma and space if not showChange: rep_base = rep_base.rstrip(', ') rep_retro = rep_retro.rstrip(', ') # put into text_base and text_retro text_base = text_base.format(rep_base) text_retro = text_retro.format(rep_retro) if df_plot is not None: # update the df_plot values df_plot['Type'] = df_plot['Type'].map({'Base' : text_base, 'Retro' : text_retro}) return df_plot else: return text_base, text_retro def plot_results(data, lexName, vecName=None, simName=None, title=None, xlabel=None, ylabel=None, alpha=.7, s=75, tsize=14, xsize=14, ysize=14, legsize=12, legend_out=True, plotDiagonal=True, subset='all', showMetrics=True, showChange=False, metrics='spearman', filename=None, colors=None, **kwargs): ''' Plots human similarity judgment against the word vector cosine similarity values before and after retrofitting. 
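    A typical call with the dictionary form of `data` (the lexicon, vector
    and dataset names below are hypothetical):
        >>> g = plot_results(changeData, 'FrameNet', vecName='GloVe',
        ...                  simName='MEN3K')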
Args: data (dict or `DataFrame`): a dictionary containing the retrofitting results, or aPandas DataFrame containing the results of a single resource in retrofitting lexName (str): name of the semantic resource vecName (str, optional): name of the word vectors simName (str, optional): name of the similarity dataset xlabel (str, optional): x-label of the figure ylabel (str, optional): y-label of the figure title (str, optional): the title of the figure alpha (float, optional): controls the transparency of the marker s (int, optional): controls the size of the marker tsize (int): font size for the titles xsize (int): font size for the x labels ysize (int): font size for the y labels legsize (int): font size for the legend labels plotDiagonal (bool, optional): if True, plots the diagonal legend_out (bool, optional): if True, the legend is put outside the figure subset (str, optional): if 'all', plots all of the points, if 'both', plots only the points which were affected by retrofitting, if 'paired', plots only the points which were neighbors in the semantic resource Values: 'all', 'both', 'paired', 'better', 'worse', 'none' showMetrics (bool, optional): if True, shows the metric(s) on each of the figures showChange (bool, optional): if True, shows the change in the metric(s) on the retrofitted figure metrics (str, callable or list, optional): a string, a function, or a list of strings or functions to be passed on to `get_metric` to be used as performance metrics filename (str, optional): name of file path to save the resulting figure colors (list, optional): name of xkcd color names for the results. Default: ['blue', 'emerald green', 'red'] kwargs: additional arguments passed to `sns.FacetGrid` Returns: the FacetGrid object Examples: >>> df = changeData['GloVe']['MEN3K'] >>> plot_results(df, 'FrameNet', title='Lexicon : Data : Vector') ''' if type(data) == dict: if (vecName is None) | (simName is None): msg = 'if data is a dictionary then both vecName and simName must be specified' raise ValueError(msg) df = get_change_subset(data[vecName][simName], lexName) if xlabel is None: xlabel = vecName + ' cosine similarity' if ylabel is None: ylabel = 'Judgments from ' + simName elif type(data) == pd.DataFrame: # get the subset of the dataframe for the lexicon df = get_change_subset(data, lexName) if xlabel is None: xlabel = 'Word Vector' elif ylabel is None: ylabel = 'Human Judgment' else: raise ValueError('data must be either a dictionary of a DataFrame') if subset in ['all', 'better', 'worse']: id_vars = ['Word1', 'Word2', 'Score', 'Err_change', 'Change'] elif subset == 'both': id_vars = ['Word1', 'Word2', 'Score', 'Err_change', 'Change', \ 'Both'] elif subset == 'paired': id_vars = ['Word1', 'Word2', 'Score', 'Err_change', 'Change', \ 'Both', 'Paired'] # convert the dataframe to the form necessary for plotting df_plot = df.melt(id_vars=id_vars, \ value_vars=['Base', 'Retro'], var_name = 'Type', value_name= 'Vector') if colors is None: colors = ['blue', 'emerald green', 'red'] color_vals = sns.xkcd_palette(colors) # create the custom palette pal = {'None':color_vals[0], 'Better':color_vals[1], 'Worse':color_vals[2]} if title is None: title = lexName if subset == 'all': if showMetrics: df_plot = get_plot_metrics(df, metrics, df_plot, showChange=showChange) # hack hue_order = df['Change'].value_counts().index.values if 'None' in hue_order: hue_order = ['None', 'Better', 'Worse'] else: hue_order = ['Better', 'Worse'] g = sns.FacetGrid(df_plot, col='Type', hue='Change', legend_out=legend_out, \ 
        g = sns.FacetGrid(df_plot, col='Type', hue='Change',
                          legend_out=legend_out, palette=pal,
                          hue_order=hue_order, **kwargs)
    elif subset in ['both', 'paired']:
        df_sub = df.loc[df[subset.capitalize()] == True]
        df_plot = df_plot.loc[df_plot[subset.capitalize()] == True]
        if showMetrics:
            df_plot = get_plot_metrics(df_sub, metrics, df_plot,
                                       showChange=showChange)
        g = sns.FacetGrid(df_plot, col='Type', hue="Change",
                          legend_out=legend_out, palette=pal,
                          hue_order=['Better', 'Worse'], **kwargs)
    elif subset in ['better', 'worse', 'none']:
        df_sub = df.loc[df['Change'] == subset.capitalize()]
        df_plot = df_plot.loc[df_plot['Change'] == subset.capitalize()]
        if showMetrics:
            df_plot = get_plot_metrics(df_sub, metrics, df_plot,
                                       showChange=showChange)
        g = sns.FacetGrid(df_plot, col='Type', hue="Change",
                          legend_out=legend_out, palette=pal, **kwargs)
    else:
        raise ValueError('subset value not recognized')

    g = g.map(plt.scatter, "Vector", "Score", alpha=alpha, s=s)
    g.set_titles("{col_name}", size=tsize, weight='bold')

    leg = g.add_legend(prop={'weight': 'bold', 'size': legsize})
    new_title = 'Change'
    # set the title
    if legend_out:
        leg = g._legend
    else:
        leg = g.facet_axis(0, 0).get_legend()
    leg.set_title(new_title)
    plt.setp(leg.get_title(), **{'weight': 'bold', 'size': str(tsize)})

    g.fig.suptitle(title, size=tsize, weight='bold')
    g.fig.subplots_adjust(top=.9)  # offset
    g.set_xlabels(xlabel, size=xsize, weight='bold')
    g.set_ylabels(ylabel, size=ysize, weight='bold')

    if plotDiagonal:
        # plot the diagonal
        for ax in g.axes.ravel():
            ax.plot([0, 1], [0, 1], color='black', ls='--')

    if filename is not None:
        g.savefig(filename)

    return g


def plot_metrics(data, title='', ylabel='', subset='all', metric='spearman',
                 invert=False, base=False, text_size=14, leg_labels_dict=None,
                 leg_title_weight='bold', leg_title_size=14,
                 leg_texts_size=13, vecName=None, simName=None, height=6,
                 aspect=0.7, filename=None, **kwargs):
    '''
    Plots a DataFrame using some metric to compare human similarity judgment
    against the word vector cosine similarity values before and after
    retrofitting.

    Args:
        data (dict from str to dict): a dictionary containing the trial-level
            performance of the word vectors against the word similarity
            datasets
        title (str, optional): the title of the figure
        ylabel (str, optional): the label of the y-axis
        subset (str, optional): if 'all', plots all of the points, if 'both',
            plots only the points which were affected by retrofitting, if
            'paired', plots only the points which were neighbors in the
            semantic resource.
            Values: 'all', 'both', 'paired', 'better', 'worse', 'none'
        metric (str or callable): a string or function to be passed on to
            `get_metric` to be used as the performance metric
        invert (bool, optional): if True, the y-axis is inverted
        base (bool, optional): if True, the baseline performance of the
            vectors is plotted
        text_size (int): font size of the axes and title
        leg_labels_dict (dict, optional): if specified, a dictionary mapping
            the old labels to the new labels
        leg_title_weight (str, optional): weight of the legend title
        leg_title_size (str or int, optional): font size of the legend title
        leg_texts_size (str or int, optional): font size of the legend texts
        vecName (str, optional): the name or a list of names of the word
            vectors
        simName (str, optional): the name or a list of the human similarity
            dataset names
        height (int, optional): the height of the plot
        aspect (float, optional): the aspect of the plot
        filename (str, optional): file path to save the resulting figure
        kwargs: additional key-worded arguments passed to `seaborn.catplot`

    Returns:
        the FacetGrid of the plot

    Examples:
        >>> changeData = get_change_data(...)
        >>> plot_metrics(changeData, metric='spearman',
        ...              ylabel='Change in Spearman Correlation')
        >>> plot_metrics(changeData, metric='pearson',
        ...              ylabel='Change in Pearson Correlation')
        >>> plot_metrics(changeData, metric='rmse', ylabel='Change in RMSE')

        >>> baseData = get_base_data(...)
        >>> plot_metrics(baseData)
    '''
    # get the proper subset
    df_sub = get_metric_data(data, metric=metric, base=base, subset=subset,
                             vecName=vecName, simName=simName)
    # restrict to the labels present, if any
    if leg_labels_dict is not None:
        df_sub = df_sub[df_sub['Lexicon'].isin(leg_labels_dict.keys())]

    # with sns.plotting_context("notebook", font_scale=1.5):
    if base == False:
        g = sns.catplot(x='Word vector', y='Change', hue='Lexicon',
                        col='Human Judgment', data=df_sub, kind="bar",
                        height=height, aspect=aspect, legend_out=True,
                        **kwargs)
    else:
        g = sns.catplot(x='Word vector', y='Base', col='Human Judgment',
                        data=df_sub, kind="bar", height=height, aspect=aspect,
                        legend_out=True, **kwargs)

    # override font weight
    g.set_titles("{col_name}", size=text_size, weight='bold')
    g.set_xlabels('Word vector', size=text_size, fontweight='bold')
    g.set_ylabels(ylabel, size=text_size, fontweight='bold')
    g.fig.suptitle(title, size=text_size, weight='bold')
    g.fig.subplots_adjust(top=.9)  # offset

    if invert:
        plt.gca().invert_yaxis()

    # deal with the legend
    leg = g._legend
    if leg is not None:
        # annoying way to change this
        plt.setp(leg.get_title(), **{'weight': leg_title_weight,
                                     'size': leg_title_size})
        plt.setp(leg.get_texts(), fontsize=leg_texts_size)
        # replace labels
        if leg_labels_dict is not None:
            for t in leg.texts:
                t.set_text(leg_labels_dict[t.get_text()])

    if filename is not None:
        g.savefig(filename)

    return g


def annotate_results(data, lexName, g, w1, w2, vecName=None, simName=None,
                     show_points=False, show_labels=False, markers=None,
                     ms=15):
    '''
    Annotates the figure resulting from `plot_results`, which shows the
    effects of retrofitting.
    Args:
        data (dict or `DataFrame`): a dictionary containing the retrofitting
            results, or a Pandas DataFrame containing the results of a single
            resource in retrofitting
        lexName (str): name of the semantic resource
        g (`seaborn.axisgrid.FacetGrid`): FacetGrid resulting from
            `plot_results`
        w1 (str or list): the first word in the word pair, or a list of first
            words
        w2 (str or list): the second word in the word pair, or a list of
            second words
        vecName (str, optional): name of the word vectors
        simName (str, optional): name of the similarity dataset
        show_points (bool, optional): if True, plots the annotated points in
            black
        show_labels (bool, optional): if True, shows the word-pair labels
        markers (list, optional): list of markers for the points
        ms (int): marker size

    Returns:
        a Seaborn FacetGrid
    '''
    if type(data) == dict:
        if (vecName is None) or (simName is None):
            msg = ('if data is a dictionary then both vecName and simName '
                   'must be specified')
            raise ValueError(msg)
        df = get_change_subset(data[vecName][simName], lexName)
    elif type(data) == pd.DataFrame:
        # get the subset of the dataframe for the lexicon
        df = get_change_subset(data, lexName)
    else:
        raise ValueError('data must be either a dictionary or a DataFrame')

    if type(w1) == str:
        if type(w2) != str:
            raise ValueError('w1 and w2 must be of the same type')
        w1 = [w1]
        w2 = [w2]
    elif type(w1) == list:
        if type(w2) != list:
            raise ValueError('w1 and w2 must be of the same type')
        if len(w1) != len(w2):
            raise ValueError('w1 and w2 must have the same length')
    else:
        raise ValueError('w1 and w2 must be either strings or lists of strings')

    df = df.assign(**{'Label': ['%s &\n%s' % (x, y)
                                for (x, y) in zip(df['Word1'], df['Word2'])]})

    # get the critical pairs
    labs = ['%s &\n%s' % (x, y) for (x, y) in zip(w1, w2)]
    sub = df[df['Label'].isin(labs)]
    # get the indices (hack)
    inds = [df.index[df['Label'] == l][0] for l in labs]
    sub = sub.loc[inds]

    # get the markers
    if markers is None:
        markers = ['o'] * len(sub)

    # get the x, y values and plot them
    for base, retro, score, lab, m in zip(sub['Base'], sub['Retro'],
                                          sub['Score'], labs, markers):
        if show_labels:
            g.facet_axis(0, 0).annotate(lab, xy=(base - 0.1, score + 0.05),
                                        fontsize=20, fontweight='bold')
            g.facet_axis(0, 1).annotate(lab, xy=(retro - 0.1, score + 0.05),
                                        fontsize=20, fontweight='bold')
        if show_points:
            # debug for visuals
            cmd = 'k' + m
            g.facet_axis(0, 0).plot(base, score, cmd, ms=ms)
            g.facet_axis(0, 1).plot(retro, score, cmd, ms=ms)

    return g


if __name__ == '__main__':

    RERUN = True  # boolean indicating if the analysis should be recomputed

    sns.set_style('darkgrid')

    version = 1.71  # version of FrameNet to use (compatibility with other code)
    print('Using version %s' % version)

    # constants
    # retrofitting lexicons
    # code to generate the retrofitting lexicons is not present in this file
    lex_names_main = ['ppdb-xl', 'wordnet_plus', 'framenet_lus']
    lex_keys_main = ['PPDB', 'WN-PLUS', 'FN']
    labeled_main = [False, False, False]

    # lex_names_anno = ['framenet-live-fe-nouns']
    # lex_names_anno = ['fn_retro_nouns_first', 'fn_retro_nouns_last',
    #                   'fn_retro_nouns_one', 'fn_retro_nouns_all']
    lex_names_anno = ['fn_retro_nouns_last']
    lex_keys_anno = ['FN-ANNO']
    # lex_keys_anno = ['FN-ANNO-FIRST', 'FN-ANNO-LAST', 'FN-ANNO-ONES',
    #                  'FN-ANNO-ALL']

    lex_names_main = [PATH_LEX + l for l in lex_names_main]
    lex_names_anno = [PATH_LEX + l for l in lex_names_anno]
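    # The lexicon files are assumed to contain one entry per line: a head word
    # followed by the neighbors it should be retrofitted towards, e.g.
    # (illustrative only; the exact layout is whatever load_lexicons /
    # load_compact_lexicons expect):
    #
    #   dog canine hound puppy
    #   film movie cinema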
    # leg_labels_dict = {'PPDB': 'PPDB', 'WN-PLUS': 'WN+', 'FN': 'FN',
    #                    'FN-NOUN': 'FN-ANNO'}
    leg_labels_dict = {'PPDB': 'PPDB', 'WN-PLUS': 'WN+', 'FN': 'FN',
                       'FN-ANNO-FIRST': 'FN-ANNO-FIRST',
                       'FN-ANNO-LAST': 'FN-ANNO-LAST',
                       'FN-ANNO-ONES': 'FN-ANNO-ONES',
                       'FN-ANNO-ALL': 'FN-ANNO-ALL',
                       'FN-ANNO': 'FN-ANNO'}

    vec_keys = ['NB', 'GloVe', 'SG']
    # vec_keys = ['GloVe', 'SG']

    # XXX: "EVAL" files were cached vectors to speed up the loading process
    vec_names = [PATH_EVAL_DICT[v] for v in vec_keys]
    # XXX: to use the full word vector files (not included), the files must be
    # added to the folder "vectors", and the line "SG_KEY = 'SG'" near the top
    # of this file needs to be uncommented so that the code knows SG refers to
    # KeyedVectors
    # vec_names = [PATH_FULL_DICT[v] for v in vec_keys]

    sim_keys = ['MT771', 'MEN3K', 'RW', 'SL999']
    # note: these files are labeled "PAIRED" because they contain cached
    # information
    sim_names = ['MT771_PAIRED', 'MEN3K_PAIRED', 'RW_PAIRED', 'SL999_PAIRED']
    sim_names = [PATH_SIM + s for s in sim_names]

    # load the vectors, lexicons and similarity sets
    # baseVecs = load_word_vecs(vec_names, vec_keys, word2vec_key=WORD2VEC_KEY)
    baseVecs = load_word_vecs(vec_names, vec_keys, word2vec_key='')
    lexicons = load_lexicons(lex_names_main, labeled_main, lex_keys_main)
    # lexicons_anno = load_compact_lexicons(lex_names_anno, keys=lex_keys_anno,
    #                                       sep=' ')
    evalData = load_frames(sim_names, sim_keys)

    baseData = get_base_data(evalData, baseVecs, wordMaps=WORD_MAP_DICT,
                             charMaps=CHAR_MAP_DICT, lowerMap=VEC_LOWER_DICT,
                             fillZeros=True, word2vec_key='')

    # get the baseline metrics
    base_hmean = get_metric_data(baseData, metric='hmean', base=True,
                                 subset='all')
    base_rmse = get_metric_data(baseData, metric='rmse', base=True,
                                subset='all')

    # TO RERUN THE ANALYSIS FROM SCRATCH
    if RERUN:
        retroVecs = retrofit_lexicons(lexicons, baseVecs, word2vec_key='')
        retroVecs, lexicons = retrofit_online_many(lex_names_anno,
                                                   lex_keys_anno, lexicons,
                                                   retroVecs, baseVecs,
                                                   vec_keys, version,
                                                   verbose=True, sep=' ')
        # add the grouping information
        evalData = check_if_grouped_many(lexicons, evalData, getPaired=False)
        changeData = get_change_data(evalData, baseVecs, retroVecs, lexicons,
                                     wordMaps=WORD_MAP_DICT,
                                     charMaps=CHAR_MAP_DICT,
                                     lowerMap=VEC_LOWER_DICT, fillZeros=True)
    else:
        changeData = load_change_data('results/', post='0403B')

    # # FIG 1: Spearman, all pairs
    # plot_metrics(changeData,
    #              title='Change in Spearman after retrofitting (all pairs)',
    #              vecName=['GloVe', 'SG'],
    #              simName=['MT771', 'MEN3K', 'RW', 'SL999'],
    #              subset='all', metric='spearman', invert=False,
    #              leg_labels_dict=leg_labels_dict,
    #              text_size=18, leg_texts_size=18, leg_title_size=18,
    #              height=5, ylabel='Change in Spearman correlation')

    # # FIG 1B: Pearson, all pairs
    # plot_metrics(changeData,
    #              title='Change in Pearson after retrofitting (all pairs)',
    #              vecName=['GloVe', 'SG'],
    #              simName=['MT771', 'MEN3K', 'RW', 'SL999'],
    #              subset='all', metric='pearson', invert=False,
    #              leg_labels_dict=leg_labels_dict,
    #              text_size=18, leg_texts_size=18, leg_title_size=18,
    #              height=5, ylabel='Change in Pearson correlation')

    # FIG 1C: Harmonic Mean, all pairs
    plot_metrics(changeData,
                 title='Change in correlation after retrofitting (all pairs)',
                 vecName=['GloVe', 'SG'],
                 simName=['MT771', 'MEN3K', 'RW', 'SL999'],
                 subset='all', metric='hmean', invert=False,
                 leg_labels_dict=leg_labels_dict,
                 text_size=18, leg_texts_size=18, leg_title_size=18, height=5,
                 ylabel='Change in harmonic mean of correlations')

    # FIG 2: RMSE, all pairs
    plot_metrics(changeData,
                 title='Change in RMSE after retrofitting (all pairs)',
                 vecName=['GloVe', 'SG'],
                 simName=['MT771', 'MEN3K', 'RW', 'SL999'],
                 subset='all', metric='rmse', invert=True,
                 leg_labels_dict=leg_labels_dict,
                 text_size=18, leg_texts_size=18, leg_title_size=18, height=5,
                 ylabel='Change in RMSE')

    #
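    # Optional caching sketch: when RERUN is True nothing is written to disk,
    # so a later run with RERUN = False has nothing for load_change_data to
    # read. The loop below mirrors the CSV-writing helper defined above; the
    # 'results/' folder and the '0403B' tag are assumptions, and the block is
    # left commented out so it does not change the behavior of this script.
    # for vecName, vecDict in changeData.items():
    #     for simName, d in vecDict.items():
    #         fname = 'results/%s_%s_%s.csv' % (vecName, simName, '0403B')
    #         d.to_csv(fname, index=False)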
    # # FIG 3: Spearman, pairs in resource
    # plot_metrics(changeData,
    #              title='Change in Spearman after retrofitting (pairs in resource)',
    #              vecName=['GloVe', 'SG'],
    #              simName=['MT771', 'MEN3K', 'RW', 'SL999'],
    #              subset='both', metric='spearman', invert=False,
    #              leg_labels_dict=leg_labels_dict,
    #              text_size=18, leg_texts_size=18, leg_title_size=18,
    #              height=5, ylabel='Change in Spearman correlation')

    # # FIG 3B: Pearson, pairs in resource
    # plot_metrics(changeData,
    #              title='Change in Pearson after retrofitting (pairs in resource)',
    #              vecName=['GloVe', 'SG'],
    #              simName=['MT771', 'MEN3K', 'RW', 'SL999'],
    #              subset='both', metric='pearson', invert=False,
    #              leg_labels_dict=leg_labels_dict,
    #              text_size=18, leg_texts_size=18, leg_title_size=18,
    #              height=5, ylabel='Change in Pearson correlation')

    # FIG 3C: Harmonic Mean, pairs in resource
    plot_metrics(changeData,
                 title='Change in correlation after retrofitting (pairs in resource)',
                 vecName=['GloVe', 'SG'],
                 simName=['MT771', 'MEN3K', 'RW', 'SL999'],
                 subset='both', metric='hmean', invert=False,
                 leg_labels_dict=leg_labels_dict,
                 text_size=18, leg_texts_size=18, leg_title_size=18, height=5,
                 ylabel='Change in harmonic mean of correlations')

    # FIG 4: RMSE, pairs in resource
    plot_metrics(changeData,
                 title='Change in RMSE after retrofitting (pairs in resource)',
                 vecName=['GloVe', 'SG'],
                 simName=['MT771', 'MEN3K', 'RW', 'SL999'],
                 subset='both', metric='rmse', invert=True,
                 leg_labels_dict=leg_labels_dict,
                 text_size=18, leg_texts_size=18, leg_title_size=18, height=5,
                 ylabel='Change in RMSE')

    # FIG 5: FN-ANNO, all pairs
    g = plot_results(changeData, 'FN-ANNO', 'GloVe', 'MT771',
                     metrics=['spearman', 'rmse'], subset='all',
                     title='FN-ANNO (all pairs)', s=75, legend_out=False,
                     xsize=20, ysize=22, tsize=20, showChange=True)
    w1 = ['find', 'occasion', 'film']
    w2 = ['occurrence', 'second', 'movie']
    annotate_results(changeData, 'FN-ANNO', g, w1, w2, 'GloVe', 'MT771',
                     show_points=True, markers=['D', 'X', 's'], ms=18)

    # FIG 5B: FN-ANNO, pairs in resource
    g = plot_results(changeData, 'FN-ANNO', 'GloVe', 'MT771',
                     metrics=['spearman', 'rmse'], subset='both',
                     title='FN-ANNO (pairs in resource)', s=75,
                     legend_out=False, xsize=20, ysize=22, tsize=20,
                     showChange=True)
    w1 = ['find', 'occasion', 'film']
    w2 = ['occurrence', 'second', 'movie']
    annotate_results(changeData, 'FN-ANNO', g, w1, w2, 'GloVe', 'MT771',
                     show_points=True, markers=['D', 'X', 's'], ms=18)
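
    # To keep the generated figures, either pass filename=... to the plotting
    # calls above or save a grid explicitly, e.g. (the file name here is only
    # an example; PATH_OUT_FIG is defined at the top of this file):
    # g.savefig(PATH_OUT_FIG + 'fig5b_fn_anno_pairs.png')

    # display the figures when the script is run non-interactively
    plt.show()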