import numpy as np
import pandas as pd
import math
import nltk
from tqdm import tqdm
from nltk.corpus import wordnet as wn
from nltk import word_tokenize
from nltk.metrics import edit_distance

def tokenize(q1, q2):
    """Split both input strings into word tokens.

    Each string is passed to NLTK's ``word_tokenize`` independently.

    Returns:
        A 2-tuple ``(tokens_of_q1, tokens_of_q2)``.
    """
    first_tokens = word_tokenize(q1)
    second_tokens = word_tokenize(q2)
    return first_tokens, second_tokens

def posTag(q1, q2):
    """Part-of-speech tag two token sequences.

    Each sequence is passed to ``nltk.pos_tag`` independently.

    Returns:
        A 2-tuple ``(tagged_q1, tagged_q2)`` holding the tagger output for
        each input.
    """
    tagged_first = nltk.pos_tag(q1)
    tagged_second = nltk.pos_tag(q2)
    return tagged_first, tagged_second

class Lesk(object):
    """Gloss-overlap (Lesk-style) word sense disambiguation for one sentence.

    ``meanings`` maps every word of the sentence to the WordNet synset name
    chosen for it so far; the empty string marks a word that has not been
    resolved (or has no WordNet senses).
    """

    def __init__(self, sentence):
        """Store the token list and mark every word as unresolved."""
        self.sentence = sentence
        self.meanings = {word: '' for word in sentence}

    def getSenses(self, word):
        """Return the WordNet synsets of the lower-cased ``word``."""
        return wn.synsets(word.lower())

    def getGloss(self, senses):
        """Map each synset name to the tokens of its definition.

        Tokens of synsets sharing a name are concatenated under that name.
        """
        gloss = {}
        for sense in senses:
            gloss.setdefault(sense.name(), []).extend(
                word_tokenize(sense.definition()))
        return gloss

    def getAll(self, word):
        """Return the gloss table for ``word``.

        When WordNet has no synsets for the word, the result is a single
        entry keyed by the lower-cased word with an empty token list.
        """
        senses = self.getSenses(word)
        if not senses:
            return {word.lower(): senses}
        return self.getGloss(senses)

    def Score(self, set1, set2):
        """Count the items of ``set1`` that occur in ``set2``.

        Repeated items in ``set1`` are counted once per occurrence.
        """
        return sum(1 for word in set1 if word in set2)

    def overlapScore(self, word1, word2):
        """Find the sense of ``word1`` whose gloss overlaps ``word2`` most.

        If ``word2`` already has a resolved sense only that sense's gloss is
        used; otherwise all of its glosses are. Words that were not part of
        the constructor sentence are treated as unresolved.

        Returns:
            ``(best_sense_name, score)``; ``(None, 0)`` when no gloss of
            ``word1`` shares any token with the glosses of ``word2``.
        """
        gloss_set1 = self.getAll(word1)
        # .get() keeps context words unseen by __init__ from raising KeyError.
        resolved = self.meanings.get(word2, '')
        if resolved == '':
            gloss_set2 = self.getAll(word2)
        else:
            gloss_set2 = self.getGloss([wn.synset(resolved)])

        bestSense = None
        max_score = 0
        for sense_name, tokens in gloss_set1.items():
            total = 0
            for other_tokens in gloss_set2.values():
                total += self.Score(tokens, other_tokens)
            # Strict '>' keeps the first sense on ties and leaves None when
            # every overlap is zero.
            if total > max_score:
                max_score = total
                bestSense = sense_name

        return bestSense, max_score

    def lesk(self, word, sentence):
        """Pick a WordNet sense for ``word`` using ``sentence`` as context.

        Every other token of ``sentence`` votes for the sense of ``word``
        whose gloss overlaps it most, weighted by the overlap count. The
        winner is recorded in ``self.meanings``.

        Returns:
            ``(word, sense_name)``, or ``(word, None)`` when WordNet has no
            synsets for the word.
        """
        senses = self.getSenses(word)
        if not senses:
            # Nothing to disambiguate, so skip the context scoring entirely.
            return word, None

        meaning = {sense.name(): 0 for sense in senses}

        for word_context in sentence:
            if word_context == word:
                continue
            best_sense, overlap = self.overlapScore(word, word_context)
            if best_sense is not None:
                meaning[best_sense] += overlap

        # max() returns the first-listed sense when several tie.
        self.meanings[word] = max(meaning, key=meaning.get)

        return word, self.meanings[word]

def edit(word1, word2):
    """Return a string similarity derived from the edit distance.

    The score is the reciprocal of ``edit_distance(word1, word2)``.
    Identical strings (distance 0) get the maximum score of 1.0; they
    previously scored 0.0, which ranked an exact match below every
    non-identical pair.

    Returns:
        A float in ``(0, 1]``.
    """
    # Compute the distance once instead of once per branch.
    distance = edit_distance(word1, word2)
    if distance == 0:
        return 1.0
    return 1.0 / float(distance)

def computePathSim(q1, q2):
    """Build the pairwise WordNet path-similarity matrix of two sentences.

    Args:
        q1, q2: sequences of ``(word, sense_name)`` entries where
            ``sense_name`` is a WordNet synset name or ``None``.

    Returns:
        A ``len(q1) x len(q2)`` numpy array. Entry ``[i, j]`` is the path
        similarity of the two senses; when either sense is missing or
        WordNet yields no value, the ``edit`` score of the two words is
        used instead.
    """
    R = np.zeros((len(q1), len(q2)))

    for row, left in enumerate(q1):
        for col, right in enumerate(q2):
            left_sense, right_sense = left[1], right[1]

            sim = None
            if left_sense is not None and right_sense is not None:
                sim = wn.path_similarity(wn.synset(left_sense),
                                         wn.synset(right_sense))

            # Single fallback for both "no sense" and "no WordNet score".
            if sim is None:
                sim = edit(left[0], right[0])

            R[row, col] = sim

    return R

def computeLchSim(q1, q2):
    """Build the pairwise Leacock-Chodorow similarity matrix of two sentences.

    Args:
        q1, q2: sequences of ``(word, sense_name)`` entries where
            ``sense_name`` is a WordNet synset name of the form
            ``lemma.pos.nn`` or ``None``.

    Returns:
        A ``len(q1) x len(q2)`` numpy array. Entry ``[i, j]`` is:
        the ``edit`` score of the two words when either sense is missing
        or WordNet yields no value; 0 when the two senses have different
        part-of-speech fields; otherwise the LCH similarity of the senses.
    """
    R = np.zeros((len(q1), len(q2)))

    for i, left in enumerate(q1):
        for j, right in enumerate(q2):
            left_sense, right_sense = left[1], right[1]

            if left_sense is None or right_sense is None:
                sim = edit(left[0], right[0])
            else:
                # Split from the right: the lemma part of a synset name may
                # itself contain dots, so the POS is the second-to-last
                # field, not the second one.
                left_pos = left_sense.rsplit('.', 2)[1]
                right_pos = right_sense.rsplit('.', 2)[1]
                if left_pos != right_pos:
                    # Senses with different POS are scored 0 without
                    # querying WordNet.
                    sim = 0
                else:
                    sim = wn.lch_similarity(wn.synset(left_sense),
                                            wn.synset(right_sense))

            if sim is None:
                sim = edit(left[0], right[0])

            R[i, j] = sim

    return R

def computeWupSim(q1, q2):
    """Build the pairwise Wu-Palmer similarity matrix of two sentences.

    Args:
        q1, q2: sequences of ``(word, sense_name)`` entries where
            ``sense_name`` is a WordNet synset name or ``None``.

    Returns:
        A ``len(q1) x len(q2)`` numpy array. Entry ``[i, j]`` is the
        Wu-Palmer similarity of the two senses; when either sense is
        missing or WordNet yields no value, the ``edit`` score of the two
        words is used instead.
    """
    R = np.zeros((len(q1), len(q2)))

    for row, first in enumerate(q1):
        for col, second in enumerate(q2):
            both_resolved = first[1] is not None and second[1] is not None

            if both_resolved:
                sim = wn.wup_similarity(wn.synset(first[1]),
                                        wn.synset(second[1]))
            else:
                sim = None

            # Fall back to surface similarity whenever WordNet gave nothing.
            if sim is None:
                sim = edit(first[0], second[0])

            R[row, col] = sim

    return R

def overallSim(q1, q2, R):
    """Collapse a pairwise similarity matrix into one sentence-level score.

    For every row of ``R`` the largest entry (floored at 0.0) is summed,
    likewise for every column; the two sums are added and divided by
    ``2 * (len(q1) + len(q2))``.

    Args:
        q1, q2: the two sequences whose lengths give the row and column
            counts of ``R``.
        R: object indexable as ``R[i, j]``.

    Returns:
        The combined score, or 0.0 when both sequences are empty.
    """
    rows = len(q1)
    cols = len(q2)

    size = float(rows) + float(cols)
    if size == 0.0:
        return 0.0

    # Best match for each item of q1; the leading 0.0 is the floor.
    row_total = 0.0
    for i in range(rows):
        row_total += max([0.0] + [R[i, j] for j in range(cols)])

    # Best match for each item of q2.
    col_total = 0.0
    for j in range(cols):
        col_total += max([0.0] + [R[i, j] for i in range(rows)])

    return (row_total + col_total) / (2 * size)

def _senseTagContentWords(tagged):
    """Keep the content words of a POS-tagged sentence and disambiguate them.

    A token is kept when its tag contains 'NN', 'JJ' or 'VB'. The kept
    tokens form the context for a fresh ``Lesk`` instance.

    Returns:
        A list with one ``Lesk.lesk`` result per kept token, in order.
    """
    content = [
        token for token, tag in tagged
        if 'NN' in tag or 'JJ' in tag or 'VB' in tag
    ]
    disambiguator = Lesk(content)
    return [disambiguator.lesk(word, content) for word in content]


def semanticSimilarity(q1, q2, metric):
    """Score the semantic similarity of two sentences.

    Both strings are tokenized and POS-tagged, their content words are
    sense-tagged with Lesk, ``metric`` builds the pairwise similarity matrix
    and ``overallSim`` reduces it to a single number.

    Args:
        q1, q2: the two sentences as strings.
        metric: callable taking the two sense-tagged word lists and
            returning the matrix ``overallSim`` expects (e.g.
            ``computePathSim``, ``computeLchSim``, ``computeWupSim``).

    Returns:
        The value returned by ``overallSim``.
    """
    tokens_q1, tokens_q2 = tokenize(q1, q2)
    tag_q1, tag_q2 = posTag(tokens_q1, tokens_q2)

    sentence1Means = _senseTagContentWords(tag_q1)
    sentence2Means = _senseTagContentWords(tag_q2)

    R = metric(sentence1Means, sentence2Means)

    return overallSim(sentence1Means, sentence2Means, R)