from flair.embeddings import TransformerWordEmbeddings, StackedEmbeddings, CharacterEmbeddings, MuseCrosslingualEmbeddings, WordEmbeddings, BytePairEmbeddings
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from hyperopt import hp
from flair.hyperparameter.param_selection import SearchSpace, Parameter
from flair.hyperparameter.param_selection import SequenceTaggerParamSelector, OptimizationValue
import argparse
from pathlib import Path



# Command-line interface. Every option is mandatory: each value is used
# unconditionally further down, so a missing one is rejected here with a
# clear usage error instead of failing later on a None value.
parser = argparse.ArgumentParser(
    description='Evaluate a trained flair SequenceTagger on the test split.'
)
parser.add_argument('--data_folder', type=str, required=True,
                    help='Path to folder containing training.conll, dev.conll and test.conll')
parser.add_argument('--out', type=str, required=True,
                    help='Path to folder where final_test.tsv is written')
parser.add_argument('--model', type=str, required=True,
                    help='Path to the trained SequenceTagger model to load')

args = parser.parse_args()

# Column layout of the data files: column index -> name of the annotation
# layer stored in that column.
columns = {
    0: "text",
    1: "borrowing",
}

print("Creating embeddings...")

# Embedding stack used for evaluation: character-level features, a Spanish
# and an English BERT model, and byte-pair embeddings for both languages.
# Alternatives left disabled by the author:
#   MuseCrosslingualEmbeddings()
#   TransformerWordEmbeddings('bert-base-multilingual-cased')
#   TransformerWordEmbeddings('sagorsarker/codeswitch-spaeng-lid-lince')
embeddings = StackedEmbeddings(
    [
        CharacterEmbeddings(),
        TransformerWordEmbeddings('dccuchile/bert-base-spanish-wwm-cased'),
        TransformerWordEmbeddings('bert-base-cased'),
        BytePairEmbeddings('en'),
        BytePairEmbeddings('es'),
    ]
)

print("Creating corpus...")

# Names of the three split files expected inside the data folder.
split_files = {
    "train_file": 'training.conll',
    "test_file": 'test.conll',
    "dev_file": 'dev.conll',
}

# Build the column-formatted corpus from the data folder, the column layout
# and the split file names.
corpus: Corpus = ColumnCorpus(args.data_folder, columns, **split_files)

print("Corpus created...")


# Load the trained tagger from the path given on the command line.
model = SequenceTagger.load(args.model)

# NOTE(review): this swaps the embeddings stored with the loaded model for
# the freshly constructed stack. Presumably any trainable weights inside it
# (e.g. CharacterEmbeddings) are then not the ones saved with the model --
# confirm this override is intended.
model.embeddings = embeddings

# Make sure the output folder exists before the evaluation pass, so the run
# cannot fail on a missing directory when the predictions file is written.
out_dir = Path(args.out)
out_dir.mkdir(parents=True, exist_ok=True)

# Evaluate on the test split; per-token predictions go to final_test.tsv.
test_results, test_loss = model.evaluate(
    corpus.test,
    mini_batch_size=32,
    out_path=out_dir / "final_test.tsv",
    embedding_storage_mode="cpu",
)

# Report the evaluation outcome instead of silently discarding it.
print(test_results.detailed_results)




