#!/usr/bin/env bash
#
# Launch one train.py run with the `adapt_transformer_wmt_en_de` architecture
# on the binarized data in $data_bin_dir (en -> de), saving checkpoints to
# $model_dir and appending the trainer's stdout to $model_dir/training.log.
#
# Usage: run from the directory that contains train.py; the data and
# checkpoint paths below are relative to the current working directory.
#
# Notes:
#   * Only stdout goes through the pipe to tee; stderr is shown on the
#     terminal but is not written to training.log.
#   * The log is opened in append mode, so re-running the script extends the
#     existing training.log instead of truncating it.

# Make the pipeline below report train.py's exit status instead of tee's, so
# a failed run is visible to callers. The subshell probe keeps the script
# usable when it is invoked through a plain `sh` that lacks this option.
(set -o pipefail) 2>/dev/null && set -o pipefail

# Expose only CUDA device 2 to the training process.
export CUDA_VISIBLE_DEVICES=2

data_bin_dir=examples/translation/wmt14_en_de_small/data-bin
model_dir=./adapt_model_small_0_sentence_gram_3

# tee opens the log file as soon as the pipeline starts, before train.py has
# had any chance to create its save directory; create it up front so the log
# is not lost on the first run.
mkdir -p -- "$model_dir" || {
    printf 'error: cannot create directory %s\n' "$model_dir" >&2
    exit 1
}

# Flag groups, in order: architecture; optimizer and LR schedule; criterion;
# batching, logging and checkpoint cadence; gumbel-noise / decay options;
# single-process distributed settings; data subsets; adapt / n-gram options;
# language pair and output directory.
# NOTE(review): the --decay-k, --*gumbel*, --train_adapt_type and
# --*adapt_n_gram* flags are project-specific; their semantics are defined by
# train.py, which is not visible from this script.
python train.py "$data_bin_dir" \
    --arch adapt_transformer_wmt_en_de --share-all-embeddings \
    --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 --lr-scheduler inverse_sqrt \
    --warmup-init-lr 1e-07 --warmup-updates 4000 --lr 0.0007 --min-lr 1e-09 \
    --weight-decay 0.0 --criterion oracle_label_smoothed_cross_entropy --label-smoothing 0.1 --no-progress-bar \
    --max-tokens 8192 --update-freq 1 --log-format tqdm --max-epoch 64 \
    --log-interval 100 --keep-interval-updates 10 --save-interval 1 --seed 1111 \
    --decay-k 10 --use-greed-gumbel-noise --gumbel-noise 0.8 \
    --distributed-port 31111 --distributed-world-size 1 --ddp-backend=no_c10d \
    --train-subset train \
    --valid-subset valid \
    --train_adapt_type 0 \
    --use_adapt_n_gram --adapt_n_gram_number 3 \
    --source-lang en --target-lang de --save-dir "$model_dir" \
    | tee -a "$model_dir/training.log"
