#!/bin/bash

# Request 1 day, 23 hours and 59 minutes of runtime (format: D-HH:MM:SS):
#SBATCH --time=1-23:59:00

# Ask for the GPU partition and 1 GPU
#SBATCH --partition=gpu-he --gres=gpu:1
#SBATCH --exclude=gpu1404,gpu1405,gpu1211,gpu1212

# Request 8 tasks (the default is 1 core with 2.8GB of memory):
#SBATCH --ntasks=8

# Use more memory (50GB) (CPU RAM):
#SBATCH --mem=50g

# Specify a job name:
#SBATCH -J exp-302-ctg-bloomz_ctg_en

# Specify an output file
#SBATCH -o /users/zyong2/data/zyong2/scaling/logs/log-302-ctg/bloomz_ctg_en.out
#SBATCH -e /users/zyong2/data/zyong2/scaling/logs/log-302-ctg/bloomz_ctg_en.err

# Load the toolchain modules the training environment is used with.
module load python/3.9.0
module load gitlfs/2.7.1
module load cuda/11.8.0
module load gcc/10.2

# Project root shared by the virtualenv, the training script and the data.
scaling_root=/users/zyong2/data/zyong2/scaling
venv_activate="${scaling_root}/env-mistral-ctg/bin/activate"

# Abort early if the virtualenv is unavailable: otherwise the job would carry
# on with the module-provided Python and fail later (or use wrong packages).
if [[ ! -r "${venv_activate}" ]]; then
	echo "error: cannot read virtualenv activate script: ${venv_activate}" >&2
	exit 1
fi
# shellcheck disable=SC1090
source "${venv_activate}"

# Record the GPU(s) visible to this job in the log for later debugging.
nvidia-smi
echo "Testing"

# Alternative multi-GPU launch via accelerate (disabled):
# CUDA_VISIBLE_DEVICES=0,1,2,3
# accelerate launch \
#     --config_file /users/zyong2/data/zyong2/scaling/scripts/exp-302-ctg/accelerate_config.yaml \

# Launch the training script on bigscience/bloomz-7b1 with the bloomz-7b1-sa-en
# dataset. Logging, evaluation and checkpoint saving all use a 500-step
# interval, for 10 epochs. This is the last command, so its exit status becomes
# the exit status of the batch script.
python3 \
	"${scaling_root}/scripts/exp-302-ctg/mistral_train.py" \
	--model bigscience/bloomz-7b1 \
	--dataset "${scaling_root}/data/processed/302-ctg/dataset/bloomz-7b1-sa-en.txt" \
	--output_dir "${scaling_root}/data/processed/302-ctg/bloomz-7b1-sa-en-ctg/" \
	--logging_steps 500 \
	--eval_steps 500 \
	--save_steps 500 \
	--num_train_epochs 10

#	--output_dir /users/zyong2/data/zyong2/scaling/data/processed/302-ctg/bloomz-results-id/ \

# NCCL_DEBUG=INFO python3 -m torch.distributed.launch --nproc_per_node 2 \
#     /users/zyong2/data/zyong2/scaling/scripts/exp-302-ctg/mistral_train.py \
#     --dataset /users/zyong2/data/zyong2/scaling/data/processed/302-ctg/dataset/mini-mistral-7b-enwiki.v1.txt \
#     --output_dir /users/zyong2/data/zyong2/scaling/data/processed/302-ctg/results

# python -m torch.distributed.run --nproc_per_node 2 --nnodes 1 /users/zyong2/data/zyong2/scaling/scripts/exp-302-ctg/torch-distributed-gpu-test.py

# python3 \
#     /users/zyong2/data/zyong2/scaling/scripts/exp-302-ctg/mistral_train.py \
#     --dataset /users/zyong2/data/zyong2/scaling/data/processed/302-ctg/dataset/mini-mistral-7b-enwiki.v1.txt \
#     --output_dir /users/zyong2/data/zyong2/scaling/data/processed/302-ctg/results
