#! /bin/csh

# runBitextCotrain.sh
# Shane Bergsma
# December 6, 2010

# Require exactly one argument: the number of leading lines taken from each
# cached training file (see the `head -n $1` calls further down) to start
# co-training with.
if ($#argv != 1) then
	echo "Must pass how much training to start with"
	# A bare `exit` would finish with the status of the preceding command
	# (0 after a successful echo), so a usage error would look like success
	# to callers.  Exit non-zero explicitly.
	exit 1
endif

# Toggle: when non-zero, the feature lists and the cached feature vectors in
# Data/Cotrain/Cache are regenerated below; when 0 those steps are skipped
# and the existing cache contents are used.
set recreate = 0

# Initialize the working sets in Data/Cotrain from the pristine copies in
# Data/FVs.  Both views get the same four splits.
foreach view (1 2)
	foreach split (train dev test unlab)
		cp Data/FVs/bitext.b${view}.${split} Data/Cotrain/bitext.${view}.${split}
	end
end

# The WSJ sets belong with view #1 (they are vectorized with the view #1
# feature list).  wsj.train is copied too so it can be augmented later.
foreach split (dev test train)
	cp Data/FVs/wsj.${split} Data/Cotrain/wsj.${split}
end

# Note the two unlab sets change in tandem

# Plus we also evaluate performance on our dev sets.  These change
# each time as the feature lists change.

# New: generate all the fvs in advance:

if ($recreate) then

echo "Generating the feature lists"

# For each view, build the feature list from the train + unlab sets:
#   1. drop the leading 0/1 label and any trailing '#...' comment,
#   2. put every space-separated token on its own line, dropping empty lines,
#   3. strip the ':value' part so only the feature name remains,
#   4. count occurrences and order by descending count,
#   5. keep names whose count is > 0 (a cutoff that currently keeps
#      everything) and print just the name.
foreach view (1 2)
	cat Data/Cotrain/bitext.${view}.train Data/Cotrain/bitext.${view}.unlab \
		| sed -e 's/^[01] //' -e 's/#.*//' \
		| tr ' ' '\n' \
		| grep . \
		| sed 's/:[^:][^:]*//g' \
		| sort \
		| uniq -c \
		| sort -nr \
		| awk '$1 > 0 {print $2}' \
		> Data/Cotrain/bitext.${view}.train.featList
end

endif

if ($recreate) then

############## 3) VECTORIZING ##############

echo "Vectorizing the FVs:"

# Every set is piped through Tools/vectorize.pl with the feature list of its
# view, and the result is stored under Data/Cotrain/Cache as <set>.fvs.

# View #1 sets, plus the WSJ sets, all use the view #1 feature list.
foreach name (bitext.1.train bitext.1.dev bitext.1.test bitext.1.unlab wsj.dev wsj.test wsj.train)
	cat Data/Cotrain/${name} | Tools/vectorize.pl -f Data/Cotrain/bitext.1.train.featList > Data/Cotrain/Cache/${name}.fvs
end

# View #2 sets use the view #2 feature list.
foreach name (bitext.2.train bitext.2.dev bitext.2.test bitext.2.unlab)
	cat Data/Cotrain/${name} | Tools/vectorize.pl -f Data/Cotrain/bitext.2.train.featList > Data/Cotrain/Cache/${name}.fvs
end

endif

# Populate the working .fvs files in Data/Cotrain from the cache.

# Training sets: only the first $1 cached vectors of each view are used as
# the starting training data.
foreach view (1 2)
	head -n $1 Data/Cotrain/Cache/bitext.${view}.train.fvs > Data/Cotrain/bitext.${view}.train.fvs
end

# Every other set is copied from the cache unchanged.
foreach name (bitext.1.dev bitext.1.test bitext.1.unlab wsj.dev wsj.test wsj.train bitext.2.dev bitext.2.test bitext.2.unlab)
	cp Data/Cotrain/Cache/${name}.fvs Data/Cotrain/${name}.fvs
end

# Launch the co-training driver.  Exactly one invocation is active; the
# commented-out lines are alternative settings kept for reference (they
# differ from the active one only in the -n, -p and -c values).
# NOTE(review): the meaning of each flag is defined in
# Scripts/runBitextCotrain.pl, which is not visible here -- confirm there
# before changing any value.
#Scripts/runBitextCotrain.pl -n 200 -f bitext -u 750 -p 0.57 -a 50 -c 1e-6_1e-5_1e-4_1e-3_1e-2_1e-1_1e0_1e1_1e2
#Scripts/runBitextCotrain.pl -n 200 -f bitext -u 750 -p 0.57 -a 50 -c 1e-1
Scripts/runBitextCotrain.pl -n 53 -f bitext -u 750 -p 0.57 -a 50 -c 1e-1
#Scripts/runBitextCotrain.pl -n 200 -f bitext -u 750 -p 0.43 -a 50 -c 1e-1
