*** Two ways to get IBM Model 1 scores
# Base name of the parallel corpus; the training script is given "-f f -e e"
# as the two language suffixes.
CORPUS=METEO
# Number of Model 1 iterations passed to GIZA++ (-m1). The same number is used
# below as the suffix of the t-table dump that is converted back to words, so
# keeping it in one place prevents the two from drifting apart.
M1_ITERATIONS=19
##########################
# Estimating IBM Model 1 with GIZA++
#
# Run only step 1 of Moses training (-first-step 1 -last-step 1).
# Author's note: the first step of Moses training is symmetric.
# NOTE(review): the later commands read ./corpus/e.vcb, ./corpus/f.vcb and
# ./corpus/f-e-int-train.snt, which are presumably produced by this step.
# stdout and stderr both go to log.train ("> file 2>&1" is the portable
# spelling of ">& file").
perl train-factored-phrase-model.perl \
  -bin-dir . -scripts-root-dir . -root-dir . \
  -corpus "$CORPUS" -f f -e e \
  -first-step 1 -last-step 1 \
  -alignment grow-diag-final-and \
  -lm 0:3:lmfile > log.train 2>&1

mkdir -p ./giza.f-e

# Build the co-occurrence file that GIZA++ is given via -CoocurrenceFile.
./snt2cooc.out ./corpus/e.vcb ./corpus/f.vcb ./corpus/f-e-int-train.snt \
  > ./giza.f-e/f-e.cooc

# Author's note: GIZA++ alignment is not symmetric (this run is the f-e
# direction only). Only Model 1 is trained: every other model's iteration
# count (-m2 -m3 -m4 -mh -m5) is set to 0.
./GIZA++ \
  -CoocurrenceFile ./giza.f-e/f-e.cooc \
  -c ./corpus/f-e-int-train.snt \
  -m1 "$M1_ITERATIONS" -m2 0 -m3 0 -m4 0 -mh 0 -m5 0 \
  -model1dumpfrequency 1 -nodumps 0 \
  -o ./giza.f-e/f-e \
  -onlyaldumps 0 \
  -s ./corpus/e.vcb -t ./corpus/f.vcb \
  -emprobforempty 0.0 -probsmooth 0.0 > LOG.f-e 2>&1

# Output file: giza.f-e/f-e.t1.X
# (X is presumably the iteration number of the dump -- confirm against GIZA++.)
# Format:
#   e_code f_code P(f_word | e_word)
# code2word.pl turns the codes into words by looking them up in the
# vocabulary files built in the first step.
perl code2word.pl ./corpus/e.vcb ./corpus/f.vcb \
  < "giza.f-e/f-e.t1.${M1_ITERATIONS}" > f-e.ibm1.giza
##########################
# Estimating IBM Model 1 with a standalone script
#
# ibm1.pl receives 20 followed by the two sides of the corpus ($CORPUS.f and
# $CORPUS.e, with CORPUS set earlier in this file); its stdout is saved to
# f-e.ibm1.standalone.
# NOTE(review): 20 is presumably the number of EM iterations -- confirm
# against ibm1.pl.
perl ibm1.pl 20 "${CORPUS}.f" "${CORPUS}.e" > f-e.ibm1.standalone
No comments:
Post a Comment