Monday, 1 December 2014

IBM model 1

*** How to obtain IBM Model 1 scores

# Base name of the parallel corpus; the commands below refer to it as
# ${CORPUS}, ${CORPUS}.f and ${CORPUS}.e.
CORPUS=METEO

##########################
# Estimating IBM Model 1 with GIZA++

# Run only the first step of Moses training (-first-step 1 -last-step 1).
# This first step is symmetric. The GIZA++ commands below expect its output
# under ./corpus (e.vcb, f.vcb, f-e-int-train.snt).
# stdout and stderr are both captured in log.train.
perl train-factored-phrase-model.perl \
  -bin-dir . \
  -scripts-root-dir . \
  -root-dir . \
  -corpus "${CORPUS}" \
  -f f \
  -e e \
  -first-step 1 \
  -last-step 1 \
  -alignment grow-diag-final-and \
  -lm 0:3:lmfile \
  > log.train 2>&1

mkdir -p ./giza.f-e

# Build the co-occurrence file that GIZA++ takes via -CoocurrenceFile.
./snt2cooc.out ./corpus/e.vcb ./corpus/f.vcb ./corpus/f-e-int-train.snt \
  > ./giza.f-e/f-e.cooc

# GIZA++ alignment is not symmetric: this run covers one direction only
# (source vocabulary e.vcb, target vocabulary f.vcb).
# Only -m1 is non-zero (19); -m2, -m3, -m4, -mh and -m5 are all 0.
# NOTE(review): -m1 is presumably the Model 1 iteration count, which would
# match the f-e.t1.19 file read below - confirm against the GIZA++ README.
# stdout and stderr are both captured in LOG.f-e.
./GIZA++ \
  -CoocurrenceFile ./giza.f-e/f-e.cooc \
  -c ./corpus/f-e-int-train.snt \
  -m1 19 -m2 0 -m3 0 -m4 0 -mh 0 -m5 0 \
  -model1dumpfrequency 1 \
  -nodumps 0 \
  -o ./giza.f-e/f-e \
  -onlyaldumps 0 \
  -s ./corpus/e.vcb \
  -t ./corpus/f.vcb \
  -emprobforempty 0.0 \
  -probsmooth 0.0 \
  > LOG.f-e 2>&1
# Output file: giza.f-e/f-e.t1.X
# Format:
# e_code f_code P(f_word | e_word)

# This script transforms the codes into words, looking them up in the
# vocabularies built in the first step.
# The table is fed on stdin directly instead of through `cat`.
perl code2word.pl ./corpus/e.vcb ./corpus/f.vcb \
  < giza.f-e/f-e.t1.19 \
  > f-e.ibm1.giza

##########################
# Estimating IBM Model 1 with a standalone software
# NOTE(review): the leading 20 is presumably the number of iterations -
# confirm against ibm1.pl.
perl ibm1.pl 20 "${CORPUS}.f" "${CORPUS}.e" > f-e.ibm1.standalone

No comments:

Post a Comment