Commit ed930658 by VikParuchuri

Merge pull request #8 from MITx/vik/deployment_work

Vik/deployment work
parents 2e81fda2 844d0f7b
#!/usr/bin/env bash
# POSIX-compliant sanity check: only `[` is used here so the test itself
# still works when the script is started by a plain sh.
# The expansions are quoted (with an empty default) so that an unset BASH or
# an interpreter path containing spaces cannot break the test.
if [ -z "${BASH:-}" ] || [ "${BASH:-}" = "/bin/sh" ]; then
  echo "Please use the bash interpreter to run this script" >&2
  exit 1
fi
#######################################
# Print an error message in red.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   writes the coloured message, followed by a colour reset, to
#            stderr so it is not mixed into captured stdout.
#######################################
error() {
  # "$*" joins on the first character of IFS; pin it to a space.
  local IFS=' '
  # printf '%s' prints the text verbatim, so a message starting with
  # -n or -e is not swallowed as an option the way `echo "$@"` would.
  printf '\033[31m%s\n\033[0m' "$*" >&2
}
#######################################
# Print a progress message in cyan.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   writes the coloured message, followed by a colour reset, to stdout.
#######################################
output() {
  # "$*" joins on the first character of IFS; pin it to a space.
  local IFS=' '
  # printf '%s' prints the text verbatim, so a message starting with
  # -n or -e is not swallowed as an option the way `echo "$@"` would.
  printf '\033[36m%s\n\033[0m' "$*"
}
### START
# Absolute path of the directory holding this script; the package lists
# below are looked up next to the script rather than in the caller's cwd.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# One Homebrew formula / apt package name per whitespace-separated word.
BREW_FILE="${DIR}/brew-formulas.txt"
APT_PKGS_FILE="${DIR}/apt-packages.txt"
# Install the system-level prerequisites for the current platform.
#   Linux  - apt packages listed in APT_PKGS_FILE, for the release codenames
#            enumerated below only.
#   Darwin - Homebrew formulas listed in BREW_FILE, then pip, virtualenv
#            (plus virtualenvwrapper) and CoffeeScript.
# Any other platform or distribution aborts with exit status 1.
case "$(uname -s)" in
  [Ll]inux)
    command -v lsb_release &>/dev/null || {
      error "Please install lsb-release."
      exit 1
    }

    distro=$(lsb_release -cs)
    case "$distro" in
      maya|lisa|natty|oneiric|precise|quantal)
        output "Installing Ubuntu requirements"

        # Fail early with a clear message instead of letting xargs run
        # apt-get with an empty package list.
        if [[ ! -r "$APT_PKGS_FILE" ]]; then
          error "$APT_PKGS_FILE does not exist, needed to install apt packages"
          exit 1
        fi

        # DEBIAN_FRONTEND=noninteractive is required for silent mysql-server installation
        export DEBIAN_FRONTEND=noninteractive

        # install packages listed in APT_PKGS_FILE
        xargs sudo apt-get -y install < "$APT_PKGS_FILE"
        ;;
      *)
        error "Unsupported distribution - $distro"
        exit 1
        ;;
    esac
    ;;

  Darwin)
    if [[ ! -w /usr/local ]]; then
      cat <<EO
You need to be able to write to /usr/local for
the installation of brew and brew packages.
Either make sure the group you are in (most likely 'staff')
can write to that directory or simply execute the following
and re-run the script:
$ sudo chown -R $USER /usr/local
EO
      exit 1
    fi

    output "Installing OSX requirements"

    if [[ ! -r "$BREW_FILE" ]]; then
      error "$BREW_FILE does not exist, needed to install brew"
      exit 1
    fi

    # brew errors if the package is already installed, so skip formulas
    # that `brew list` already reports. The list is re-read on every
    # iteration because installing one formula can pull in later ones.
    # Word splitting of the file contents is intentional: one formula per word.
    for pkg in $(cat "$BREW_FILE"); do
      # -x -F: compare whole lines literally, so "git" is not considered
      # installed just because "git-flow" is.
      grep -xF -- "$pkg" <(brew list) &>/dev/null || {
        output "Installing $pkg"
        brew install "$pkg"
      }
    done

    # paths where brew likes to install python scripts
    PATH=/usr/local/share/python:/usr/local/bin:$PATH

    command -v pip &>/dev/null || {
      output "Installing pip"
      easy_install pip
    }

    # (Re)install unless the reported virtualenv version starts with 1.7.
    if ! grep -Eq '^1\.7' <(virtualenv --version 2>/dev/null); then
      output "Installing virtualenv >1.7"
      pip install 'virtualenv>1.7' virtualenvwrapper
    fi

    command -v coffee &>/dev/null || {
      output "Installing coffee script"
      # NOTE(review): --insecure disables TLS certificate verification and
      # the download is piped straight into sh, so a network attacker could
      # run arbitrary code here. Left unchanged; confirm whether the flag
      # is still needed and drop it if not.
      curl --insecure https://npmjs.org/install.sh | sh
      npm install -g coffee-script
    }
    ;;

  *)
    error "Unsupported platform"
    exit 1
    ;;
esac
__author__ = 'vik'
import os
import sys
# Make the directory containing this script importable.
base_path = os.path.dirname(__file__)
sys.path.append(base_path)
# Also make the parent directory importable.
one_up_path=os.path.abspath(os.path.join(os.path.dirname(__file__),'..'))
sys.path.append(one_up_path)
# These imports come after the sys.path changes above on purpose
# (presumably the modules live in one of those two directories - confirm).
import util_functions
import essay_set
import feature_extractor
import numpy
import math
from sklearn.ensemble import GradientBoostingClassifier
# Data files to evaluate; each is looked up next to this script.
filenames = ['LSQ_W09_60_MLT.tsv',
             'LSQ_W10_22_a.tsv',
             'LSQ_W11_21_MLT.tsv',
             ]

# For every data file: load the scored texts, build features, run
# cross-validation with a gradient boosting classifier, print the mean
# absolute error and the quadratic weighted kappa, and write the
# predictions next to the actual scores into "<filename>_cvout.tsv".
for filename in filenames:
    # os.path.join is used instead of appending "/" to base_path: when the
    # script is started from its own directory, dirname(__file__) is '' and
    # a blindly appended slash would point at the filesystem root.
    base_name = os.path.join(base_path, filename)
    print(base_name)

    # Read everything up front so the handle is closed before the slow part.
    with open(base_name) as sa_val:
        lines = sa_val.readlines()

    scores = []
    texts = []
    eset = essay_set.EssaySet(type="train")
    # The first line is skipped (presumably a header row - confirm against
    # the data files).
    for line in lines[1:]:
        if not line.strip():
            # Tolerate blank lines, e.g. a trailing empty line.
            continue
        # Split on the first tab-quote only, so a text that itself contains
        # a tab followed by a quote does not break the unpacking.
        score, text = line.split("\t\"", 1)
        scores.append(int(score))
        texts.append(text)
        eset.add_essay(text, int(score))
        #if int(score)==0:
        #    eset.generate_additional_essays(text,int(score))

    extractor = feature_extractor.FeatureExtractor()
    extractor.initialize_dictionaries(eset)
    train_feats = extractor.gen_feats(eset)

    # NOTE(review): `learn_rate` is the keyword used by older scikit-learn
    # releases; newer ones call it `learning_rate` - confirm against the
    # installed version before changing it.
    clf = GradientBoostingClassifier(n_estimators=100, learn_rate=.05, max_depth=4,
                                     random_state=1, min_samples_leaf=3)
    # Half as many chunks as there are texts.
    cv_preds = util_functions.gen_cv_preds(clf, train_feats, scores,
                                           num_chunks=int(math.floor(len(texts) / 2)))

    # Mean absolute difference between predicted and actual scores.
    err = numpy.mean(numpy.abs(numpy.array(cv_preds) - scores))
    print(err)
    kappa = util_functions.quadratic_weighted_kappa(list(cv_preds), scores)
    print(kappa)

    # The context manager guarantees the file is flushed and closed even if
    # a write fails part-way through.
    with open(filename + "_cvout.tsv", 'w+') as outfile:
        outfile.write("cv_pred" + "\t" + "actual\n")
        for i in range(0, len(cv_preds)):
            outfile.write("{0}\t{1}\n".format(str(cv_preds[i]), str(scores[i])))
......@@ -269,12 +269,12 @@ def gen_cv_preds(clf, arr, sel_score, num_chunks=3):
preds = []
set_score = numpy.asarray(sel_score, dtype=numpy.int)
chunk_vec = numpy.asarray(range(0, len(chunks)))
for i in range(0, len(chunks)):
for i in xrange(0, len(chunks)):
loop_inds = list(
chain.from_iterable([chunks[int(z)] for z, m in enumerate(range(0, len(chunks))) if int(z) != i]))
sim_fit = clf.fit(arr[loop_inds], set_score[loop_inds])
preds.append(sim_fit.predict(arr[chunks[i]]))
all_preds = numpy.concatenate((preds[0], preds[1], preds[2]), axis=0)
preds.append(list(sim_fit.predict(arr[chunks[i]])))
all_preds = list(chain(*preds))
return(all_preds)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment