Merge pull request #8 from MITx/vik/deployment_work

Vik/deployment work

Merge pull request #8 from MITx/vik/deployment_work
Vik/deployment work
ed930658 · VikParuchuri · 2e81fda2 · 844d0f7b · ed930658 · ed930658
Commit ed930658 authored Feb 11, 2013 by VikParuchuri
Show whitespace changes
Inline Side-by-side

Showing with 165 additions and 3 deletions

install_system_req.sh
+103 -0

tests/__init__.py
+1 -0

tests/test_cv_full.py
+58 -0

util_functions.py
+3 -3

No files found.
--- a/install_system_req.sh
+++ b/install_system_req.sh
+#!/usr/bin/env bash
+
+# POSIX-compliant sanity check: refuse to continue unless the interpreter is
+# bash proper. $BASH is empty under other shells and is "/bin/sh" when bash
+# was invoked through that name. The expansions are quoted so an empty or
+# whitespace-containing value cannot break the test expression.
+if [ -z "$BASH" ] || [ "$BASH" = "/bin/sh" ]; then
+    echo "Please use the bash interpreter to run this script"
+    exit 1
+fi
+
+error() {
+    # Echo all arguments wrapped in the ANSI escape for red text (31),
+    # then reset the terminal attributes.
+    printf '\E[31m'
+    echo "$@"
+    printf '\E[0m'
+}
+output() {
+    # Echo all arguments wrapped in the ANSI escape for cyan text (36),
+    # then reset the terminal attributes.
+    printf '\E[36m'
+    echo "$@"
+    printf '\E[0m'
+}
+
+
+### START
+
+# Directory containing this script; the package lists are looked up beside it.
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+BREW_FILE="$DIR/brew-formulas.txt"
+APT_PKGS_FILE="$DIR/apt-packages.txt"
+
+# Dispatch on the kernel name: Linux installs via apt, Darwin via brew.
+case "$(uname -s)" in
+    [Ll]inux)
+        command -v lsb_release &>/dev/null || {
+            error "Please install lsb-release."
+            exit 1
+        }
+
+        # Release codename, used to whitelist supported distributions
+        distro="$(lsb_release -cs)"
+        case "$distro" in
+            maya|lisa|natty|oneiric|precise|quantal)
+                output "Installing Ubuntu requirements"
+
+                # Fail early with a clear message rather than handing
+                # apt-get an empty or missing package list.
+                if [[ ! -r "$APT_PKGS_FILE" ]]; then
+                    error "$APT_PKGS_FILE does not exist, needed to install apt packages"
+                    exit 1
+                fi
+
+                # DEBIAN_FRONTEND=noninteractive is required for silent mysql-server installation
+                # NOTE(review): depending on the sudoers env policy this
+                # exported variable may not reach apt-get - confirm.
+                export DEBIAN_FRONTEND=noninteractive
+
+                # install packages listed in APT_PKGS_FILE
+                xargs sudo apt-get -y install < "$APT_PKGS_FILE"
+                ;;
+            *)
+                error "Unsupported distribution - $distro"
+                exit 1
+                ;;
+        esac
+        ;;
+    Darwin)
+
+        if [[ ! -w /usr/local ]]; then
+            cat<<EO
+
+        You need to be able to write to /usr/local for
+        the installation of brew and brew packages.
+
+        Either make sure the group you are in (most likely 'staff')
+        can write to that directory or simply execute the following
+        and re-run the script:
+
+        $ sudo chown -R $USER /usr/local
+EO
+
+            exit 1
+
+        fi
+
+        output "Installing OSX requirements"
+        if [[ ! -r "$BREW_FILE" ]]; then
+            error "$BREW_FILE does not exist, needed to install brew"
+            exit 1
+        fi
+
+        # brew errors if the package is already installed, so skip those.
+        # The name is matched as a whole literal line (-x -F): an unanchored
+        # regex match would treat e.g. "python" as installed whenever
+        # "python3" is. "brew list" is re-run on every iteration because an
+        # earlier install may have pulled in a later entry as a dependency.
+        for pkg in $(cat "$BREW_FILE"); do
+            brew list | grep -qxF -- "$pkg" || {
+                output "Installing $pkg"
+                brew install "$pkg"
+            }
+        done
+
+        # paths where brew likes to install python scripts
+        PATH=/usr/local/share/python:/usr/local/bin:$PATH
+
+        command -v pip &>/dev/null || {
+            output "Installing pip"
+            easy_install pip
+        }
+
+        # NOTE(review): only a 1.7.x virtualenv counts as already installed
+        # here, although the message and the pip requirement say ">1.7";
+        # newer versions re-run pip on every invocation - confirm intent.
+        if ! grep -Eq '^1\.7' <(virtualenv --version 2>/dev/null); then
+            output "Installing virtualenv >1.7"
+            pip install 'virtualenv>1.7' virtualenvwrapper
+        fi
+
+        command -v coffee &>/dev/null || {
+            output "Installing coffee script"
+            # TLS verification stays enabled: the downloaded script is piped
+            # straight into a shell, so an unverified connection would allow
+            # arbitrary code execution. --fail stops an HTTP error page from
+            # being fed to sh.
+            curl --fail https://npmjs.org/install.sh | sh
+            npm install -g coffee-script
+        }
+        ;;
+    *)
+        error "Unsupported platform"
+        exit 1
+        ;;
+esac
--- a/tests/__init__.py
+++ b/tests/__init__.py
+__author__ = 'vik'
--- a/tests/test_cv_full.py
+++ b/tests/test_cv_full.py
+import os
+import sys
+base_path = os.path.dirname(__file__)
+sys.path.append(base_path)
+
+one_up_path=os.path.abspath(os.path.join(os.path.dirname(__file__),'..'))
+sys.path.append(one_up_path)
+
+import util_functions
+import essay_set
+import feature_extractor
+import numpy
+import math
+
+from sklearn.ensemble import GradientBoostingClassifier
+
+if not base_path.endswith("/"):
+    base_path=base_path+"/"
+
+filenames = ['LSQ_W09_60_MLT.tsv',
+             'LSQ_W10_22_a.tsv',
+              'LSQ_W11_21_MLT.tsv',
+            ]
+
+for filename in filenames:
+    base_name = base_path + filename
+    print base_name
+    sa_val = file(base_name)
+    scores=[]
+    texts=[]
+    lines=sa_val.readlines()
+    eset=essay_set.EssaySet(type="train")
+    for i in xrange(1,len(lines)):
+        score,text=lines[i].split("\t\"")
+        scores.append(int(score))
+        texts.append(text)
+        eset.add_essay(text,int(score))
+        #if int(score)==0:
+        #    eset.generate_additional_essays(text,int(score))
+    extractor=feature_extractor.FeatureExtractor()
+    extractor.initialize_dictionaries(eset)
+    train_feats=extractor.gen_feats(eset)
+    clf=GradientBoostingClassifier(n_estimators=100, learn_rate=.05,max_depth=4, random_state=1,min_samples_leaf=3)
+    cv_preds=util_functions.gen_cv_preds(clf,train_feats,scores, num_chunks = int(math.floor(len(texts)/2)))
+    err=numpy.mean(numpy.abs(numpy.array(cv_preds)-scores))
+    print err
+    kappa=util_functions.quadratic_weighted_kappa(list(cv_preds),scores)
+    print kappa
+
+    outfile=open(filename + "_cvout.tsv",'w+')
+    outfile.write("cv_pred" + "\t" + "actual\n")
+    for i in xrange(0,len(cv_preds)):
+        outfile.write("{0}\t{1}\n".format(str(cv_preds[i]),str(scores[i])))
+    outfile.close()
+
+
+
+
--- a/util_functions.py
+++ b/util_functions.py
@@ -269,12 +269,12 @@ def gen_cv_preds(clf, arr, sel_score, num_chunks=3):
    preds = []
    set_score = numpy.asarray(sel_score, dtype=numpy.int)
    chunk_vec = numpy.asarray(range(0, len(chunks)))
-    for i in range(0, len(chunks)):
+    for i in xrange(0, len(chunks)):
        loop_inds = list(
            chain.from_iterable([chunks[int(z)] for z, m in enumerate(range(0, len(chunks))) if int(z) != i]))
        sim_fit = clf.fit(arr[loop_inds], set_score[loop_inds])
-        preds.append(sim_fit.predict(arr[chunks[i]]))
-    all_preds = numpy.concatenate((preds[0], preds[1], preds[2]), axis=0)
+        preds.append(list(sim_fit.predict(arr[chunks[i]])))
+    all_preds = list(chain(*preds))
    return(all_preds)