{
 "metadata": {
  "name": "",
  "signature": "sha256:9964dcc60b16a9ada446a98196d1a79894d8e4a13dadb296907234ed51e98847"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from sklearn.externals import joblib"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "cfier, labels = joblib.load(\"GradientBoostingClassifier-n_estimators=200,max_depth=2,min_samples_split=10,min_samples_leaf=5,max_features='sqrt',exclude=['.simpleScore','\\!.*'].pkl\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 23
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Display the loaded estimator's repr (its hyperparameters).\n",
      "cfier"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 24,
       "text": [
        "GradientBoostingClassifier(init=None, learning_rate=0.1, loss='deviance',\n",
        "              max_depth=2, max_features='sqrt', max_leaf_nodes=None,\n",
        "              min_samples_leaf=5, min_samples_split=10,\n",
        "              min_weight_fraction_leaf=0.0, n_estimators=200,\n",
        "              random_state=None, subsample=1.0, verbose=0,\n",
        "              warm_start=False)"
       ]
      }
     ],
     "prompt_number": 24
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from pandas import read_table, DataFrame"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 25
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print DataFrame(cfier.feature_importances_, columns = [\"Imp\"], index=labels).sort(['Imp'], ascending = False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "                                   Imp\n",
        "%passageLogScore              0.033765\n",
        "%simpleScore                  0.029856\n",
        "%spWordNet                    0.029451\n",
        "@resultLogScore               0.027105\n",
        "%clOCSuffixedScore            0.026031\n",
        "%clOCMatchScore               0.025232\n",
        "%originDocTitle               0.025153\n",
        "%clOCSubstredScore            0.024124\n",
        "%originPsgByClueLAT           0.022690\n",
        "%originPsgNE                  0.022410\n",
        "%clOSubstredScore             0.021850\n",
        "%tyCorSpQHit                  0.020869\n",
        "%originDBpONoClue             0.018971\n",
        "%tyCorAQuantityCD             0.018720\n",
        "%originPsgByClueConcept       0.018101\n",
        "%clOSuffixedScore             0.017846\n",
        "%tyCorADBpProperty            0.015705\n",
        "%clOCPrefixedScore            0.015058\n",
        "%LATDBpType                   0.014998\n",
        "%originFBOClueLAT             0.014362\n",
        "%clOCPrefixingScore           0.014079\n",
        "%tyCorADBp                    0.013449\n",
        "%tyCorSpAHit                  0.012999\n",
        "!clOSubstredScore             0.012286\n",
        "%AF_PsgDistClueLAT            0.012239\n",
        "%originPsgByClueToken         0.012115\n",
        "%clOSubstringScore            0.010987\n",
        "%LATQuantityCD                0.010930\n",
        "@clOCSubstredScore            0.010764\n",
        "%originFBONoClue              0.010749\n",
        "...                                ...\n",
        "!tyCorSpNoHit                 0.000000\n",
        "@LATQNoWordNet                0.000000\n",
        "@noTyCor                      0.000000\n",
        "@originFBOClueSubjectToken    0.000000\n",
        "%AF_PsgDistClueToken          0.000000\n",
        "%originFBOClueSubjectToken    0.000000\n",
        "!originFBOClueSubjectToken    0.000000\n",
        "@originFBOClueConcept         0.000000\n",
        "!originFBOClueConcept         0.000000\n",
        "@originConceptBySubject       0.000000\n",
        "@originConceptByLAT           0.000000\n",
        "!originConceptByLAT           0.000000\n",
        "@originConceptByNE            0.000000\n",
        "!originConceptByNE            0.000000\n",
        "!originMultiple               0.000000\n",
        "@AF_PsgDistClueToken          0.000000\n",
        "!AF_PsgDistClueToken          0.000000\n",
        "!AF_PsgDistClueConcept        0.000000\n",
        "%AF_PsgDistCluePhrase         0.000000\n",
        "!AF_PsgDistCluePhrase         0.000000\n",
        "@AF_PsgDistClueSV             0.000000\n",
        "%AF_PsgDistClueSV             0.000000\n",
        "!AF_PsgDistClueSV             0.000000\n",
        "!AF_PsgDistClueNE             0.000000\n",
        "@AF_PsgDistClueSubjectToken   0.000000\n",
        "%AF_PsgDistClueSubjectToken   0.000000\n",
        "!AF_PsgDistClueSubjectToken   0.000000\n",
        "@AF_PsgDistClueSubjectPhrase  0.000000\n",
        "!AF_PsgDistClueSubjectPhrase  0.000000\n",
        "!phase1Score                  0.000000\n",
        "\n",
        "[331 rows x 1 columns]\n"
       ]
      }
     ],
     "prompt_number": 26
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import os\n",
      "import subprocess\n",
      "\n",
      "import sklearn.tree"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "for i, tree in enumerate(cfier.estimators_):\n",
      "    with open('gradboost_tree_%03d.dot' % (i,), 'w') as dotfile:\n",
      "        sklearn.tree.export_graphviz(tree[0], dotfile,  feature_names=labels)\n",
      "    os.system('dot -Tpng gradboost_tree_%03d.dot -o gradboost_tree_%03d.png' % (i,i))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}