Created
October 19, 2015 14:21
-
-
Save jeffatennis/b7fc6501d80647968516 to your computer and use it in GitHub Desktop.
Marriage Evaluation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Supervised Learning: Logistic Regression\n", | |
| "\n", | |
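| "- In this notebook, we use logistic regression, which models a binary outcome (e.g. emails classified as either \"junk\" or \"not junk\"). Here the binary outcome is `Had_Affair`, derived from the `affairs` column of the statsmodels `fair` dataset." | |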
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "from pandas import Series, DataFrame\n", | |
| "\n", | |
| "import math\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "import seaborn as sns\n", | |
| "sns.set_style('whitegrid')\n", | |
| "%matplotlib inline\n", | |
| "\n", | |
| "from sklearn.linear_model import LogisticRegression\n", | |
| "from sklearn.cross_validation import train_test_split\n", | |
| "\n", | |
| "from sklearn import metrics\n", | |
| "\n", | |
| "import statsmodels.api as sm" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>rate_marriage</th>\n", | |
| " <th>age</th>\n", | |
| " <th>yrs_married</th>\n", | |
| " <th>children</th>\n", | |
| " <th>religious</th>\n", | |
| " <th>educ</th>\n", | |
| " <th>occupation</th>\n", | |
| " <th>occupation_husb</th>\n", | |
| " <th>affairs</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 3</td>\n", | |
| " <td> 32</td>\n", | |
| " <td> 9.0</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 17</td>\n", | |
| " <td> 2</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 0.111111</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 3</td>\n", | |
| " <td> 27</td>\n", | |
| " <td> 13.0</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 14</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> 3.230769</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 4</td>\n", | |
| " <td> 22</td>\n", | |
| " <td> 2.5</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 16</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 1.400000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 4</td>\n", | |
| " <td> 37</td>\n", | |
| " <td> 16.5</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 16</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 5</td>\n", | |
| " <td> 0.727273</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 27</td>\n", | |
| " <td> 9.0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 14</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> 4.666666</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " rate_marriage age yrs_married children religious educ occupation \\\n", | |
| "0 3 32 9.0 3 3 17 2 \n", | |
| "1 3 27 13.0 3 1 14 3 \n", | |
| "2 4 22 2.5 0 1 16 3 \n", | |
| "3 4 37 16.5 4 3 16 5 \n", | |
| "4 5 27 9.0 1 1 14 3 \n", | |
| "\n", | |
| " occupation_husb affairs \n", | |
| "0 5 0.111111 \n", | |
| "1 4 3.230769 \n", | |
| "2 5 1.400000 \n", | |
| "3 5 0.727273 \n", | |
| "4 4 4.666666 " | |
| ] | |
| }, | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "MarriageDF = sm.datasets.fair.load_pandas().data # fair = the affairs dataset that ships with statsmodels\n", | |
| "MarriageDF.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "Int64Index: 6366 entries, 0 to 6365\n", | |
| "Data columns (total 9 columns):\n", | |
| "rate_marriage 6366 non-null float64\n", | |
| "age 6366 non-null float64\n", | |
| "yrs_married 6366 non-null float64\n", | |
| "children 6366 non-null float64\n", | |
| "religious 6366 non-null float64\n", | |
| "educ 6366 non-null float64\n", | |
| "occupation 6366 non-null float64\n", | |
| "occupation_husb 6366 non-null float64\n", | |
| "affairs 6366 non-null float64\n", | |
| "dtypes: float64(9)\n", | |
| "memory usage: 497.3 KB\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "MarriageDF.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Return 1 if the woman spent any time in an affair (non-zero 'affairs' value), otherwise 0\n", | |
| "def affair_check(x):\n", | |
| " if x != 0:\n", | |
| " return 1\n", | |
| " else:\n", | |
| " return 0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Add a binary 'Had_Affair' column by applying affair_check to the 'affairs' column\n", | |
| "MarriageDF['Had_Affair'] = MarriageDF['affairs'].apply(affair_check)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Had_Affair\n", | |
| "0 0.000000\n", | |
| "1 2.187243\n", | |
| "Name: affairs, dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "AffairCategories = MarriageDF.groupby(['Had_Affair'])\n", | |
| "AffairCategories['affairs'].mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<seaborn.axisgrid.FacetGrid at 0x18494390>" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVkAAAEZCAYAAAA9qla3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGVJJREFUeJzt3X+UXWV97/H3RJKAcSatGgFvvaZX5QvK4lpmlBo0JBVB\nQIsSl1hqBYrQInLtxdVYI8tVLRQvqahpLctLRPBqrxcyVOvihy75GdNW8Fyvlov9UsRQ1/UXP0J+\niRiSc//Ye/QwmWRO5sxz5pzJ+7VWFrOfs/c+39knfPKcZ+/97IFms4kkqYw5M12AJM1mhqwkFWTI\nSlJBhqwkFWTISlJBhqwkFXRAyZ1HxDHARzJzeUvbGcC7M3NJvXwucB7wFHBJZt4YEQcBnwMWAVuB\nMzPzkZK1SlIJxXqyEbESuAqY39L2W8AftiwfAlwILAFOBC6LiHnA+cC3M3Mp8Fng4lJ1SlJJJYcL\nHgBOAwYAIuI5wKXAn4y1Aa8ENmTmjszcUm9zFHAscEu9zi3A8QXrlKRiioVsZt5ANQRARMwBPg1c\nBGxrWW0I2NyyvBVYWLdvGdcmSX2n6Jhsi2HgxcCVwIHASyPiCuB2YLBlvUHgcaqAHRzXtleNRqNv\n7g/esWMHP/zhDzvax/Of/3zmzp07TRVJM2d4eHhg8rX6V1dCNjPvAY4EiIgXAl/IzIvqMdlLI2I+\nVfgeAdwLbABOBu4BTgLuaud9+uXDGhgYOGzZW1bmgoWLprT99s0Pc8e6y6PZbN4/zaVJmmbdCNnx\nPcyBsbbM/HFErAHWUw1drMrMJyPiSuDaiFgPPAmc0YU6u2rBwkUMPfvQmS5DUmFFQzYzN1JdObDH\ntsxcC6wdt84TwFtL1iZJ3eDNCJJUkCErSQUZspJUkCErSQUZspJUkCErSQUZspJUkCErSQUZspJU\nkCErSQUZspJUkCErSQV1az5Z9ZGBgYF5wOIOd7Ox2Wz+YhrKkfqaIauJLJ6O+W4B57vVfs+Q1YSc\n71aaHo7JSlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFVT0ttqIOAb4\nSGYuj4iXA2uAncCTwDsy86cRcS5wHvAUcElm3hgRBwGfAxYBW4EzM/ORkrVKUgnFerIRsRK4Cphf\nN30ceHdmLgduAN4XEQcDFwJLgBOByyJiHnA+8O3MXAp8Fri4VJ2SVFLJ4YIHgNOAgXr5bZn5nfrn\nucATwCuBDZm5IzO31NscBRwL3FKvewtwfME6JamYYiGbmTdQDQGMLf8YICKWABcAHwOGgM0tm20F\nFtbtW8a1SVLf6epUhxFxOrAKODkzH42ILcBgyyqDwONUATs4rm1SjUajOY3lFjM6OsrVNz3Y6T6y\n0WhMU0W77bun69PsMjw8PDD5Wv2rayEbEW+nOsG1LDM31c13A5dGxHzgQOAI4F5gA3AycA9wEnBX\nO+/RLx/WyMjIYaecszo72ceKFSui2WwWmRS71+uT+kk3QrYZEXOATwAPATdEBMAdmfmhiFgDrKca\nuliVmU9GxJXAtRGxnupKhDO6UKckTbuiIZuZG6muHAB4zh7WWQusHdf2BPDWkrVJUjd4M4IkFWTI\nSlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJB\nhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFWTISlJBhqwkFXRAyZ1HxDHA\nRzJzeUS8GLgG2AXcC1yQmc2IOBc4D3gKuCQzb4yIg4DPAYuArcCZmflIyVolqYRiPdmIWAlcBcyv\nm64AVmXmUmAAODUiDgEuBJYAJwKXRcQ84Hzg2/W6nwUuLlWnJJVUcrjgAeA0qkAFODoz76p/vhk4\nHngFsCEzd2Tmlnqbo4BjgVvqdW+p15Wkvl
MsZDPzBqohgDEDLT9vBRYCQ8DmPbRvGdcmSX2n6Jjs\nOLtafh4CHqcK0sGW9sEJ2sfaJtVoNJqdl1ne6OgoV9/0YKf7yEajMU0V7bbvnq5Ps8vw8PDA5Gv1\nr26G7Lci4rjMvBM4CbgVuBu4NCLmAwcCR1CdFNsAnAzcU69718S7fLp++bBGRkYOO+Wc1dnJPlas\nWBHNZvP+6aqpVa/XJ/WTblzCNda7fC/woYj4R6pwX5eZPwHWAOupQndVZj4JXAm8LCLWA+8EPtSF\nOiVp2hXtyWbmRqorB8jMfwOWTbDOWmDtuLYngLeWrE2SusGbESSpIENWkgoyZCWpIENWkgoyZCWp\nIENWkgoyZCWpIENWkgoyZCWpIENWkgoyZCWpIENWkgoyZCWpIENWkgoyZCWpIENWkgoyZCWpIENW\nkgoyZCWpIENWkgoyZCWpIENWkgoyZCWpIENWkgoyZCWpIENWkgo6oJtvFhFzgLXAYcAu4FxgJ3BN\nvXwvcEFmNiPiXOA84Cngksy8sZu1StJ06HZP9gRgQWa+Gvgw8JfAR4FVmbkUGABOjYhDgAuBJcCJ\nwGURMa/LtUpSx7rakwWeABZGxACwEPgFcExm3lW/fjNVEO8ENmTmDmBHRDwAHAV8s8v1SlJHuh2y\nG4ADgX8FngO8EVja8vpWqvAdAjZP0L5XjUajOW2VFjQ6OsrVNz3Y6T6y0WhMU0W77bun69PsMjw8\nPDDTNZTU7ZBdSdVD/UBE/AZwOzC35fUh4HFgCzDY0j4IbJps5/3yYY2MjBx2yjmrs5N9rFixIprN\n5v3TVVOrXq9P6ifdHpNdQBWgUIXmAcC3IuK4uu0k4C7gbuA1ETE/IhYCR1CdFJOkvtLtnuxq4DMR\nsZ6qB/t+oAFcVZ/Yug9YV19dsAZYT/UPwarM/EWXa5WkjnU1ZDPzceDNE7y0bIJ111Jd7iVJfcub\nESSpoElDNiL+eoK2a8uUI0mzyx6HCyJiLfAiYCQijhy3za+VLkySZoO9jcleCrwQWAP8OdXdWFDd\n5npf2bIkaXbYY8hm5veB7wNHRcQQ1c0AY0H7LOCx8uVJUn+b9OqCiFgF/BlVqLbeUfWbpYqSpNmi\nnUu43gm8KDMfLl2MJM027VzC9RBt3NIqSdMhIpZFxJXj2r7bxnYvj4jPTLLO4oj4eUS8oqXtRRHx\n7Yj4i4h4T0Q0IuLYPWy/z1dWtdOTfQD4ekTcBjxZtzUz88P7+maS1IaSEz29A/hrqm/o99RtxwLX\nZealEXEr8MbM/OFEG2fmmfv6hu2E7P+r/4zpi0lYJM0uEfFy4HKq3HoG1Sx+TeALwEHAI8D2SXbz\nZqqZ//4pIg6qt1sFzI2I7wNHA9dFxOuBK4Hn1X9WZebNEfHdzDwiIhrAj4BvZ+YH9vaGk4ZsZv75\nZOtI0jQaAH43Ig5vafuPwOHAOzPz3+vhhFcDLwW+lpkfi4izgON221stIpYA/zczt0bEl4Hfy8yr\nI+Iy4ODM/Lv6iSynA4uAL2fmdRFxDPBeqvmuxzwbOC0zH5rsl2nn6oJdEzT/MDN/Y7JtJWkKmsA/\nZOb5Yw31mOyPgcsj4mdUM/PdArwE+Hy92j+xl5AFzgQOj4ibqS5DPQ64mirUx39D3wScEBGn1Mvj\ns/IX7QTsRBvuJjN/eXIsIuYCb6J6LIwklTDRkOQAcAVwPNWc0zfXbQkcQzVF6siedhgR84HfAQ7P\nzJ112z0R8TJ2HwMeoArk+zLzioj4A2DFuHUm6nxOaJ8miMnMHZl5fV2sJJXQZPfgGxt7XQ98ieqq\np0Ooxk1fFRG3Uz0PcE8nzd4I3DYWsLX/QfUw17H9t77XbcAfRsTXqHrLz5lgvba0M1zQejZtAHgZ\nv7rKQJKmVWbeCdw5ru2I+sfLJ9jktDb2uQ5YN65tzQTrLa9//Clw5ASvv3RcPZNq5+qC5fwqvZtU\nZ/BOb/
cNJKmbIuI6qhNXrdZl5idnop52xmTPqp9aEPX699ZPkZWknpOZb53pGlq1M5/sCHA/cC3V\nmbiHIuK3SxcmSbNBO8MFa4DTM/MbAHXArgFeWbIwSZoN2rm6YMFYwAJk5j8DB5YrSZJmj3Z6spsi\n4k2Z+UWAiHgz8GjZsqSJDQwMzAMWd7ibjc1m06cf94lp+szH69rfgXZC9jzgyxHxaapLuHZRTagg\nzYTFy96yMhcsHH/yuD3bNz/MHesuD6rzDOoPHX3m47XzdyAi5gB/CxxFdcnqOzPze1N5v3ZC9vXA\nz6juHX4RcD3VI7xzKm+o2W/XzqcAFg8MTHkuob32MhYsXMTQsw+d6r7Vh2bgM38TMC8zl9RzF3y0\nbttn7YTsHwGvzMztwHci4reAu4FPTeUNNfv9bNsmjr/o1K8867lD+7zttke28LUrvmRPUzPtWKq5\nEcjMb9RXWU1JOyF7ANDaq/gF+3DfrvZPz3ruEAsP+fWZLkOaqiFgS8vyzoiYk5n7nH3thOwXgdsi\n4n9RjcmeBvzDvr7RmIh4P9V9xHOBvwE2ANdQBfe9wAWZ2aynHDuP6um4l2TmjVN9T0naR1uAwZbl\nKQUstHEJV2a+j+q62KB6eOInMvPiqbxZRCwDXpWZS6jGdf8T1VjHqsxcShXip0bEIcCFVLN9nQhc\nVt91JkndsAE4GX55b8B3prqjdnqy1DNvXT/VN2lxAvAvEfFFqu74nwLnZOZd9es31+vsBDbUt+/u\niIgHqM7yfXMaapDUZ7Zvnr7nuLa5r78HXhcRG+rls6f6fm2F7DRaBLwAeANVL/bLPH3uyK3AQqoA\n3jxB+141Go2SzwaaNqOjo1x904Od7iMbjcY0VbTbvjuur8P33+Pv1uvHTvtueHh4sstQNtaXXE2n\njXt7MTObwPl7W6dd3Q7ZR4DvZuZTwP0R8XPgP7S8PkQ1Ie/48ZBB2nhibhsfVk8YGRk57JRzVnd0\nCdyKFSui2WwWOQM/HfV1Ym+/W68fO02/+nK+vv289mnS7mnwdarrbomI5wPPBG6NiLFHRpxENcP5\n3cBrImJ+RCyketTEvV2uVZI61tWebGbeGBFLI+JuqoB/F1W3/ar6xNZ9VPM+NiNiDdUs6HOoTox5\nG6SkvtPt4YKxqxXGWzbBemuBtcULkqSCuj1cIEn7la73ZCVpX+wPs3BJ0kxafPxFp+ZU5sKYSLvz\nY9QTw3yk5eGKU2LISup53Z4LIyJWAm8HtnW6L8dkJWl3D1DN09LxtfeGrCSNk5k3UE1O1TFDVpIK\nMmQlqSBPfEnqedse2TL5SmX21fGkU4aspF63sb7kalr3OdkKmbmRak7rjhiyknqas3BJkvbIkJWk\nghwukPYj0zQPQNfu+58NDFlp/7J42VtW5oKFi6a08fbND1M/CqZvx0i7zZCV9jMLFi5i6NmHznQZ\n+w3HZCWpIENWkgoyZCWpIENWkgoyZCWpIENWkgoyZCWpIK+T1X5l186nABYPDHT0VBHveFLbDFnt\nV362bRPHX3TqV6b65NN2n3QqjZmRkI2I5wEN4LXALuCa+r/3AhdkZjMizgXOo3rOziWZeeNM1KrZ\np9tPPtX+retjshExF/gUsJ3qSZBXAKsyc2m9fGpEHAJcSDVh7onAZRExr9u1SlKnZqInuxq4Enh/\nvXx0Zt5V/3wzcAKwE9iQmTuAHRHxAHAU8M1uF9uLHFeU+kdXQzYizgIezsyvRsT7qXqurUmxFVgI\nDAGbJ2gXjitK/aTbPdmzgWZEHA+8HLgWaJ1zbQh4HNgCDLa0DwKbJtt5o9Ho+KFn3TA6OsrVNz3Y\n0T46HVccHR3NRqOxp9c6rq8TvVxbXcMe6+t103H8pvv3Hx4e7ugrWa/rashm5nFjP0fE7cAfA6sj\n4rjMvBM4CbgVuBu4NCLmAwcCR1CdFNurfvmwRkZGDjvlnNU5kzWsWLEi
ms3mhD3Zma6vl2uDvdfX\n66bj+PXz7z8TZvoSribwXuCq+sTWfcC6+uqCNcB6qpNzqzLT8UNJfWfGQjYzl7csLpvg9bXA2q4V\nJEkFeFutJBU008MF0qzigwo1niErTS8fVKinMWSlaeaDCtXKMVlJKsiQlaSCDFlJKsiQlaSCDFlJ\nKsiQlaSCvIRLUtucy3jfGbKS2uZcxvvOkJW0T3xG2r5xTFaSCjJkJakgQ1aSCnJMVuohnr2ffQxZ\nqYd49n72MWSlHuPZ+9nFMVlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCunoJV0TMBa4GXgjMBy4B\nvgtcA+wC7gUuyMxmRJwLnAc8BVySmTd2s1ZJmg7d7sn+PvBwZi4FXg98EvgosKpuGwBOjYhDgAuB\nJcCJwGURMa/LtUpSx7p9M8L1wLr65znADuDozLyrbrsZOAHYCWzIzB3Ajoh4ADgK+GaX65WkjnQ1\nZDNzO0BEDFIF7sXAX7WsshVYCAwBmydol6S+0vXbaiPiBcANwCcz839GxOUtLw8BjwNbgMGW9kFg\n02T7bjQazemstZTR0VGuvunBma4hG43Gnl6b0fp6uba6BuvrwPj6hoeHO5oNp9d1+8TXwcBXgXdl\n5u1187ci4rjMvBM4CbgVuBu4NCLmAwcCR1CdFNurfvmwRkZGDjvlnNU5kzWsWLEims3mhJOIzHR9\nvVwbWF+n9lbfbNTtnuwqqq/9H4yID9Zt7wHW1Ce27gPW1VcXrAHWU43drspMp26T1He6PSb7HqpQ\nHW/ZBOuuBdaWrkmSSvJmBEkqyJCVpIIMWUkqyJCVpIIMWUkqyJCVpIIMWUkqaFY+rXZgYGAesLjD\n3fjsekkdm5UhCyxe9paVuWDhoiltvH3zw9yx7nKfXS+pY7M1ZFmwcBFDzz50psuQtJ9zTFaSCjJk\nJakgQ1aSCjJkJakgQ1aSCjJkJakgQ1aSCjJkJakgQ1aSCjJkJakgQ1aSCjJkJakgQ1aSCjJkJakg\nQ1aSCurZ+WQjYg7wt8BRwJPAOzPzezNblSTtm54NWeBNwLzMXBIRxwAfrduK27XzKYDFAwMDnezG\nx9dI6umQPRa4BSAzvxERI916459t28TxF536lWc9d2hK2297ZAtfu+JLPr5GUk+H7BCwpWV5Z0TM\nycxd7Wy8ffPDU37jJ7Y+xtwpb92ejut7ZMvkK+7Btja2nan6erk2sL5u1DfbDDSbzZmuYUIR8VHg\nnzPz+nr5B5n5gj2t32g0evMXkTSp4eHhjsbmelkv92Q3AG8Ero+I3wa+s7eVZ/OHJKl/9XLI/j3w\nuojYUC+fPZPFSNJU9OxwgSTNBt6MIEkFGbKSVJAhK0kFGbKSVFAvX11QVH2r7kcyc3lEfAE4uH7p\nN4F/zMwzxq3/v4HN9eKDmXlOobrmAlcDLwTmA5cAPwDWADup5nF4R2b+tGWbrs3zsIf6zgAOqVfZ\n7fh1ub5nAFcBhwFN4I+BufTO8ZuovovpkePX8p7PAxrAa4Fn0iPHrx/tlz3ZiFhJ9Rd9PkBmvi0z\nlwNvBjYB/3Xc+gfW6y2v/xQJ2NrvAw9n5lLg9cAngY8B765rvAF437htfjnPA/BnVPM8dKu+v8nM\n39vb8etyfW8AdmXmq6nC6y+Bj9M7x298fZf22PEb+4f0U8B2YIDeOn59Z78MWeAB4DSqv0CtPgys\nycyfjGv/z8AzI+IrEXFr3Qsu5Xrgg/XPc4AdwNsyc+xmjLnAE+O2edo8D0DJeR7G1/dUy2t7On5d\nqy8zvwT8Ub24GHgMOL1Xjt8E9W1qeXnGj19tNXAl8COq3nbPHL9+tF+GbGbewNPDYezr0e8A10yw\nyXZgdWaeSPX17vP1V6QStW3PzG0RMUgVaB8Y+58uIpYAF1D1bFtNOM9Dt+qra9vb8etafXWNOyPi\nGqqvuH/XS8dvovrq2nri+EXEWVTf
VL5aNw302vHrNx6IX3kL8PnMnOjujPuBzwNk5r8BjwKHliok\nIl4A3AZ8NjO/ULedTtW7ODkzHx23yRZgsGW57Yl0pqs+9n78ulofQGaeRTXueVVEPLOXjt9E9dE7\nx+9sqjstbwdeDlwbEQf32vHrJ4bsr7wWuHkPr51NPc4UEc+n+pf7RyWKiIiDga8CKzPzmrrt7VQ9\niGWZuXGCzTYAJ9frTjrPw3TXV9vb8etmfX8QEe+vF58AdgEr6J3jN76+nXWNx9MDxy8zj8vMZfX4\n6/8B3gG8jh45fv1ov726oNbaawjgwdYXI+Jaqq/DnwY+ExF31S+dXfBf6lXAQuCDEfFB4BnAkcBG\n4IaIALgjMz/UUl8353kYX1+T6n+wvR2/bta3DrgmIu6kGj/8E+AzwEP0xvHbrb7M/HlEHEZvHL9W\nTaqM+AS9c/z6jnMXSFJBDhdIUkGGrCQVZMhKUkGGrCQVZMhKUkGGrCQVZMhKUkGGrCQVtL/f8aUe\nEBEHUN0X/zKqeX2Tapa084B3A48D/wp8r77T6PXAh6jumPo+cG5mPjYTtUuTsSerXvAq4Of1fKQv\nBg4CVgLvAo4GXgO8BGhGxCLgMuCEzDyaah6F/zYjVUtt8LZa9YSIeCmwHDicavLq/w4MZuaf1q//\nF+DXgW8CnwX+vd70GcCj9YQmUs9xuEAzLiJ+l+rr/8epHm3zHKohgl9rWW1sgvVnAF/PzFPrbQ/k\n6dPsST3F4QL1gtcC12XmtcBPgKV1+8kRMRgR86imK9wFfAN4VUS8pF7nYuDybhcstcvhAs24iDiS\n6gkBTwI/phoKeIxqzt53AduAR6im2PuriHgD8BdUvdofAG/PzE0T7VuaaYaselLdUz0lMz9eL38R\nuCozb5zZyqR945isetVDwCsi4l+oJo++xYBVP7InK0kFeeJLkgoyZCWpIENWkgoyZCWpIENWkgr6\n/zwJTkT3N+46AAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x18494198>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "sns.factorplot(x='age',data=MarriageDF.sort('age'), hue ='Had_Affair', kind='count')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<seaborn.axisgrid.FacetGrid at 0x17ac7048>" | |
| ] | |
| }, | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVkAAAEZCAYAAAA9qla3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHIhJREFUeJzt3XucXGWd5/FPZcyFCd3xQiCiaGbU/ER8ZZVuzRjcXGYi\nCBFRmtURHNCFoAzDOKO7USPryixZWDIyY7yws4kREEdHaLzwCsQLt4SogKWLxss3RozMeiUC6SRA\nzKX2j+c0KSuddHV1P9VV3d/365UXVU+dy4+q6m+d85xznlOqVCqYmVkeE0a7ADOzscwha2aWkUPW\nzCwjh6yZWUYOWTOzjByyZmYZPS3nwiNiDnClpIURcTSwCng6UALOlbQ1IpYAFwJ7gcslrY2II4Ab\ngOnADuA8Sdty1mpmlkO2LdmIWEoK1clF01XApyXNBz4IvDQiZgCXAHOBU4ArImIScBHwgKR5wPXA\npbnqNDPLKWd3wRbgTNJWK6QgPS4ivgacA9wBvBLYKGmPpL5intnAScC6Yr51wKKMdZqZZZMtZCXd\nTOoC6DcTeETSa4CHgPcCHcD2qml2ANOATqCvps3MrO1k7ZOt8Tvgy8XjW4DlwLdJQduvA3iMFLAd\nNW2HVS6XW+764D179vDLX/5yyPMde+yxTJw4MUNFZq2nq6urNPhU7auZIXsPsJh0QGs+sAm4D1ge\nEZOBKcDxRftG4DTgfuBUYH09K2i1D6tUKs1acNZSTZ02ve55dm1/mLtuuioqlcrmjKWZWZM0I2T7\ntzDfA6yOiItIW6ZnS9oeESuBDaSui2WSdkfENcB1EbEB2A2c3YQ6s5g6bTqdz3z2aJdhZqMka8hK\n2ko64IWkh4CTB5hmNbC6pu0J4E05azMzawZfjGBmlpFD1swsI4esmVlGDlkzs4wcsmZmGTlkzcwy\ncsiamWXkkDUzy8gha2aWkUPWzCwjh6yZWUYOWTOzjByyZmYZOWTNzDJyyJqZZeSQNTPLyCFrZpaR\nQ9bMLCOHrJlZRg5ZM7OMHLJmZhllvVttRMwBrpS0sKrtbOBvJM0tni8BLgT2ApdLWhsRRwA3ANOB\nHcB5krblrNXMLIdsW7IRsRRYBUyuans58J+rns8ALiHdNvwU4IqImARcBDwgaR5wPXBprjrNzHLK\n2V2wBTgTKAFExLOA5cDf9bcBrwQ2Stojqa+YZzZwErCumGYdsChjnWZm2WQLWUk3k7oAiIgJwCeB\ndwM7qybrBLZXPd8BTCva+2razMzaTtY+2SpdwAuBa4ApwEsi4mrgTqCjaroO4DFSwHbUtA2qXC5X\nRqrgkdDb28uaWx9sZD6Vy+UMFZm1nq6urtLgU7WvpoSspPuBlwJExPOBz0l6d9EnuzwiJpPC93hg\nE7AROA24HzgVWF/Pelrtw+ru7p61+PwVGup8PT09UalUNueoycyaqxmncNVuXZb62yT9GlgJbABu\nB5ZJ2k3a4j0hIjYAFwCXNaFOM7MRl3VLVtJW0pkDh2yTtBpYXTPNE8CbctZmZtYMvhjBzCwjh6yZ\nWUYOWTOzjByyZmYZOWTNzDJyyJqZZeSQNTPLyCFrZpaRQ9bMLCOHrJlZRg5ZM7OMHLJmZhk5ZM3M\nMnLImpll5JA1M8vIIWtmlpFD1swsI4esmVlGDlkzs4wcsmZmGTlkzcwyynq32oiYA1wpaWFEvIx0\n++99wG7gXEm/jYglwIXAXuBySWsj4gjgBmA6sAM4T9K2nLWameWQbUs2IpYCq4DJRdM/A38jaSFw\nM/DeiDgGuIR0i/BTgCsiYhJwEfCApHnA9cClueo0M8spZ3fBFuBMoFQ8/0tJ3yseTwSeAF4JbJS0\nR1JfMc9s4CRgXTHtOmBRxjrNzLLJFrKSbiZ1AfQ//zVARMwFLgb+CegEtlfNtgOYVrT31bSZmbWd\nrH2ytSLizcAy4DRJv4uIPqCjapIO4DFSwH
bUtA2qXC5XRrDcYevt7WXNrQ82Mp/K5XKGisxaT1dX\nV2nwqdpX00I2It5KOsC1QNKjRfN9wPKImAxMAY4HNgEbgdOA+4FTgfX1rKPVPqzu7u5Zi89foaHO\n19PTE5VKZXOOmsysuZoRspWImAB8BPg5cHNEANwl6bKIWAlsIHVdLJO0OyKuAa6LiA2kMxHObkKd\nZmYjLmvIStpKOnMA4FmHmGY1sLqm7QngTTlrMzNrBl+MYGaWkUPWzCwjh6yZWUYOWTOzjByyZmYZ\nOWTNzDJyyJqZZeSQNTPLyCFrZpaRQ9bMLCOHrJlZRg5ZM7OMHLJmZhk5ZM3MMnLImpll5JA1M8vI\nIWtmlpFD1swsI4esmVlGDlkzs4wcsmZmGWW9W21EzAGulLQwIl4IXAvsBzYBF0uqRMQS4EJgL3C5\npLURcQRwAzAd2AGcJ2lbzlrNzHLItiUbEUuBVcDkoulqYJmkeUAJOCMiZgCXkG4bfgpwRURMAi4C\nHiimvR64NFedZmY55ewu2AKcSQpUgBMlrS8e3wYsAl4BbJS0R1JfMc9s4CRgXTHtumJaM7O2ky1k\nJd1M6gLoV6p6vAOYBnQC2w/R3lfTZmbWdrL2ydbYX/W4E3iMFKQdVe0dA7T3tw2qXC5Xhl/myOnt\n7WXNrQ82Mp/K5XKGisxaT1dXV2nwqdpXM0P2uxExX9LdwKnA7cB9wPKImAxMAY4nHRTbCJwG3F9M\nu37gRf6hVvuwuru7Zy0+f4WGOl9PT09UKpXNOWoys+Zqxilc/VuX7wEui4hvkML9Jkm/AVYCG0ih\nu0zSbuAa4ISI2ABcAFzWhDrNzEZc1i1ZSVtJZw4g6SfAggGmWQ2srml7AnhTztrMzJrBFyOYmWXk\nkDUzy8gha2aWkUPWzCwjh6yZWUYOWTOzjByyZmYZOWTNzDJyyJqZZeSQNTPLyCFrZpaRQ9bMLCOH\nrJlZRg5ZM7OMBg3ZiPjoAG3X5SnHzGxsOeR4shGxGngB0B0RL62Z5+m5CzMzGwsON2j3cuD5pDsX\nfIgDN0LcC/wwb1lmZmPDIUNW0s+AnwGzI6KTdMfY/qA9Engkf3lmZu1t0NvPRMQy4H2kUK2+G+yf\n5CrKzGysqOceXxcAL5D0cO5izMzGmnpO4fo58GjuQszMACJiQURcU9P2ozrme1lEfGqQaWZGxJMR\n8YqqthdExAMR8T8i4l0RUY6Ikw4x/5DPrKpnS3YLcE9E3AHsLtoqkv5hqCuLiAmkO9POAvYDS4B9\nwLXF803AxZIqEbEEuJB0oO1ySWuHuj4za0uVwSdp2LnAR0l76PcXbScBn5e0PCJuB06X9MuBZpZ0\n3lBXWE/I/qL41690qAnrcDIwVdKrI2IR8D+LGpZJWl/8ep0REd8CLgG6gCNIIf81Sb8fxrrNrI1F\nxMuAq0iZ8UfA6aRA/hwpJ7YBuwZZzBuBecA3I+KIYr5lwMSI+BlwIvD5iHgtcA1wdPFvmaTbIuJH\nko6PiDLwK+ABSR843AoHDVlJHxpsmiF4ApgWESXS2Qq/B+ZIWl+8fhspiPcBGyXtAfZExBZgNvDt\nEazFzFpTCXh9RLy4qu15wIuBCyQ9VGyQvRp4CfB1Sf8UEW8D5h9qoRExF/iBpB0RcQvwFklrIuIK\n4BhJ/1rsQb8ZmA7cIunzETEHeA8pn/o9EzhT0s8H+5+p5+yC/QM0/1LScwebdwAbgSnAj4FnkX6J\n5lW9voMUvp3A9gHazWzsqwBflnRRf0PRJ/tr4KqIeBw4HlgHvAj4TDHZNzlMyALnAS+OiNtIp6HO\nB9aQQr12D/1R4OSIWFw8r83K39cTsAPNeBBJTx0ci4iJwBuAufUsfABLSVuoH4iI5wJ3AhOrXu8E\nHgP6gI6q9g7qOPhWLpdz9uUMWW9vL2tufbCR+VQulzNUZNZ6urq6agNuoC7JEnA1sIiUEbcVbQLm\nAOuB7k
OtIyImA38OvFjSvqLt/og4gYP7gEukQP6hpKsj4q+AnpppBtr4HFA9fbJPKXbfb4yIS4cy\nX5WppACFFJpPA74bEfMl3Q2cCtwO3AcsL96YKaRfrU2DLXyAD2tUdXd3z1p8/goNdb6enp6oVCqb\nc9Rk1gYqHBx8/X2vG4AHSWc9zSD1m34mIu4E/p10oHwgpwN39Ads4dOkg+/frVlfBbgD+GxEnAZ8\ng7TnDQ0clCtVKoefJyKqj6aVgBOA+ZJeOdSVRcTTgU8BR5G2YP8ZKAOrgEmky3WXFGcXXEA6u2AC\nsFzSFw637HK5XGm1kC2VSrMWn79Cnc98dt3z9D3yK9Z+8r86ZM3GiHq2ZBdyIL0rpCN4b25kZZIe\nIx3dq7VggGlXk073MjOrW0R8nnTgqtpNkj4+GvXU0yf7toiYBEQx/aai28DMrOVIetNo11CtnvFk\nu4HNwHWkI3E/j4g/y12YmdlYUE93wUrgzZLuBSgCdiUw5D5ZGx2lUmkSMLOBWbdWKhVfAGI2DPWE\n7NT+gAWQ9K2ImJKxJht5MxectVRTp9V2Ux3aru0Pc9dNVwVpL8bMGlRPyD4aEW+Q9EWAiHgj8Lu8\nZdlImzptOkM5y8GsVQxjT+xwmraXVk/IXgjcEhGfJJ3CtZ80oIKZWTMMeU/scOrZSysGs/oE6XL+\n3aTLeX/ayPrqCdnXAo+Trh1+AXAj6ZSrIZ9kb2bWiFHYE3sDMEnS3GLsgg8XbUNWz3iy7wBeLWmX\npO8BLyeNkGVmNladRBobgeKY1CEv2R1MPSH7NNJoWf1+zxCu2zUza0OdHBgCAGBf0YUwZPV0F3wR\nuCMi/o3UJ3sm8OVGVmZm1iZqB6maIKmhjctBk1nSe0nnxQbp5okfkdToADFmZu1gI3AaPHVtwPca\nXVBdo3BJupF0wMvMrOl2bR+5+7jWuawvAK+JiI3F87c3ur4hDXVoZjYKthanXI3oMg/3oqQKcNHh\npqmXQ9bMWlpx0UDbXnnY0NEyMzOrj0PWzCwjh6yZWUYOWTOzjHzgy8xa2ngYhcvMbDTNXPTuM3Tk\nUZ0jsrCd2/r4+tVfGnSs5GJgmCslLRzO+hyyZtbyjjyqk2kzntG09UXEUuCtwM7hLqvpIRsR7yfd\nA30i8DHS5WvXkgad2QRcXNwSfAlpLNu9wOWS1ja7VjMbt7aQxmn59HAX1NQDXxGxAHiVpLmkMWn/\nlDRO4zJJ80gD0JwRETNIwynOBU4BrijumGtmlp2km0kbeMPW7LMLTga+HxFfBG4hjebVJWl98fpt\nwCLgFcBGSXsk9ZF+VWY3uVYzs2FrdnfBdOA44HWkrdhbSFuv/XYA00hjOW4foN3MrK00O2S3AT+S\ntBfYHBFPAs+per0TeIyDx3LsAB4dbOHlcrkygrUOW29vL2tufbCR+VQul8dcHWYD6erqKg02zc5t\nfYNNUrchLmvYmdLskL0HeBdwdUQcC/wxcHtEzJd0N3AqcDtwH7A8IiYDU4DjSQfFDqueD6uZuru7\nZy0+f8WQ74XW09MTlUplxAbEaJU6zBq0tTjlakSXOdgEkraSjgsNS1NDVtLaiJgXEfeR+oP/mvQ/\nu6o4sPVD4Kbi7IKVwIZiumWSmnLisJm1lnYfhavpp3AVd1qotWCA6VYDq7MXZGaWkccuMDPLyCFr\nZpaRQ9bMLCOHrJlZRg5ZM7OMHLJmZhk5ZM3MMnLImpll5JA1M8vIIWtmlpFD1swsI4esmVlGDlkz\ns4wcsmZmGfmW4GbjXKlUmgTMbHD2rcV4r3YIDlkzm7ngrKWaOm36kGbatf1h7rrpqqCNB9RuBoes\nmTF12nQ6n/ns0S5jTHLImo0S76aPDw7ZFrN/316AmaVSQ/eE9B9ee/Fu+jjgkG0xj+98lEXvPuMr\nRx7VOaT5dm7ro7ijp//w2oh308c+h2wLOvKoTqbNeMZol2FmI2BUQjYi
jgbKwF8A+4Fri/9uAi4u\nbgm+BLgQ2AtcLmntaNRqZjYcTb8YISImAv8C7AJKwNXAMknziudnRMQM4BJgLnAKcEVETGp2rWZm\nwzUaV3ytAK4BflU8P1HS+uLxbcAi4BXARkl7JPUBW4DZTa/UzGyYmhqyEfE24GFJXy2aSsW/fjuA\naUAnsH2AdjOzttLsPtm3A5WIWAS8DLgOqD5/pRN4DOgDOqraO4BHB1t4uVyujFypw9fb28uaWx9s\n5vpULpdHrI5DLc9GxnC+HyP52Yx2HV1dXQ2dr9gumhqykub3P46IO4F3AisiYr6ku4FTgduB+4Dl\nETEZmAIcTzoodlit9mF1d3fPWnz+CjVrfT09PVGpVA46hauROvbv20tPT88pwNYGSvH5unUYzvfj\nUJ91O9cxVo32KVwV4D3AquLA1g+Bm4qzC1YCG0hdGssk+Y+2iXy+rtnIGLWQlbSw6umCAV5fDaxu\nWkF2EJ+v25qGcVWg9zBGwWhvyZrZEDWyl+E9jNEz5kLWg27YeOC9jPYx5kIWD7rRsobxA+gfP2tb\nYzFkPehG6xryD2COHz/v7YwM9w3XZ0yGrLWuFvkB9N7OCHDfcH0csjYutUjYtz33DQ/Od6s1M8vI\nIWtmlpFD1swsI4esmVlGDlkzs4wcsmZmGTlkzcwycsiamWXkkDUzy8gha2aWkS+rtZY2jEFIYJwN\nRGKtySFrLc23wbF255C1ltcqg5B4aD9rhEPWrE4e2s8a4ZA1G4JW2aq29tHUkI2IicAa4PnAZOBy\n4EfAtcB+YBNwcXFL8CXAhcBe4HJJa5tZq5nZSGj2KVznAA9Lmge8Fvg48GFgWdFWAs6IiBnAJcBc\n4BTgioiY1ORazcyGrdndBTcCNxWPJwB7gBMlrS/abgNOBvYBGyXtAfZExBZgNvDtJtdrZjYsTQ1Z\nSbsAIqKDFLiXAv9YNckOYBrQCWwfoN3MrK00/cBXRBwH3Ax8XNJnI+Kqqpc7gceAPqCjqr0DeHSw\nZZfL5Upvby9rbn2wodp6e3tVLpcbmvcQy2u4lgbXN2D9rsN1tHIdXV1dDV1p0i6afeDrGOCrwF9L\nurNo/m5EzJd0N3AqcDtwH7A8IiYDU4DjSQfFDqurq6vU3d09a/H5K9RIfT09PVGpVEbsVJvh1NKI\nQ9XvOlxHO9QxVjV7S3YZabf/gxHxwaLtXcDK4sDWD4GbirMLVgIbSH23yyT5ZG4zazvN7pN9FylU\nay0YYNrVwOrcNZmZ5eRRuMzMMnLImpll5JA1M8vIIWtmlpFD1swsI4esmVlGDlkzs4wcsmZmGTlk\nzcwycsiamWXkkDUzy8gha2aWkUPWzCwjh6yZWUYOWTOzjByyZmYZOWTNzDJyyJqZZeSQNTPLyCFr\nZpaRQ9bMLKNm3xK8bhExAfgEMBvYDVwg6ae51rd/316AmaVSqZHZt1YqFd+y3MwO0rIhC7wBmCRp\nbkTMAT5ctGXx+M5HWfTuM75y5FGdQ5pv57Y+vn71lwLYnKcyM2tnrRyyJwHrACTdGxHduVd45FGd\nTJvxjNyrMbNxpJVDthPoq3q+LyImSNo/2Iy7tj885JU9seMRJm7rG3zCGjsHmWeotbiO1qyj0Vpc\nx9DqGItKlUpltGsYUER8GPiWpBuL5/8u6bhDTV8ul1vzf8TMBtXV1dXQwZB20MpbshuB04EbI+LP\ngO8dbuKx/CGZWftq5ZD9AvCaiNhYPH/7aBZjZtaIlu0uMDMbC3wxgplZRg5ZM7OMHLJmZhk5ZM3M\nMmrlswuyGGxMhIj4e+B8oP/s7HdIynLJbERMBNYAzwcmA5dLumWUank/6ZS5icDHJF1X9drpwH8D\n9gJrJK3OVMMkYDXwQmAP8LeSHmhmHcUl3FdKWhgRLwH+T/HST0jflX1V02YbX6OmjqOBVcDTgRJw\nrqStNdN/B9hePH1Q0vnDXP9B303g
p4zS+9HOxuOW7FNjIgDvI42JUO1E4K8kLSz+5RyT4BzgYUnz\ngNcCHxuNWiJiAfCq4j1ZAPxp1WsTgauB1wDzgQuLP/oclgCPF3UsIf2RN62OiFhKCrPJRdNy4H2S\nXl08P71mlsG+SyNVx1XApyXNBz4IvLRm+ikAVd+TYQVsofa7+XFS0Db9/Wh34zFk/2BMBKB2TIQu\nYFlEbIiI92Wu5UbSHw2kz2LvKNVyMvD9iPgicAvw5arXjge2SNouaQ9wDzAvUx0v4cBnsxl4TkT0\nj9jTjDq2AGeSthYBeiTdU2xhzwAeq5l+sO/SSNUxFzguIr5GCr87aqb/D8AfR8RXIuL2Yit4uGq/\nm3sYvfejrY3HkB1wTISq558F3gH8OfDqiFicqxBJuyTtjIgO0pf6AzWTNKuW6aRAPwt4J/CZqtc6\nObAbCrADmJapjv8LvA6guMpvOjC1WXVIupmqHzpJ+yPiecAm4FkcfNXhYN+lEakDmAk8Iuk1wEPA\ne2tm2QWskHQKxec33DoG+m5KqozG+9HuxuMb0Ad0VD2vHXTmI5IeKbaW1gIvz1lMRBxH2jK5XtLn\nal5uVi3bgK9K2ltsQT4ZEUcVr23nD9+vDuDRTHWsAfoiYgNp13Mz8Mgo1PEUSQ9JmgX8C6m7otpg\n36WR8jsO7F3cwsFbiJspfhgl/aSY/tnDXelA380WeT/ayngM2Y3AafDU1tJTv8YRMY202zw1Ikqk\nLchv5yokIo4BvgoslXRtzWvNrOUeUr8bEXEsaeuxP9x+DLwoIp5R7CbOA76ZqY5XAndI+o/ATcCv\nJO0ehToAiIgvR8QLi6c7gX01kxzyuzTC7gH692Lmk7Ykq72dov+z+Pw6gV8NZ4UDfTdb6P1oK+Pu\nstoisPqPgEL6gnYBR0paFRFvAf6edHT065Iuy1jLR4D/BKiqeRUwdRRq+V/AQtIP7/uBozjwnryO\n1D83AfikpGsy1fBM4N9IIf8EcCEpeJtWR0TMBP61GCz+VcAK4PekXfILJP0mIq4jde38gprv0kgd\nnKyp43mksy6mkvpBz5a0vaqOXwOfIp0JACkYvzXM9Q/03fwA6SBc09+PdjbuQtbMrJnGY3eBmVnT\nOGTNzDJyyJqZZeSQNTPLyCFrZpaRQ9bMLCOHrI1pEXFZMXpXvdOfFRGfylmTjS/jbqhDG18k/ffR\nrsHGN4esDVlEXA9skLSqeH4nMAe4DTgBeCvwruIxwCcON/ZrRHwIeB7pSqGjgUtJlxHPAR6Q9JcR\n8TTgmmKZx5CuRDqTNBrUOtKYu08CNwBvIw1gcgtwLHCXpOsi4tyirglAGbhY0u6IOKdY507SCFhP\nDu8dMjvA3QXWiDWkICUink8aLeteUiC+mHT55zMknQgsIg2BN5gTSJfQvrVY/pWkcVNPjIjZwKuA\nJ4uxSl8IHEFxnTwwCzinGKWqBDwHeJmk/lHNKhFxAnABadzcl5NC+b8U1/r/I2kc3TnFcn0ZpI0Y\nh6w14m7g2CJgzwWuL9rvLf77fSAiYh0pNGuH5qtVAb5WjNj0EGlgmB8Xo+7/Ani6pA3A/46Ii4GV\nwIs4MAzibyU9VLW879SM/lQijcvwIuDeiPgu8HogSOH9DUm/Kea5lgPjuJoNm0PWhkxSBbgOOJs0\niMini5eeKF5/hLRl+lFSkH2nGFXscPZUPa4dvLwUEa8ndQXsJG3prudAGD5RNW2l5nm/CcDnJb28\n2JKdA/xtMX11qNaOLGU2LA5Za9S1pAGiH5L0B8PqFaNl3SBpLakPdCfw3GGu7y9IIXkd8BvSUId/\nNMB0h9oKvQt4Y0RML0Ziu4YUsvcAr4qI5xbtbxlmnWZ/wCFrDZH0/4Cfk8K21jrg8Yj4AakLoVfS\nDwZZZOUQj/ufrwLeEhH3kwaM/hLwJ8VrtfMeNL+k7wGXkQah7h+P9UpJvwUuIo2dej/poJf7ZG3E\n
eKhDa0hxwOgu4ITizg1mNgCfwmVDFhFnkQZnfme9ARsRfwecN8BLv5D0upGsz6yVeEvWzCwj98ma\nmWXkkDUzy8gha2aWkUPWzCwjh6yZWUb/H2FsdN+jak5mAAAAAElFTkSuQmCC\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x1a58f278>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "sns.factorplot('yrs_married',data=MarriageDF.sort('yrs_married'),hue='Had_Affair',kind='count')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<seaborn.axisgrid.FacetGrid at 0x1a56fc18>" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVkAAAEZCAYAAAA9qla3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFTNJREFUeJzt3XuQnXV9x/H3IgQCJhQvxDahpFL5ijoIbBQnUAJWVFRE\nySjjrYQR8AKIrR3KRASxMCAKHSOKLbeg4C0ErJQBlYskpHJxtTiI/UIErKCGiyEE0JjA6R/Ps3Dc\n7GbPXn7nnN28XzM72fN7bt+zO/ns7/k9z/M7PY1GA0lSGVt0ugBJmswMWUkqyJCVpIIMWUkqyJCV\npIIMWUkqaMtSO46IrYCLgJ2BrYHTgF8Ai4FngDuBYzKzERFHAUcDG4DTMvPqiJgKXAq8GFgLHJ6Z\nj5SqV5JKKNmTfR/wcGbuB7wZ+BJwNrCwbusBDomIlwDHAXOBNwFnRMQU4CPAHfW6XwVOKlirJBVR\nMmSXACc3HWc9sFdmLqvbrgHeALwGWJGZ6zPzcWAlsDuwD3Btve619bqSNKEUGy7IzCcBImIaVeCe\nBHy+aZW1wPbAdGDNEO2PD2iTpAmlWMgCRMROwBXAlzLzGxFxVtPi6cBjVEE6ral92iDt/W2b1NfX\n5zPC0gTT29vb0+kaSip54WsG8H3go5l5Y93804iYl5k3AQcB1wO3AadHxNbANsBuVBfFVgBvAW6v\n111GCyb7L0zSxFKyJ7uQ6hT/5IjoH5s9HlhUX9i6C7i8vrtgEbCcaux2YWaui4jzgEsiYjmwDnhv\nwVolqYieyTQLV19fX8OerKRu4sMIklSQIStJBRmyklSQIStJBRmyklRQ0YcROqmnp2cKMHuMu7m/\n0Wj8aRzKkbSZmrQhC8w+9dzFOWPmrFFtvOrBBzjl2AUB3D2+ZUnanEzmkGXGzFnM3PmlnS5D0mbM\nMVlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJ\nKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQ\nlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSCDFlJKsiQlaSC\nDFlJKsiQlaSCDFlJKsiQlaSCtix9gIjYGzgzMw+IiD2Bq4B76sVfzswlEXEUcDSwATgtM6+OiKnA\npcCLgbXA4Zn5SOl6JWk8FQ3ZiDgBeD/wRN3UC5yTmec0rfMS4Lh62VTg5oj4AfAR4I7M/ExEHAac\nBHy8ZL2SNN5K92RXAocCX6tf9wK7RsQhVL3ZjwOvBVZk5npgfUSsBHYH9gE+W293LfCpwrVK0rgr\nOiabmVdQDQH0uxX458ycB9wLnAJMA9Y0rbMW2B6YDjw+oE2SJpTiY7IDXJmZ/YF6JfBFYBlV0Pab\nBjxGFbDTBrQNq6+vrwGwdOlSHh5jsUuXLs2+vr4x7kXSpvT29vZ0uoaS2h2y10bExzLzduANwI+B\n24DTI2JrYBtgN+BOYAXwFuB24CCqMB5W/y9szpw5u37lyutyLMXOnz8/Go3G3WPZh6TNW7tCtlH/\n+2HgSxGxHvgtcHRmPhERi4DlVMMXCzNzXUScB1wSEcuBdcB721SrJI2b4iGbmfcDc+vv7wD2HWSd\nC4ALBrT9AXh36fokqSQfRpCkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZ\nSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrI\nkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJWk\nggxZSSrIkJWkggxZSSrIkJWkggxZSSrIkJ
WkggxZSSrIkJWkgoYN2Yj44iBtl5QpR5Imly2HWhAR\nFwC7AHMi4lUDtvmL0oVJ0mQwZMgCpwM7A4uATwM9dfsG4K6yZUnS5DBkyGbmfcB9wO4RMR3YnueC\n9vnA78uXJ0kT26Z6sgBExELgRKpQbTQt+ptSRUnSZDFsyAJHArtk5sOli5GkyaaVW7h+BawuXYgk\nAUTE/hFx3oC2X7Sw3R4RcfEw68yOiD9GxGua2naJiDsi4l8j4viI6IuIfYbYfsR3VrXSk10J3BwR\nNwDr6rZGZn6mlQNExN7AmZl5QET8LbAYeAa4EzgmMxsRcRRwNNVFtdMy8+qImApcCrwYWAscnpmP\njOC9SZqYGsOvMmr/AHyR6gz99rptH+DbmXl6RFwPHJyZvxls48w8fKQHbCVkH6y/+vUMteJAEXEC\n8H7gibrpHGBhZi6r/1IdEhG3AMcBvcBUqkD/AfAR4I7M/ExEHAacBHy81WNLmlwiYg/gLKrceh5w\nMFUgf5MqOx4BnhxmN+8E9gN+VHfkpgILga0i4j5gL+DbEfFm4Dxgx/prYWZeExG/yMzdIqIP+C1V\nRn1yUwccNmQz89PDrbMJK4FDga/Vr/fKzGX199cAbwSeBlZk5npgfUSsBHan+uvy2Xrda4FPjaEO\nSRNHD/D2iHh5U9tfAy8HjszM/6s7afsCrwCuy8x/i4gFwLyhdhoRc4GfZ+baiLgKeE9mXhQRZwAz\nMvPr9Vn1YVRn0Fdl5rfrs/FPUGVWvxcAh2bmr4Z7M63cXfDMIM2/ycxZw22bmVdExOympuZe8Fqq\n28KmA2uGaH98QJukya8BfDczP9LfUI/J/g44KyKeAnaj6ny9DLisXu1HbCJkgcOBl0fENVS3oc4D\nLqLKpYFn6KuBN0bEW+vXA7PyT60E7GAbbiQzn704FhFbAe8A5ray80E0B/Z04DGqIJ3W1D5tkPb+\ntmH19fU1AJYuXcpYb4dYunRp9vX1jXEvkjalt7d3YMANNiTZQzXc+AaqLLimbktgb2AZMGeoY0TE\n1sDrgZdn5tN12+0R8Uo2HgPuoQrkuzLznIj4ADB/wDqDdT4H1cqY7LPqU/olEXHSSLZr8tOImJeZ\nNwEHAdcDtwGn1z+Ebaj+Qt0JrADeQjU4fRDVD3FY/b+wOXPm7PqVK6/LUdYJwPz586PRaNw9ln1I\nGrEGGwdf/9jrcuBeqrueXkI1bnpZRNwI/Jrq4vlgDgZu6A/Y2teAo4CfDjheA7gB+EZEvAX4b+CF\nTctGpJXhguaraT3AK3nuLoNW9Rf2CeD8iJhC9Wju5fXdBYuofnhbUA0wr6vHXC6JiOX18d47wmNK\nmoDqTthNA9p2q789a5BNDm1hn5cDlw9oWzTIegfU3z4EvGqQ5a8YUM+wWunJHsBzIdmguoJ3WKsH\nyMz7qYcXMvMeYP9B1rkAuGBA2x+Ad7d6HEkCiIhvU124anZ5Zn6pE/W0Mia7oO55Rr3+nfWwgSR1\nnczsqs5ZK/PJzgHuBi6huhL3q4h4XenCJGkyaGW4YBFwWGbeClAH7CLgtSULk6TJoJW5C7brD1iA\nzLyF6i4ASdIwWunJro6Id2TmdwAi4p3Ao2XLkqRKT0/PFGD2OO/2/kaj8adx3uegWgnZo4GrIuJC\nqlu4nqF65FWS2mH2qecuzhkzh33ItCWrHnyAU45dEFTXmgYVEVsAX6Z6xH8d1eO8vxzN8VoJ2TcD\nT1E9O7wLsITqNqwx3egvSa2aMXMWM3d+aTsP+Q5gSmbOrecuOLtuG7FWxmQ/BOybmU9m5s+APalm\nzZKkyWofqrkRqK9JDfnI7nBaCdktgeaxiz8xgud2JWkCap6gCuDpeghhxFoZLvgOcENEfItqTPZQ\n4LujOZgkTRADJ67aIjNH1bls5Ymvf4mId1FNdLse+EL/nQaanMbpam7brt5KBaygmlRmSf1swM9G\nu6OWZu
HKzCVUF7y0eRjT1dxWrt5KI7HqwQfava8rgQMjYkX9+ojRHm9EUx1q89GBq7nSUO6v/2iP\n6z43tTAzG1QfgTVmhqykrlYPO03Ys6JRXS2TJLXGkJWkggxZSSrIkJWkgrzwJamrbQ6zcElSJ83u\nPfzk3HaHHcdlZ0+tfoi+Sz4z7H3c9cQwZzZ9uOKoGLKSut62O+zIdi+a2bbjRcQJwPuBJ8a6L8dk\nJWljK6nmaekZ644MWUkaIDOvADaMx74MWUkqyDHZDnCWK2nzYch2hrNcSSPw1OqHOrWvxliPZ8h2\niLNcSS27v77lalz3OdwKmXk/MHesBzJkJXU1Z+GSJA3JkJWkggxZSSrIkJWkggxZSSrIkJWkggxZ\nSSrIkJWkggxZSSrIkJWkggxZSSrIkJWkgpwgRuNuw/r1ALN7esb0yR3Ol6tJwZDVuHv0oVX0Hn7y\n90b76aKtfpqoNBEYsiqi3Z8uKnUrx2QlqSBDVpIKMmQlqSBDVpIKMmQlqSBDVpIKMmQlqaCO3Ccb\nET8B1tQv7wXOABYDzwB3AsdkZiMijgKOBjYAp2Xm1R0oV5JGre0hGxHbAGTmAU1t3wUWZuayiDgP\nOCQibgGOA3qBqcDNEfGDzPRRS0kTRid6sq8Gto2I79XH/ySwV2Yuq5dfA7wReBpYkZnrgfURsRLY\nHfhxB2qWpFHpRMg+CXwuMy+MiJcB1w5YvhbYHpjOc0MKze1t4SQnksZDJ0L2bmAlQGbeExGPAns2\nLZ8OPAY8Dkxrap8GrB5u5319fQ2ApUuX8vAYihyPSU4Wvv3V9PX1bbRsrLXV+8jB9j0exqO+caih\n2PtTd+nt7R1TT6bbdSJkj6A67T8mIv6KKjy/HxHzMvMm4CDgeuA24PSI2BrYBtiN6qLYJvX/wubM\nmbPrV668LsdS6FgnOZk/f340Go2NZpIaj9qG2vd4GI/6xqrk+5PaqRMheyFwcUT0j8EeATwKnB8R\nU4C7gMvruwsWAcupbjVb6EUvSRNN20M2MzcAHxhk0f6DrHsBcEHpmjSx9PT0TAFmj3E3jperLZxP\nVhPR7FPPXZwzZs4a1carHnyAU45d4KTgagtDVhPSjJmzmLnzSztdhjQsH6uVpIIMWUkqyJCVpIIc\nk52AfBpNmjgM2QnIj9yWJg5DdoLyI7elicExWUkqyJCVpIIMWUkqyJCVpIIMWUkqyJCVpIIMWUkq\nyJCVpIJ8GEGbHR9LVjsZstrs+Fiy2smQ1WbJx5LVLo7JSlJBhqwkFWTISlJBhqwkFWTISlJBhqwk\nFWTISlJBhqwkFWTISlJBhqwkFWTISlJBzl0gjaOenp4pwOwx7sYZviYRQ1YaX7NPPXdxzpg5a1Qb\nr3rwAU45doEzfE0ihqw0zmbMnMXMnV/a6TLUJRyTlaSCDFlJKsiQlaSCDFlJKsiQlaSCvLtA6iKl\nP0nX+3jbz5CVukgbPknX+3jbzJCVukzpT9L1Pt72ckxWkgoyZCWpIENWkgpyTFZSy0rf/TAZGbKS\nWtaGux8mHUNW0oiUvvthsnFMVpIKMmQlqSBDVpIK6uox2YjYAvgysDuwDjgyM3/Z2aokqXXd3pN9\nBzAlM+cCJwJnd7geSRqRbg/ZfYBrATLzVmBOZ8uRpJHp6uECYDrweNPrpyNii8x8ppWNVz34wKgP\n/Miq3/HU6qdGvf1Tqx/a5PJurg2sb1P83Zatb7LpaTQana5hSBFxNnBLZi6pX/86M3caav2+vr7u\nfTOShtTb2zumR8i6Wbf3ZFcABwNLIuJ1wM82tfJk/kVJmpi6PWSvBA6MiBX16yM6WYwkjVRXDxdI\n0kTX7XcXSNKEZshKUkGGrCQVZMhKUkHdfndBccPNjxARBwOfAjYAF2XmBR2ocW/gzMw8YEB7R2uL\niK2Ai4Cdga2B0zLzqi6q73nA+cCuQAP4cGb+vFvqa6pjR6AP+PvMvLup
vdM/v58Aa+qX92bmB5uW\n/SPwQeDhuulDzbXrOfZkNzE/Qh0i5wAHAvOAo+v/EG0TESdQBcXWA9o7XhvwPuDhzNwPeDNwbpfV\n9zbgmczcFzgJOL3L6uuv49+BJwdp71h9EbENQGYeUH99cMAqewEfaFpuwA7BkN30/Ai7ASszc01m\nrgduBvZrc30rgUOBgQ9adENtS4CT6++3oOpxdU19mfmfwIfql7OB1d1UX+1zwHnAbwe0d7q+VwPb\nRsT3IuL6+myqWS+wMCKWR8SJbaxrwjFkh5gfoWnZmqZla4Ht21UYQGZewZ+HV79uqO3JzHwiIqZR\nBe4nu6k+gMx8OiIWA4uArzct6nh9EbGA6kzg+3VT8x/STtf3JPC5zHwT8GHgsqb/FwDfoPoD9npg\n34h4axtrm1AM2SpgpzW9bp6AZs2AZdP4895QJ3VFbRGxE3AD8NXM/GbToq6oDyAzF1CNy54fEVPr\n5m6o7wiqJxpvBPYALmkaEuh0fXcDlwFk5j3Ao8BfNi3/Qmb+vu5lXw3s2cbaJpTN/sIXm54f4X+B\nl0XEDlR/2fejOr3rBh2vLSJmAN8HPpqZN3ZhfR8AZmXmGcAfgGeoLoB1RX2ZOa+p1hupLh71T1PV\n6fqOoLoYfExE/BVVz/p3da3bAz+LiFcAT1H1Zi9sY20TiiE7yPwIEfEe4PmZeX5E/BPwPape/4WZ\nOXDsrF0aAF1W20KqU9iTI6J/bPZ8YLsuqe9yYHFE3ARsBRwPvDMiuuXnN1BPF/1+LwQujohl9esj\ngHc3/exOBG6kuiPnusy8to21TSjOXSBJBTkmK0kFGbKSVJAhK0kFGbKSVJAhK0kFGbKSVJAhq7aK\niB9GxLxB2k+NiLdFxOyIuG+IbZ8oX6E0vnwYQe026I3ZmXkKQETMHum2UjczZFVURHyWajrJDcB/\n1M1HRsTZwA7A8Zn5X/UkLjcCNzVtuzNwKdVz+z+hPvOKiE8DrwN2Ar4IXEc1J/ALqR7zPC4z/6fe\n52NUM0bNAk7NzMXl3q20MYcLVExEvAuYC7wKeC2wAHgJsDoz5wAf47mpEhts3FM9l2rimT2oJiGZ\n2rRsSma+MjO/AlwCnJCZvVQzQzVPVDMrM/+Oan6Kz4/j25NaYk9WJe0HfKueqWk9sGc9Ecp36uV3\nAS/axPb7A+8ByMylEdE8JeWtABHxfKo5gC+OiP5l20XEC6hCu38awZ8DLxjrG5JGypBVSetpmiO1\nHm/djufmx22w8WTkzRr8+dlW83Z/rL9/HvDHzHx2qr2I2Ckzf1+H7jqAzGw0hbDUNg4XqKRlwKER\nsWVEbEs1o9TMIdbtYePA/QHVEAMR8Sae64k+u15mrgHuiYj31esdCPxwnOqXxsyQVTGZ+R2q+Xp/\nAtxG9ZlVOWC1RtO/zV8AxwBvj4g7qD5PbNWAdfu9j+pi2h1Un+P17kH2P/B7qS2c6lCSCrInK0kF\nGbKSVJAhK0kFGbKSVJAhK0kFGbKSVJAhK0kF/T+BhAj4r+k6xQAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<matplotlib.figure.Figure at 0x1a76c470>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "sns.factorplot('children',data=MarriageDF.sort('children'),hue='Had_Affair',kind='count',palette='Blues')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "Prep the Data for Regression" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>1.0</th>\n", | |
| " <th>2.0</th>\n", | |
| " <th>3.0</th>\n", | |
| " <th>4.0</th>\n", | |
| " <th>5.0</th>\n", | |
| " <th>6.0</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 1 2 3 4 5 6\n", | |
| "0 0 1 0 0 0 0\n", | |
| "1 0 0 1 0 0 0\n", | |
| "2 0 0 1 0 0 0\n", | |
| "3 0 0 0 0 1 0\n", | |
| "4 0 0 1 0 0 0" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Create DFs for the occupation categories (i.e. categorical variables)\n", | |
| "occ_dummies = pd.get_dummies(data=MarriageDF['occupation'])\n", | |
| "husband_occ_dummies = pd.get_dummies(data=MarriageDF['occupation_husb'])\n", | |
| "\n", | |
| "occ_dummies.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "`get_dummies` pivoted each occupation code (1 to 6) into its own 0/1 indicator column.\n", | |
| "If we left the occupation codes in a single numeric column, the regression would treat them as an ordered quantity, which would distort the fit." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Rename the columns of the occupation dummy DataFrames\n", | |
| "occ_dummies.columns = ['occ1','occ2','occ3','occ4','occ5','occ6']\n", | |
| "husband_occ_dummies.columns = ['hocc1','hocc2','hocc3','hocc4','hocc5','hocc6']\n", | |
| "\n", | |
| "# Set X to MarriageDF without the occupation, occupation_husb and Had_Affair columns\n", | |
| "X = MarriageDF.drop(['occupation','occupation_husb','Had_Affair'], axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "dummies = pd.concat([occ_dummies,husband_occ_dummies],axis=1)\n", | |
| "#Axis = 1 signifies columns" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>rate_marriage</th>\n", | |
| " <th>age</th>\n", | |
| " <th>yrs_married</th>\n", | |
| " <th>children</th>\n", | |
| " <th>religious</th>\n", | |
| " <th>educ</th>\n", | |
| " <th>affairs</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 3</td>\n", | |
| " <td> 32</td>\n", | |
| " <td> 9.0</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 17</td>\n", | |
| " <td> 0.111111</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 3</td>\n", | |
| " <td> 27</td>\n", | |
| " <td> 13.0</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 14</td>\n", | |
| " <td> 3.230769</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 4</td>\n", | |
| " <td> 22</td>\n", | |
| " <td> 2.5</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 16</td>\n", | |
| " <td> 1.400000</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 4</td>\n", | |
| " <td> 37</td>\n", | |
| " <td> 16.5</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 16</td>\n", | |
| " <td> 0.727273</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 27</td>\n", | |
| " <td> 9.0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 14</td>\n", | |
| " <td> 4.666666</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " rate_marriage age yrs_married children religious educ affairs\n", | |
| "0 3 32 9.0 3 3 17 0.111111\n", | |
| "1 3 27 13.0 3 1 14 3.230769\n", | |
| "2 4 22 2.5 0 1 16 1.400000\n", | |
| "3 4 37 16.5 4 3 16 0.727273\n", | |
| "4 5 27 9.0 1 1 14 4.666666" | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>occ1</th>\n", | |
| " <th>occ2</th>\n", | |
| " <th>occ3</th>\n", | |
| " <th>occ4</th>\n", | |
| " <th>occ5</th>\n", | |
| " <th>occ6</th>\n", | |
| " <th>hocc1</th>\n", | |
| " <th>hocc2</th>\n", | |
| " <th>hocc3</th>\n", | |
| " <th>hocc4</th>\n", | |
| " <th>hocc5</th>\n", | |
| " <th>hocc6</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " occ1 occ2 occ3 occ4 occ5 occ6 hocc1 hocc2 hocc3 hocc4 hocc5 \\\n", | |
| "0 0 1 0 0 0 0 0 0 0 0 1 \n", | |
| "1 0 0 1 0 0 0 0 0 0 1 0 \n", | |
| "2 0 0 1 0 0 0 0 0 0 0 1 \n", | |
| "3 0 0 0 0 1 0 0 0 0 0 1 \n", | |
| "4 0 0 1 0 0 0 0 0 0 1 0 \n", | |
| "\n", | |
| " hocc6 \n", | |
| "0 0 \n", | |
| "1 0 \n", | |
| "2 0 \n", | |
| "3 0 \n", | |
| "4 0 " | |
| ] | |
| }, | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dummies.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Now we need to concatenate X and Dummies\n", | |
| "X = pd.concat([X,dummies],axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "Int64Index: 6366 entries, 0 to 6365\n", | |
| "Data columns (total 19 columns):\n", | |
| "rate_marriage 6366 non-null float64\n", | |
| "age 6366 non-null float64\n", | |
| "yrs_married 6366 non-null float64\n", | |
| "children 6366 non-null float64\n", | |
| "religious 6366 non-null float64\n", | |
| "educ 6366 non-null float64\n", | |
| "affairs 6366 non-null float64\n", | |
| "occ1 6366 non-null float64\n", | |
| "occ2 6366 non-null float64\n", | |
| "occ3 6366 non-null float64\n", | |
| "occ4 6366 non-null float64\n", | |
| "occ5 6366 non-null float64\n", | |
| "occ6 6366 non-null float64\n", | |
| "hocc1 6366 non-null float64\n", | |
| "hocc2 6366 non-null float64\n", | |
| "hocc3 6366 non-null float64\n", | |
| "hocc4 6366 non-null float64\n", | |
| "hocc5 6366 non-null float64\n", | |
| "hocc6 6366 non-null float64\n", | |
| "dtypes: float64(19)\n", | |
| "memory usage: 994.7 KB\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "X.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Y is the target in our logistic regression model\n", | |
| "\n", | |
| "Y = MarriageDF.Had_Affair" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "Lecture 79: Multicollinearity\n", | |
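| "This occurs when one predictor can be computed exactly from the others: the dummy columns of each occupation variable always sum to 1, so any one of them is fully determined by the remaining five.\n", | |
| "If we don't take care of this, the redundant columns are perfectly collinear and the fitted coefficients become hard to interpret once we run the model.\n", | |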
| "\n", | |
| "To remedy this, we will drop occ1 and hocc1; we could have dropped any one occ column together with any one hocc column." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "ename": "ValueError", | |
| "evalue": "labels ['occ1'] not contained in axis", | |
| "output_type": "error", | |
| "traceback": [ | |
| "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
| "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", | |
| "\u001b[1;32m<ipython-input-33-c3c2b833de4f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'occ1'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
| "\u001b[1;32mC:\\Anaconda\\lib\\site-packages\\pandas\\core\\generic.pyc\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, axis, level, inplace, **kwargs)\u001b[0m\n\u001b[0;32m 1559\u001b[0m \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1560\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1561\u001b[1;33m \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1562\u001b[0m \u001b[0mdropped\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1563\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | |
| "\u001b[1;32mC:\\Anaconda\\lib\\site-packages\\pandas\\core\\index.pyc\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels)\u001b[0m\n\u001b[0;32m 2222\u001b[0m \u001b[0mmask\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2223\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0many\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2224\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'labels %s not contained in axis'\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2225\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2226\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
| "\u001b[1;31mValueError\u001b[0m: labels ['occ1'] not contained in axis" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "X = X.drop('occ1',axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "X = X.drop('hocc1',axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>rate_marriage</th>\n", | |
| " <th>age</th>\n", | |
| " <th>yrs_married</th>\n", | |
| " <th>children</th>\n", | |
| " <th>religious</th>\n", | |
| " <th>educ</th>\n", | |
| " <th>occ2</th>\n", | |
| " <th>occ3</th>\n", | |
| " <th>occ4</th>\n", | |
| " <th>occ5</th>\n", | |
| " <th>occ6</th>\n", | |
| " <th>hocc2</th>\n", | |
| " <th>hocc3</th>\n", | |
| " <th>hocc4</th>\n", | |
| " <th>hocc5</th>\n", | |
| " <th>hocc6</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td> 3</td>\n", | |
| " <td> 32</td>\n", | |
| " <td> 9.0</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 17</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td> 3</td>\n", | |
| " <td> 27</td>\n", | |
| " <td> 13.0</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 14</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td> 4</td>\n", | |
| " <td> 22</td>\n", | |
| " <td> 2.5</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 16</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td> 4</td>\n", | |
| " <td> 37</td>\n", | |
| " <td> 16.5</td>\n", | |
| " <td> 4</td>\n", | |
| " <td> 3</td>\n", | |
| " <td> 16</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td> 5</td>\n", | |
| " <td> 27</td>\n", | |
| " <td> 9.0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 14</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 1</td>\n", | |
| " <td> 0</td>\n", | |
| " <td> 0</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " rate_marriage age yrs_married children religious educ occ2 occ3 \\\n", | |
| "0 3 32 9.0 3 3 17 1 0 \n", | |
| "1 3 27 13.0 3 1 14 0 1 \n", | |
| "2 4 22 2.5 0 1 16 0 1 \n", | |
| "3 4 37 16.5 4 3 16 0 0 \n", | |
| "4 5 27 9.0 1 1 14 0 1 \n", | |
| "\n", | |
| " occ4 occ5 occ6 hocc2 hocc3 hocc4 hocc5 hocc6 \n", | |
| "0 0 0 0 0 0 0 1 0 \n", | |
| "1 0 0 0 0 0 1 0 0 \n", | |
| "2 0 0 0 0 0 0 1 0 \n", | |
| "3 0 1 0 0 0 0 1 0 \n", | |
| "4 0 0 0 0 0 1 0 0 " | |
| ] | |
| }, | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Need to also drop the affairs column, since it is what our Y (target, Had_Affair) is derived from\n", | |
| "X = X.drop('affairs',axis=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "<class 'pandas.core.frame.DataFrame'>\n", | |
| "Int64Index: 6366 entries, 0 to 6365\n", | |
| "Data columns (total 16 columns):\n", | |
| "rate_marriage 6366 non-null float64\n", | |
| "age 6366 non-null float64\n", | |
| "yrs_married 6366 non-null float64\n", | |
| "children 6366 non-null float64\n", | |
| "religious 6366 non-null float64\n", | |
| "educ 6366 non-null float64\n", | |
| "occ2 6366 non-null float64\n", | |
| "occ3 6366 non-null float64\n", | |
| "occ4 6366 non-null float64\n", | |
| "occ5 6366 non-null float64\n", | |
| "occ6 6366 non-null float64\n", | |
| "hocc2 6366 non-null float64\n", | |
| "hocc3 6366 non-null float64\n", | |
| "hocc4 6366 non-null float64\n", | |
| "hocc5 6366 non-null float64\n", | |
| "hocc6 6366 non-null float64\n", | |
| "dtypes: float64(16)\n", | |
| "memory usage: 845.5 KB\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "X.info()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "6361 0\n", | |
| "6362 0\n", | |
| "6363 0\n", | |
| "6364 0\n", | |
| "6365 0\n", | |
| "Name: Had_Affair, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "Y.tail()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([1, 1, 1, ..., 0, 0, 0], dtype=int64)" | |
| ] | |
| }, | |
| "execution_count": 38, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#To use Y with SciKit Learn, we need to flatten the array:\n", | |
| "Y = np.ravel(Y)\n", | |
| "Y" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "source": [ | |
| "## Set Up the Logistic Regression Model:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0.72588752748978946" | |
| ] | |
| }, | |
| "execution_count": 39, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "log_model = LogisticRegression()\n", | |
| "log_model.fit(X,Y)\n", | |
| "log_model.score(X,Y)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0.32249450204209867" | |
| ] | |
| }, | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "#Null Error Rate: the portion of the sample of women who HAD an affair, i.e. the error we would make by always predicting 'no affair'\n", | |
| "Y.mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "coeff_df = DataFrame(zip(X.columns,np.transpose(log_model.coef_)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0 </th>\n", | |
| " <td> rate_marriage</td>\n", | |
| " <td> [-0.697550975882]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1 </th>\n", | |
| " <td> age</td>\n", | |
| " <td> [-0.0561984867354]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2 </th>\n", | |
| " <td> yrs_married</td>\n", | |
| " <td> [0.103719648979]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3 </th>\n", | |
| " <td> children</td>\n", | |
| " <td> [0.0183331025152]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4 </th>\n", | |
| " <td> religious</td>\n", | |
| " <td> [-0.36826644166]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5 </th>\n", | |
| " <td> educ</td>\n", | |
| " <td> [0.00889710350279]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6 </th>\n", | |
| " <td> occ2</td>\n", | |
| " <td> [0.293350539062]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7 </th>\n", | |
| " <td> occ3</td>\n", | |
| " <td> [0.60267240037]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8 </th>\n", | |
| " <td> occ4</td>\n", | |
| " <td> [0.339968445397]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9 </th>\n", | |
| " <td> occ5</td>\n", | |
| " <td> [0.93773215412]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td> occ6</td>\n", | |
| " <td> [0.914040180617]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td> hocc2</td>\n", | |
| " <td> [0.232100401401]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td> hocc3</td>\n", | |
| " <td> [0.336472058641]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td> hocc4</td>\n", | |
| " <td> [0.202172993046]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td> hocc5</td>\n", | |
| " <td> [0.226092515873]</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td> hocc6</td>\n", | |
| " <td> [0.229179516333]</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1\n", | |
| "0 rate_marriage [-0.697550975882]\n", | |
| "1 age [-0.0561984867354]\n", | |
| "2 yrs_married [0.103719648979]\n", | |
| "3 children [0.0183331025152]\n", | |
| "4 religious [-0.36826644166]\n", | |
| "5 educ [0.00889710350279]\n", | |
| "6 occ2 [0.293350539062]\n", | |
| "7 occ3 [0.60267240037]\n", | |
| "8 occ4 [0.339968445397]\n", | |
| "9 occ5 [0.93773215412]\n", | |
| "10 occ6 [0.914040180617]\n", | |
| "11 hocc2 [0.232100401401]\n", | |
| "12 hocc3 [0.336472058641]\n", | |
| "13 hocc4 [0.202172993046]\n", | |
| "14 hocc5 [0.226092515873]\n", | |
| "15 hocc6 [0.229179516333]" | |
| ] | |
| }, | |
| "execution_count": 41, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# a POSITIVE coefficient = increased likelihood of having an affair\n", | |
| "# a NEGATIVE coefficient = decreased likelihood of having an affair\n", | |
| "coeff_df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "It appears that the MORE favorably a woman rates her marriage, the LESS likely she is to have an affair,\n", | |
| "and the MORE strongly she rates her faith, the LESS likely she is to have an affair.\n", | |
| "\n", | |
| "Next we'll use scikit-learn's `train_test_split` function:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Below is a random 75%/25% train/test split (the train_test_split default)\n", | |
| "X_train, X_test, Y_train, Y_test = train_test_split(X,Y)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", | |
| " intercept_scaling=1, penalty='l2', random_state=None, tol=0.0001)" | |
| ] | |
| }, | |
| "execution_count": 44, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "log_model2 = LogisticRegression()\n", | |
| "log_model2.fit(X_train, Y_train)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "class_predict = log_model2.predict(X_test)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0.717964824121\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "#Use SciKit learn to compare the Y test to class predict \n", | |
| "#If this returns \"1\", it would be perfect accuracy\n", | |
| "print metrics.accuracy_score(Y_test,class_predict)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.9" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment