Skip to content

Instantly share code, notes, and snippets.

@kitmonisit
Last active May 20, 2018 13:24
Show Gist options
  • Save kitmonisit/900affc23e780ac853c439b30fce5cd0 to your computer and use it in GitHub Desktop.
Save kitmonisit/900affc23e780ac853c439b30fce5cd0 to your computer and use it in GitHub Desktop.
R&D Spend Administration Marketing Spend State Profit
165349.2 136897.8 471784.1 New York 192261.83
162597.7 151377.59 443898.53 California 191792.06
153441.51 101145.55 407934.54 Florida 191050.39
144372.41 118671.85 383199.62 New York 182901.99
142107.34 91391.77 366168.42 Florida 166187.94
131876.9 99814.71 362861.36 New York 156991.12
134615.46 147198.87 127716.82 California 156122.51
130298.13 145530.06 323876.68 Florida 155752.6
120542.52 148718.95 311613.29 New York 152211.77
123334.88 108679.17 304981.62 California 149759.96
101913.08 110594.11 229160.95 Florida 146121.95
100671.96 91790.61 249744.55 California 144259.4
93863.75 127320.38 249839.44 Florida 141585.52
91992.39 135495.07 252664.93 California 134307.35
119943.24 156547.42 256512.92 Florida 132602.65
114523.61 122616.84 261776.23 New York 129917.04
78013.11 121597.55 264346.06 California 126992.93
94657.16 145077.58 282574.31 New York 125370.37
91749.16 114175.79 294919.57 Florida 124266.9
86419.7 153514.11 0 New York 122776.86
76253.86 113867.3 298664.47 California 118474.03
78389.47 153773.43 299737.29 New York 111313.02
73994.56 122782.75 303319.26 Florida 110352.25
67532.53 105751.03 304768.73 Florida 108733.99
77044.01 99281.34 140574.81 New York 108552.04
64664.71 139553.16 137962.62 California 107404.34
75328.87 144135.98 134050.07 Florida 105733.54
72107.6 127864.55 353183.81 New York 105008.31
66051.52 182645.56 118148.2 Florida 103282.38
65605.48 153032.06 107138.38 New York 101004.64
61994.48 115641.28 91131.24 Florida 99937.59
61136.38 152701.92 88218.23 New York 97483.56
63408.86 129219.61 46085.25 California 97427.84
55493.95 103057.49 214634.81 Florida 96778.92
46426.07 157693.92 210797.67 California 96712.8
46014.02 85047.44 205517.64 New York 96479.51
28663.76 127056.21 201126.82 Florida 90708.19
44069.95 51283.14 197029.42 California 89949.14
20229.59 65947.93 185265.1 New York 81229.06
38558.51 82982.09 174999.3 California 81005.76
28754.33 118546.05 172795.67 California 78239.91
27892.92 84710.77 164470.71 Florida 77798.83
23640.93 96189.63 148001.11 California 71498.49
15505.73 127382.3 35534.17 New York 69758.98
22177.74 154806.14 28334.72 California 65200.33
1000.23 124153.04 1903.93 New York 64926.08
1315.46 115816.21 297114.46 Florida 49490.75
0 135426.92 0 California 42559.73
542.05 51743.15 0 New York 35673.41
0 116983.8 45173.06 California 14681.4
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>R&amp;D Spend</th>\n",
" <th>Administration</th>\n",
" <th>Marketing Spend</th>\n",
" <th>State</th>\n",
" <th>Profit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>165349.20</td>\n",
" <td>136897.80</td>\n",
" <td>471784.10</td>\n",
" <td>New York</td>\n",
" <td>192261.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>162597.70</td>\n",
" <td>151377.59</td>\n",
" <td>443898.53</td>\n",
" <td>California</td>\n",
" <td>191792.06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>153441.51</td>\n",
" <td>101145.55</td>\n",
" <td>407934.54</td>\n",
" <td>Florida</td>\n",
" <td>191050.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>144372.41</td>\n",
" <td>118671.85</td>\n",
" <td>383199.62</td>\n",
" <td>New York</td>\n",
" <td>182901.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>142107.34</td>\n",
" <td>91391.77</td>\n",
" <td>366168.42</td>\n",
" <td>Florida</td>\n",
" <td>166187.94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" R&D Spend Administration Marketing Spend State Profit\n",
"0 165349.20 136897.80 471784.10 New York 192261.83\n",
"1 162597.70 151377.59 443898.53 California 191792.06\n",
"2 153441.51 101145.55 407934.54 Florida 191050.39\n",
"3 144372.41 118671.85 383199.62 New York 182901.99\n",
"4 142107.34 91391.77 366168.42 Florida 166187.94"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the raw startup table; the path is relative to the notebook's working directory.\n",
"df = pd.read_csv('./50_Startups.csv')\n",
"df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State_Florida</th>\n",
" <th>State_New York</th>\n",
" <th>R&amp;D Spend</th>\n",
" <th>Administration</th>\n",
" <th>Marketing Spend</th>\n",
" <th>Profit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>165349.20</td>\n",
" <td>136897.80</td>\n",
" <td>471784.10</td>\n",
" <td>192261.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>162597.70</td>\n",
" <td>151377.59</td>\n",
" <td>443898.53</td>\n",
" <td>191792.06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>153441.51</td>\n",
" <td>101145.55</td>\n",
" <td>407934.54</td>\n",
" <td>191050.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>144372.41</td>\n",
" <td>118671.85</td>\n",
" <td>383199.62</td>\n",
" <td>182901.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>142107.34</td>\n",
" <td>91391.77</td>\n",
" <td>366168.42</td>\n",
" <td>166187.94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" State_Florida State_New York R&D Spend Administration Marketing Spend \\\n",
"0 0.0 1.0 165349.20 136897.80 471784.10 \n",
"1 0.0 0.0 162597.70 151377.59 443898.53 \n",
"2 1.0 0.0 153441.51 101145.55 407934.54 \n",
"3 0.0 1.0 144372.41 118671.85 383199.62 \n",
"4 1.0 0.0 142107.34 91391.77 366168.42 \n",
"\n",
" Profit \n",
"0 192261.83 \n",
"1 191792.06 \n",
"2 191050.39 \n",
"3 182901.99 \n",
"4 166187.94 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn_pandas import DataFrameMapper\n",
"from sklearn.preprocessing import LabelBinarizer\n",
"\n",
"# Binarize 'State' into indicator columns. default=None keeps the remaining columns\n",
"# as they are, and df_out=True returns a DataFrame rather than a bare array.\n",
"mapper = DataFrameMapper(\n",
"    [('State', LabelBinarizer())],\n",
"    default=None,\n",
"    df_out=True)\n",
"\n",
"# Drop the first column of the result (one of the State indicators) so that only\n",
"# two of the three state indicators remain in the frame.\n",
"df = mapper.fit_transform(df).iloc[:, 1:]\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>State_Florida</th>\n",
" <th>State_New York</th>\n",
" <th>R&amp;D Spend</th>\n",
" <th>Administration</th>\n",
" <th>Marketing Spend</th>\n",
" <th>Profit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>165349.20</td>\n",
" <td>136897.80</td>\n",
" <td>471784.10</td>\n",
" <td>192261.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>162597.70</td>\n",
" <td>151377.59</td>\n",
" <td>443898.53</td>\n",
" <td>191792.06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>144372.41</td>\n",
" <td>118671.85</td>\n",
" <td>383199.62</td>\n",
" <td>182901.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>131876.90</td>\n",
" <td>99814.71</td>\n",
" <td>362861.36</td>\n",
" <td>156991.12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>134615.46</td>\n",
" <td>147198.87</td>\n",
" <td>127716.82</td>\n",
" <td>156122.51</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" State_Florida State_New York R&D Spend Administration Marketing Spend \\\n",
"0 0.0 1.0 165349.20 136897.80 471784.10 \n",
"1 0.0 0.0 162597.70 151377.59 443898.53 \n",
"3 0.0 1.0 144372.41 118671.85 383199.62 \n",
"5 0.0 1.0 131876.90 99814.71 362861.36 \n",
"6 0.0 0.0 134615.46 147198.87 127716.82 \n",
"\n",
" Profit \n",
"0 192261.83 \n",
"1 191792.06 \n",
"3 182901.99 \n",
"5 156991.12 \n",
"6 156122.51 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Splitting df.index yields index *labels*, so rows are selected with .loc\n",
"# (label-based). .iloc (position-based) only agrees with it for a default RangeIndex.\n",
"idx_train, idx_test = train_test_split(\n",
"    df.index,\n",
"    test_size=0.2,\n",
"    random_state=0)  # fixed seed keeps the split reproducible\n",
"df_train = df.loc[idx_train].sort_index()\n",
"df_test = df.loc[idx_test].sort_index()\n",
"df_train.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/kit/UNIX/virtualenvs/engg/lib/python3.6/site-packages/sklearn/linear_model/base.py:509: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.\n",
" linalg.lstsq(X, y)\n"
]
},
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"\n",
"# Features are every column but the last; the target is the last column, kept 2-D.\n",
"# df_train is sliced by position rather than through df.columns so this cell keeps\n",
"# working when df is modified in place afterwards (e.g. the 'Ones' column inserted\n",
"# further down), which would otherwise raise a KeyError on re-run.\n",
"regressor = LinearRegression()\n",
"regressor.fit(\n",
"    df_train.iloc[:, :-1].values,\n",
"    df_train.iloc[:, -1:].values)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[178537.48221054],\n",
" [167921.0656955 ],\n",
" [132447.73845175],\n",
" [132582.27760815],\n",
" [113969.43533012],\n",
" [116161.24230165],\n",
" [103015.20159797],\n",
" [ 98791.73374688],\n",
" [ 67851.69209676],\n",
" [ 71976.09851258]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predict on the held-out rows using df_test's own feature columns (all but the\n",
"# last), so the cell does not break if df gains columns after the split.\n",
"y_pred = regressor.predict(\n",
"    df_test.iloc[:, :-1].values)\n",
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Profit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>191050.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>166187.94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>146121.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>144259.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>110352.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>105008.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>103282.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>97483.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>81229.06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>77798.83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Profit\n",
"2 191050.39\n",
"4 166187.94\n",
"10 146121.95\n",
"11 144259.40\n",
"22 110352.25\n",
"27 105008.31\n",
"28 103282.38\n",
"31 97483.56\n",
"38 81229.06\n",
"41 77798.83"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Actual profits of the held-out rows, for comparison with the predictions above.\n",
"df_test.loc[:, ['Profit']]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Ones</th>\n",
" <th>State_Florida</th>\n",
" <th>State_New York</th>\n",
" <th>R&amp;D Spend</th>\n",
" <th>Administration</th>\n",
" <th>Marketing Spend</th>\n",
" <th>Profit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>165349.20</td>\n",
" <td>136897.80</td>\n",
" <td>471784.10</td>\n",
" <td>192261.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>162597.70</td>\n",
" <td>151377.59</td>\n",
" <td>443898.53</td>\n",
" <td>191792.06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>153441.51</td>\n",
" <td>101145.55</td>\n",
" <td>407934.54</td>\n",
" <td>191050.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>144372.41</td>\n",
" <td>118671.85</td>\n",
" <td>383199.62</td>\n",
" <td>182901.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>142107.34</td>\n",
" <td>91391.77</td>\n",
" <td>366168.42</td>\n",
" <td>166187.94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Ones State_Florida State_New York R&D Spend Administration \\\n",
"0 1 0.0 1.0 165349.20 136897.80 \n",
"1 1 0.0 0.0 162597.70 151377.59 \n",
"2 1 1.0 0.0 153441.51 101145.55 \n",
"3 1 0.0 1.0 144372.41 118671.85 \n",
"4 1 1.0 0.0 142107.34 91391.77 \n",
"\n",
" Marketing Spend Profit \n",
"0 471784.10 192261.83 \n",
"1 443898.53 191792.06 \n",
"2 407934.54 191050.39 \n",
"3 383199.62 182901.99 \n",
"4 366168.42 166187.94 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# OLS is exposed by statsmodels.api; recent statsmodels releases no longer provide\n",
"# it through statsmodels.formula.api, which makes sm.OLS fail with AttributeError.\n",
"import statsmodels.api as sm\n",
"\n",
"# Add a constant column that serves as the intercept term for OLS. Guarded so that\n",
"# re-running the cell does not raise because 'Ones' already exists.\n",
"if 'Ones' not in df.columns:\n",
"    df.insert(0, 'Ones', 1)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.947\n",
"Model: OLS Adj. R-squared: 0.945\n",
"Method: Least Squares F-statistic: 849.8\n",
"Date: Sun, 20 May 2018 Prob (F-statistic): 3.50e-32\n",
"Time: 21:12:28 Log-Likelihood: -527.44\n",
"No. Observations: 50 AIC: 1059.\n",
"Df Residuals: 48 BIC: 1063.\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Ones 4.903e+04 2537.897 19.320 0.000 4.39e+04 5.41e+04\n",
"R&D Spend 0.8543 0.029 29.151 0.000 0.795 0.913\n",
"==============================================================================\n",
"Omnibus: 13.727 Durbin-Watson: 1.116\n",
"Prob(Omnibus): 0.001 Jarque-Bera (JB): 18.536\n",
"Skew: -0.911 Prob(JB): 9.44e-05\n",
"Kurtosis: 5.361 Cond. No. 1.65e+05\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 1.65e+05. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n"
]
}
],
"source": [
"def backwardElimination(df, SL):\n",
"    \"\"\"Select predictors by backward elimination on OLS p-values.\n",
"\n",
"    The last column of ``df`` is the target and every other column is a candidate\n",
"    predictor. An OLS model is refit repeatedly, each time dropping the predictor\n",
"    with the largest p-value, until all remaining p-values are below ``SL`` or a\n",
"    single predictor is left. The summary of the final fit is printed.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Candidate predictors followed by the target in the last column.\n",
"    SL : float\n",
"        Significance level a predictor's p-value must stay below to be kept.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Copy of ``df`` restricted to the retained predictors plus the target.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If ``df`` has no predictor columns besides the target.\n",
"    \"\"\"\n",
"    target = df.columns[-1]\n",
"    # Every column except the target is a candidate predictor.\n",
"    inVars = df.columns.tolist()[:-1]\n",
"    if not inVars:\n",
"        raise ValueError('df must contain at least one predictor column')\n",
"    while True:\n",
"        # exog is passed as a DataFrame so that pvalues is indexed by column name.\n",
"        regressor = sm.OLS(\n",
"            endog=df[[target]].values,\n",
"            exog=df[inVars]).fit()\n",
"        # Stop once everything left is significant, or when only one predictor\n",
"        # remains so the model is never refit on an empty design matrix.\n",
"        if regressor.pvalues.max() < SL or len(inVars) == 1:\n",
"            break\n",
"        inVars.remove(regressor.pvalues.idxmax())\n",
"    print(regressor.summary())\n",
"    return df[inVars + [target]].copy()\n",
"\n",
"df_opt = backwardElimination(df, 0.05)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[104667.27805998],\n",
" [134150.83410578],\n",
" [135207.80019517],\n",
" [ 72170.54428856],\n",
" [179090.58602508],\n",
" [109824.77386586],\n",
" [ 65644.27773757],\n",
" [100481.43277139],\n",
" [111431.75202432],\n",
" [169438.14843539]])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Splitting df_opt.index yields index *labels*, so rows are selected with .loc\n",
"# (label-based). .iloc (position-based) only agrees with it for a default RangeIndex.\n",
"idx_train, idx_test = train_test_split(\n",
"    df_opt.index,\n",
"    test_size=0.2,\n",
"    random_state=0)\n",
"\n",
"df_train = df_opt.loc[idx_train]\n",
"df_test = df_opt.loc[idx_test]\n",
"\n",
"# Features are all columns but the last; the target is the last column, kept 2-D.\n",
"# Plain arrays are used for both fit and predict so the model sees the same kind\n",
"# of input in both calls.\n",
"regressor = LinearRegression()\n",
"regressor.fit(\n",
"    df_train.iloc[:, :-1].values,\n",
"    df_train.iloc[:, -1:].values)\n",
"\n",
"regressor.predict(\n",
"    df_test.iloc[:, :-1].values)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Profit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>103282.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>144259.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>146121.95</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>77798.83</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>191050.39</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>105008.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>81229.06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>97483.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>110352.25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>166187.94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Profit\n",
"28 103282.38\n",
"11 144259.40\n",
"10 146121.95\n",
"41 77798.83\n",
"2 191050.39\n",
"27 105008.31\n",
"38 81229.06\n",
"31 97483.56\n",
"22 110352.25\n",
"4 166187.94"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Actual target values (last column) of the held-out rows, in split order.\n",
"df_test.iloc[:, -1:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment