In [663]:
import pandas as pd
def import_data(dataName):
    """Load the CSV file at ``dataName`` into a pandas DataFrame.

    The path is echoed to stdout before the file is read.
    """
    print(dataName)
    return pd.read_csv(dataName)
In [664]:
# Read the full dataset from its CSV file into a DataFrame.
totalData = import_data("trim_QOL_dataset.csv")
trim_QOL_dataset.csv
In [665]:
import numpy as np


def split_train_test(data, test_ratio, seed=None):
    """Randomly partition ``data`` into a training set and a test set.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to split; rows are selected positionally with ``.iloc``.
    test_ratio : float
        Fraction of rows, between 0 and 1 inclusive, to place in the test
        set. The test size is ``int(len(data) * test_ratio)`` (truncated).
    seed : int, optional
        When given, the shuffle uses a dedicated ``RandomState(seed)`` so the
        split is reproducible across runs. When omitted, NumPy's global
        random state is used.

    Returns
    -------
    tuple
        ``(train, test)`` row subsets of ``data``.

    Raises
    ------
    ValueError
        If ``test_ratio`` is outside ``[0, 1]``.
    """
    # A ratio outside [0, 1] would silently produce a nonsensical split
    # (e.g. a negative slice bound), so reject it up front.
    if not 0 <= test_ratio <= 1:
        raise ValueError(
            "test_ratio must be between 0 and 1, got %r" % (test_ratio,)
        )

    n_rows = len(data)
    if seed is None:
        shuffled_indices = np.random.permutation(n_rows)
    else:
        shuffled_indices = np.random.RandomState(seed).permutation(n_rows)

    test_set_size = int(n_rows * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]
In [666]:
# Shuffle totalData and split it with a 0.2 test ratio.
train_set, test_set = split_train_test(totalData, 0.2)
#test_set
In [667]:
#testRow = test_set.loc[20]
#testRow
# Load the active input values and keep the row labelled 0 as testRow.
importSet = import_data("activeInputValues.csv")
testRow = importSet.loc[0]
#testRow
#test_set.iloc[] = testRow
activeInputValues.csv
In [668]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import Imputer
from sklearn.preprocessing import StandardScaler
def prep(X):
    """Median-impute missing values in ``X``, then rescale with StandardScaler.

    Both transformers are fitted on ``X`` itself; the scaled array is returned.
    """
    filled = Imputer(missing_values='NaN', strategy='median').fit_transform(X)
    return StandardScaler().fit_transform(filled)
In [669]:
def dataScaler(X):
    """Fit a StandardScaler on ``X`` and return the transformed ``X``."""
    return StandardScaler().fit_transform(X)
In [670]:
def holderMake(testRow):
    """Build an imputed feature array that contains ``testRow``.

    Reads the module-level ``train_set``: subtracts ``train_set.iloc[1]`` from
    every row, assigns ``testRow`` to the row labelled ``1``, and fills missing
    values using ``Imputer``'s default strategy.

    Parameters
    ----------
    testRow : pandas.Series
        Row of input values to place into the frame; presumably indexed by
        the same column names as ``train_set`` -- verify against caller.

    Returns
    -------
    array
        The imputed (not scaled) values of the assembled frame.
    """
    holder = train_set - train_set.iloc[1]
    holder.loc[1] = testRow

    # NOTE(review): an earlier version also fitted a StandardScaler here but
    # discarded the scaled result and returned the imputed array. That dead
    # computation has been removed; confirm the unscaled array is really what
    # downstream prediction should receive.
    return Imputer(missing_values='NaN').fit_transform(holder)
In [703]:
def prepTopred(X, label, out):
    """Grid-search a random-forest regressor for one target column.

    Parameters
    ----------
    X : pandas.DataFrame
        Training data. Every column is imputed, scaled and used as a feature.
    label : str
        Name of the column in ``X`` used as the regression target.
    out : array-like
        Rows passed to the best model's ``predict``; presumably must have the
        same number of columns as ``X`` -- verify against caller.

    Returns
    -------
    RandomForestRegressor
        The ``best_estimator_`` found by the grid search.
    """
    from sklearn.model_selection import GridSearchCV

    # NOTE(review): ``label`` is still a column of X at this point, so the
    # target is also presented to the model as a feature -- confirm intended.
    imputed = Imputer(missing_values='NaN').fit_transform(X)
    prepped = StandardScaler().fit_transform(imputed)

    # The target is imputed separately so rows with a missing label still train.
    labelImp = Imputer(missing_values='NaN').fit_transform(X[[label]].copy())

    param_grid = [
        {'n_estimators': [3, 10, 30, 40, 50],
         'max_features': [2, 4, 6, 8, 10, 12, 14, 16]},
        # The second grid previously listed 50 twice; 40 restores parity with
        # the first grid and avoids fitting the same configuration twice.
        {'bootstrap': [False],
         'n_estimators': [3, 10, 30, 40, 50],
         'max_features': [2, 3, 4, 6, 8, 10, 12, 14, 16]},
    ]
    grid_search = GridSearchCV(
        RandomForestRegressor(),
        param_grid,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    grid_search.fit(prepped, labelImp.ravel())
    model = grid_search.best_estimator_

    # NOTE(review): predictions for ``out`` are computed but not returned.
    # A later cell calls float() on this function's result, which suggests a
    # prediction was returned at some point -- confirm the intended return.
    model.predict(out)
    return model
In [704]:
# Build the imputed array that includes testRow (see holderMake).
preppy = holderMake(testRow)
In [715]:
# Tune a random-forest regressor for "Employment_Rate_2015" on train_set.
# This is the body of prepTopred run inline with a wider parameter grid.

# Features: impute missing values, then rescale.
imputer = Imputer(missing_values='NaN')
imputed = imputer.fit_transform(train_set)

scaler = StandardScaler()
prepped = scaler.fit_transform(imputed)

# Target: the employment-rate column, imputed on its own.
found_labels = train_set[["Employment_Rate_2015"]].copy()
imputer2 = Imputer(missing_values='NaN')
labelImp = imputer2.fit_transform(found_labels)

from sklearn.model_selection import GridSearchCV

param_grid = [
    {
        'n_estimators': [3, 10, 30, 40, 50, 60, 70, 80, 90, 100],
        'max_features': [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30],
    },
    {
        'bootstrap': [False],
        'n_estimators': [3, 10, 30, 40, 50, 60, 70, 80, 90, 100],
        'max_features': [2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30],
    },
]
forest_reg2 = RandomForestRegressor()

grid_search = GridSearchCV(
    forest_reg2,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(prepped, labelImp.ravel())

# Last expression of the cell: show the winning hyper-parameters.
grid_search.best_params_
Out[715]:
{'bootstrap': False, 'max_features': 28, 'n_estimators': 70}
In [705]:
#predictedHouseChange = prepTopred(train_set, "lowerHouseChange", preppy)
#predictedComConstruct = prepTopred(train_set, "Commercial_Construction_2015", preppy)
#predictedIncome = prepTopred(train_set, "Household_Income_2015", preppy)
#predictedEmploy = prepTopred(train_set, "Employment_Rate_2015", preppy)
In [ ]:
 
In [2]:
import json

# Expects predictedHouseChange, predictedComConstruct, predictedIncome and
# predictedEmploy to have been defined by an earlier cell as values that
# float() accepts.
h = float(predictedHouseChange)
c = float(predictedComConstruct)
i = float(predictedIncome)
e = float(predictedEmploy)

# Aggregated score: the plain sum of the four predictions.
IS = h + c + i + e
output = [h, c, i, e, IS]

jdat = json.dumps(output)

# The previous version dumped an undefined name (`jsonData`) and ended with a
# module-level `return null`, so the cell could never run.
# NOTE(review): `jdat` is already a JSON string, so json.dump() writes it as a
# quoted JSON string (double-encoded). If the consumer of modelOutput.json
# expects a bare array, dump `output` here instead.
with open('modelOutput.json', 'w') as f:
    json.dump(jdat, f)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-2-f0afb1bbfec3> in <module>()
      1 
----> 2 h = float(predictedHouseChange)
      3 c = float(predictedComConstruct)
      4 i = float(predictedIncome)
      5 e = float(predictedEmploy)

NameError: name 'predictedHouseChange' is not defined
In [3]:
#output = [h, c, i, e, IS]
In [680]:
#Below are the predicted values for the output categories:
#Change in Median House Value, Commercial Construction, Household Income, Employment Rate, and an aggregated score.
#Actual values (from testRow, displayed below): 16379.00, 0.55, 67361.00, 95.00
Out[680]:
[16941.58, 7.216666666666668, 78610.98, 90.78, 95650.55666666667]
In [662]:
#import json

#jdat = json.dumps(output)
#print(jsonData)
# Bare string literal: as the cell's last expression it is only echoed as the cell value.
"output ="
In [658]:
#with open('modelOutput.json', 'w') as f:
     #json.dump(jsonData, f)
     #json.dump(jdat, f)  
In [681]:
# Display the row loaded from activeInputValues.csv.
testRow
Out[681]:
Unnamed: 0                               255.00
lowerHouseChange                       16379.00
Commercial_Construction_2015               0.55
Household_Income_2015                  67361.00
Employment_Rate_2015                      95.00
Arts_Participation_2013                   14.00
Arts_Participating_Households_2013       118.00
Neighborhood_Organizations_2014            1.00
Neighborhood_Organizations_2015            1.00
Board_Committee_Participation_2015         0.85
Housing_Density_2011                       1.60
Housing_Density_2013                       1.60
Housing_Density_2015                       1.60
Single_Family_Housing_2015               100.00
Single_Family_Housing_2013               100.00
Single_Family_Housing_2011               100.00
Single_Family_Units_2015                 860.00
Single_Family_Units_2013                 860.00
Single_Family_Units_2011                 860.00
311_Requests_2015                         47.50
311_Requests_2013                         45.80
311_Calls_2015                          1116.00
311_Calls_2013                          1075.00
Voter_Participation_2015                  22.00
Voter_Participation_2014                  54.00
Voter_Participation_2012                  76.00
Voter_Participation_2010                  44.00
Job_Density_2013                           0.10
Job_Density_2012                           0.10
New_Residential_2015                       0.18
                                         ...   
Transit_Proximity_2011                   100.00
Transit_Proximate_Units_2011             860.00
Bicycle_Friendliness_2015                  1.60
Bicycle_Friendliness_2013                  1.70
Bicycle_Friendliness_2011                  1.70
Street_Connectivity_2015                   1.09
Street_Connectivity_2013                   1.12
Street_Connectivity_2011                   1.12
High_School_Diploma_2015                  96.00
Early_Care_Proximity_2015                 47.00
Early_Care_Proximate_Units_2015          405.00
Early_Care_Proximity_2013                 53.00
Early_Care_Proximate_Units_2013          452.00
Early_Care_Proximity_2011                 63.00
Early_Care_Proximate_Units_2011          546.00
SchoolAge_Proximity_2015                  47.00
SchoolAge_Proximate_Units_2015           405.00
SchoolAge_Proximity_2013                  47.00
SchoolAge_Proximate_Units_2013           401.00
SchoolAge_Proximity_2011                  47.00
SchoolAge_Proximate_Units_2011           402.00
Proficiency_Elementary_School_2013        40.50
Proficiency_Middle_School_2014            48.00
Proficiency_Middle_School_2013            38.40
Proficiency_High_School_2014              56.00
Proficiency_High_School_2013              42.90
Neighborhood_School_Attendance_2014       86.00
Neighborhood_School_Attendance_2013       88.00
Neighborhood_School_Attendance_2011       88.00
Job_Density_2014                           0.10
Name: 0, Length: 68, dtype: float64