# Stacking in ML — a two-level stacking-architecture demo
# (level-0: k-NN + SVM base learners; level-1: random-forest meta-learner)
#!/usr/bin/env python
# coding: utf-8
# In[38]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
# In[8]:
# Load the diabetes dataset (binary 'Outcome' target).
df = pd.read_csv('diabetes.csv')

# Everything except the target column is a feature.
X = df.drop('Outcome', axis=1)
y = df['Outcome']

# First split: hold out half of the data for the meta-learner.
# NOTE(review): despite the names, `train`/`test` are the feature/target halves
# used for the base learners, while `val_train`/`val_test` are the held-out
# feature/target halves used to train the level-1 model.
train, val_train, test, val_test = train_test_split(X, y, random_state=355, test_size=0.5)

# Second split: carve a 20% test set out of the base-learner half.
X_train, X_test, y_train, y_test = train_test_split(train, test, test_size=0.2, random_state=355)
# In[15]:
# --- Level-0 (base) learners, both with default hyper-parameters ---

# k-nearest neighbours.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
# Training-set accuracy of the k-NN learner (notebook cell output).
knn.score(X_train, y_train)

# Support-vector classifier (RBF kernel by default).
svm = SVC()
svm.fit(X_train, y_train)
# Training-set accuracy of the SVM learner (notebook cell output).
svm.score(X_train, y_train)
# In[25]:
# Base-learner predictions on the held-out validation half; stacked
# side-by-side, these become the meta-learner's training features
# (one column per base learner).
predict_val1 = knn.predict(val_train)
predict_val2 = svm.predict(val_train)
predict_val = np.column_stack((predict_val1, predict_val2))
# Inspect the stacked validation features (notebook cell output).
predict_val

# Same stacking on the test split — used to evaluate the meta-learner.
predict_test1 = knn.predict(X_test)
predict_test2 = svm.predict(X_test)
predict_test = np.column_stack((predict_test1, predict_test2))
# In[ ]:
# In[29]:
# --- Level-1 (meta) learner ---
# A random forest trained on the stacked base-learner predictions rather
# than on the raw features.
rand_clf = RandomForestClassifier()
rand_clf.fit(predict_val, val_test)
# Stacked-model accuracy on the held-out test split (notebook cell output).
rand_clf.score(predict_test, y_test)
# In[40]:
# Hyper-parameter grid for tuning the random-forest meta-learner.
# FIX: 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3 as a
# max_features option (for classifiers it was an alias for 'sqrt'), so the
# original ['auto', 'log2'] would make GridSearchCV raise on a current
# install; search 'sqrt'/'log2' instead.
grid_parm = {
    "n_estimators": [90, 100, 115],
    "criterion": ['gini', 'entropy'],
    "min_samples_leaf": [1, 2, 3, 4, 5],
    "min_samples_split": [4, 5, 6, 7, 8],
    "max_features": ['sqrt', 'log2'],
}

# Exhaustive 5-fold cross-validated search over the grid, using every core.
grid_search = GridSearchCV(estimator=rand_clf, param_grid=grid_parm, cv=5, n_jobs=-1, verbose=3)
grid_search.fit(predict_val, val_test)
# Best hyper-parameter combination found (notebook cell output).
grid_search.best_params_
# In[50]:
# Refit the meta-learner with the best hyper-parameters the grid search found.
# FIX: the original hard-coded the tuned values, including
# max_features='auto', which scikit-learn removed in 1.3 (it aliased 'sqrt'
# for classifiers) — that line now raises. Expanding best_params_ directly
# also keeps this cell correct when a re-run of the search picks different
# values.
rand_clf = RandomForestClassifier(**grid_search.best_params_)
rand_clf.fit(predict_val, val_test)
# Final stacked-model accuracy on the test split (notebook cell output).
rand_clf.score(predict_test, y_test)
# In[ ]:
# End of notebook export (blog-page footer: "Comments / Post a Comment")