SlideShare a Scribd company logo
1 of 14
Download to read offline
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 1/14
In [1]:
In [2]:
In [3]:
train set shape: (56000, 52)
test set shape: (24000, 51)
import pandas as pd
import numpy as np
from pandas import DataFrame
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import catboost
from rgf.sklearn import RGFClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
# Load the competition's train and test splits from the local Datasets folder
# and report the (rows, columns) shape of each frame.
train = pd.read_csv("Datasets/2020 Qualifying competition/Train.csv")
test = pd.read_csv("Datasets/2020 Qualifying competition/Test.csv")
for label, frame in (("train", train), ("test", test)):
    print(label + " set shape:", frame.shape)
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 2/14
In [4]:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56000 entries, 0 to 55999
Data columns (total 52 columns):
Applicant_ID 56000 non-null object
form_field1 53471 non-null float64
form_field2 52156 non-null float64
form_field3 55645 non-null float64
form_field4 55645 non-null float64
form_field5 55645 non-null float64
form_field6 42640 non-null float64
form_field7 50837 non-null float64
form_field8 42640 non-null float64
form_field9 47992 non-null float64
form_field10 55645 non-null float64
form_field11 24579 non-null float64
form_field12 46105 non-null float64
form_field13 50111 non-null float64
form_field14 56000 non-null int64
form_field15 33525 non-null float64
form_field16 42964 non-null float64
form_field17 44849 non-null float64
form_field18 45598 non-null float64
form_field19 55996 non-null float64
form_field20 55645 non-null float64
form_field21 40146 non-null float64
form_field22 35600 non-null float64
form_field23 27877 non-null float64
form_field24 42703 non-null float64
form_field25 50550 non-null float64
form_field26 48562 non-null float64
form_field27 46701 non-null float64
form_field28 55645 non-null float64
form_field29 55645 non-null float64
form_field30 30491 non-null float64
form_field31 16592 non-null float64
form_field32 50550 non-null float64
form_field33 54744 non-null float64
form_field34 55645 non-null float64
form_field35 32852 non-null float64
form_field36 54005 non-null float64
form_field37 50550 non-null float64
form_field38 55645 non-null float64
form_field39 51789 non-null float64
form_field40 12271 non-null float64
form_field41 17771 non-null float64
form_field42 54677 non-null float64
form_field43 55432 non-null float64
form_field44 50617 non-null float64
form_field45 24683 non-null float64
form_field46 40096 non-null float64
form_field47 56000 non-null object
form_field48 35111 non-null float64
form_field49 55645 non-null float64
# Information on each of the columns in the dataset
train.info()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 3/14
form_field50 44944 non-null float64
default_status 56000 non-null object
dtypes: float64(48), int64(1), object(3)
memory usage: 22.2+ MB
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 4/14
In [5]:
Out[5]: Applicant_ID object
form_field1 float64
form_field2 float64
form_field3 float64
form_field4 float64
form_field5 float64
form_field6 float64
form_field7 float64
form_field8 float64
form_field9 float64
form_field10 float64
form_field11 float64
form_field12 float64
form_field13 float64
form_field14 int64
form_field15 float64
form_field16 float64
form_field17 float64
form_field18 float64
form_field19 float64
form_field20 float64
form_field21 float64
form_field22 float64
form_field23 float64
form_field24 float64
form_field25 float64
form_field26 float64
form_field27 float64
form_field28 float64
form_field29 float64
form_field30 float64
form_field31 float64
form_field32 float64
form_field33 float64
form_field34 float64
form_field35 float64
form_field36 float64
form_field37 float64
form_field38 float64
form_field39 float64
form_field40 float64
form_field41 float64
form_field42 float64
form_field43 float64
form_field44 float64
form_field45 float64
form_field46 float64
form_field47 object
form_field48 float64
form_field49 float64
form_field50 float64
dtype: object
test.dtypes
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 5/14
In [6]:
Out[6]:
Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f
0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689
1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898
2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 NaN 956
3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044
4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214
5 rows × 52 columns
train.head()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 6/14
In [7]:
Out[7]: Applicant_ID object
form_field1 float64
form_field2 float64
form_field3 float64
form_field4 float64
form_field5 float64
form_field6 float64
form_field7 float64
form_field8 float64
form_field9 float64
form_field10 float64
form_field11 float64
form_field12 float64
form_field13 float64
form_field14 int64
form_field15 float64
form_field16 float64
form_field17 float64
form_field18 float64
form_field19 float64
form_field20 float64
form_field21 float64
form_field22 float64
form_field23 float64
form_field24 float64
form_field25 float64
form_field26 float64
form_field27 float64
form_field28 float64
form_field29 float64
form_field30 float64
form_field31 float64
form_field32 float64
form_field33 float64
form_field34 float64
form_field35 float64
form_field36 float64
form_field37 float64
form_field38 float64
form_field39 float64
form_field40 float64
form_field41 float64
form_field42 float64
form_field43 float64
form_field44 float64
form_field45 float64
form_field46 float64
form_field47 object
form_field48 float64
form_field49 float64
form_field50 float64
default_status object
dtype: object
train.dtypes
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 7/14
In [8]:
Out[8]: Applicant_ID 0
form_field1 2529
form_field2 3844
form_field3 355
form_field4 355
form_field5 355
form_field6 13360
form_field7 5163
form_field8 13360
form_field9 8008
form_field10 355
form_field11 31421
form_field12 9895
form_field13 5889
form_field14 0
form_field15 22475
form_field16 13036
form_field17 11151
form_field18 10402
form_field19 4
form_field20 355
form_field21 15854
form_field22 20400
form_field23 28123
form_field24 13297
form_field25 5450
form_field26 7438
form_field27 9299
form_field28 355
form_field29 355
form_field30 25509
form_field31 39408
form_field32 5450
form_field33 1256
form_field34 355
form_field35 23148
form_field36 1995
form_field37 5450
form_field38 355
form_field39 4211
form_field40 43729
form_field41 38229
form_field42 1323
form_field43 568
form_field44 5383
form_field45 31317
form_field46 15904
form_field47 0
form_field48 20889
form_field49 355
form_field50 11056
default_status 0
dtype: int64
# Exploring the number of missing values(NaN) in each column of the dataset
train.isnull().sum()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 8/14
In [9]:
In [10]:
Out[10]: Applicant_ID 0
form_field1 0
form_field2 0
form_field3 0
form_field4 0
form_field5 0
form_field6 0
form_field7 0
form_field8 0
form_field9 0
form_field10 0
form_field11 0
form_field12 0
form_field13 0
form_field14 0
form_field15 0
form_field16 0
form_field17 0
form_field18 0
form_field19 0
# Replace every missing value (NaN) in both frames with the sentinel -999,
# then display the per-column NaN counts of the test frame.
train, test = train.fillna(-999), test.fillna(-999)
test.isnull().sum()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 9/14
In [11]:
Out[11]: Applicant_ID 0
form_field1 0
form_field2 0
form_field3 0
form_field4 0
form_field5 0
form_field6 0
form_field7 0
form_field8 0
form_field9 0
form_field10 0
form_field11 0
form_field12 0
form_field13 0
form_field14 0
form_field15 0
form_field16 0
form_field17 0
form_field18 0
form_field19 0
form_field20 0
form_field21 0
form_field22 0
form_field23 0
form_field24 0
form_field25 0
form_field26 0
form_field27 0
form_field28 0
form_field29 0
form_field30 0
form_field31 0
form_field32 0
form_field33 0
form_field34 0
form_field35 0
form_field36 0
form_field37 0
form_field38 0
form_field39 0
form_field40 0
form_field41 0
form_field42 0
form_field43 0
form_field44 0
form_field45 0
form_field46 0
form_field47 0
form_field48 0
form_field49 0
form_field50 0
default_status 0
dtype: int64
train.isnull().sum()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 10/14
In [12]:
In [13]:
In [14]:
In [15]:
Out[12]:
Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f
0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689
1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898
2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956
3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044
4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214
5 rows × 52 columns
Out[15]: 0 0
1 0
2 1
3 0
4 0
..
55995 0
55996 1
55997 0
55998 0
55999 0
Name: default_status, Length: 56000, dtype: int32
train.head()
# Encode the categorical columns as integers.
# Each column gets its own LabelEncoder fitted on the training data. The encoder
# fitted on train['form_field47'] is then reused for the test set, so both frames
# share one category -> integer mapping. Re-fitting on the test set (as before)
# could assign different codes whenever its set of categories differs from train.
encoder = ('form_field47', 'default_status')
fitted_encoders = {}
for x in encoder:
    le = LabelEncoder()
    train[x] = le.fit_transform(train[x].values)
    fitted_encoders[x] = le
# transform() raises ValueError for a category that never appeared in train,
# rather than silently encoding it inconsistently.
test['form_field47'] = fitted_encoders['form_field47'].transform(test['form_field47'].values)
y= train["default_status"]
y
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 11/14
In [16]:
In [17]:
In [18]:
In [19]:
In [20]:
In [21]:
In [23]:
In [24]:
Out[16]:
form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_field7 form_field
0 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689720.0 252072
1 3456.0 0.67400 0.2342 0.0 0.000 0.0 898979.0 497531
2 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956940.0 -9999
3 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044703.0 385499
4 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214728.0 214728
5 rows × 50 columns
# Drop the identifier column and the target column so only the model features
# remain in `train`, and drop the identifier from `test`.
# errors="ignore" keeps this cell re-runnable: the frames are overwritten in
# place, so a second execution would otherwise raise KeyError because the
# columns are already gone.
train = train.drop(columns=["Applicant_ID", "default_status"], errors="ignore")
test = test.drop(columns=["Applicant_ID"], errors="ignore")
train.head()
# Rescale the features with a MinMaxScaler (note: despite its name, `robust`
# holds a MinMaxScaler). The scaler is fitted on the training frame only and the
# same fitted scaler is applied to the test frame. Wrapping the resulting arrays
# in DataFrame() yields frames with default integer column labels.
robust = MinMaxScaler()
scaledtrain = DataFrame(robust.fit_transform(train))
scaledtest = DataFrame(robust.transform(test))
# split input(x) and output(y) data
#from sklearn.model_selection import train_test_split
#X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.3,
#stratify=y,random_state=42)
params= {'n_estimators': 2500,'learning_rate':0.01,'objective': 'CrossEntropy','e
'random_seed': 3500,'verbose': False}
fold= StratifiedKFold(n_splits= 15, shuffle= True,random_state= 999)
from catboost import CatBoostClassifier
model1= CatBoostClassifier(random_state= 499, verbose= False)
model2= RGFClassifier(max_leaf= 1500)
model3= LGBMClassifier(n_estimators= 900, random_state= 499)
localscore, testscore= [], []
model= VotingClassifier(estimators= [('model1', model1), ('model2', model2), ('mo
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 12/14
In [25]:
#########################fold0
[Voting] ................... (1 of 3) Processing model1, total= 36.0s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.1s
#########################fold1
[Voting] ................... (1 of 3) Processing model1, total= 37.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.8min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold2
[Voting] ................... (1 of 3) Processing model1, total= 36.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold3
[Voting] ................... (1 of 3) Processing model1, total= 38.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold4
[Voting] ................... (1 of 3) Processing model1, total= 37.6s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 12.2s
#########################fold5
[Voting] ................... (1 of 3) Processing model1, total= 35.5s
[Voting] ................... (2 of 3) Processing model2, total= 1.8min
[Voting] ................... (3 of 3) Processing model3, total= 10.1s
#########################fold6
[Voting] ................... (1 of 3) Processing model1, total= 37.6s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold7
[Voting] ................... (1 of 3) Processing model1, total= 35.9s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.3s
#########################fold8
[Voting] ................... (1 of 3) Processing model1, total= 37.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.2s
#########################fold9
[Voting] ................... (1 of 3) Processing model1, total= 37.4s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.1s
#########################fold10
# Stratified K-fold cross-validation of the voting ensemble.
# The accumulators are reset here so that re-running this cell starts from empty
# lists instead of appending a second set of folds to the previous run's results
# (which would skew both the mean AUC and the averaged test predictions).
localscore, testscore = [], []
for i, (trainval, valvalue) in enumerate(fold.split(scaledtrain, y)):
    print('#' * 25 + 'fold' + str(i))
    # Split features and target into this fold's training and validation parts.
    train_x, val_x = scaledtrain.iloc[trainval], scaledtrain.iloc[valvalue]
    train_y, val_y = y.iloc[trainval], y.iloc[valvalue]
    model.fit(train_x, train_y)
    # Score the fold with ROC AUC on the last predict_proba column.
    pred = model.predict_proba(val_x)[:, -1]
    result = roc_auc_score(val_y, pred)
    localscore.append(result)
    # Keep this fold's test-set probabilities; they are averaged across folds later.
    testpred = model.predict_proba(scaledtest)[:, -1]
    testscore.append(testpred)
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 13/14
In [26]:
In [28]:
[Voting] ................... (1 of 3) Processing model1, total= 35.9s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.3s
#########################fold11
[Voting] ................... (1 of 3) Processing model1, total= 37.7s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 12.6s
#########################fold12
[Voting] ................... (1 of 3) Processing model1, total= 35.8s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.4s
#########################fold13
[Voting] ................... (1 of 3) Processing model1, total= 37.1s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.0s
#########################fold14
[Voting] ................... (1 of 3) Processing model1, total= 36.2s
[Voting] ................... (2 of 3) Processing model2, total= 1.7min
[Voting] ................... (3 of 3) Processing model3, total= 10.6s
0.84036499050012
Out[28]:
0 1 2 3 4 5 6 7 8
0 0.837618 0.835608 0.837453 0.856249 0.829797 0.84765 0.836926 0.84438 0.823843 0.85009
# Report the mean ROC AUC across all folds, then lay the per-fold scores out
# as a single-row table (one column per fold) for display.
mean_auc = np.mean(localscore)
print(mean_auc)
fold_scores = pd.DataFrame(localscore)
output = fold_scores.T
output.head()
03/10/2020 DSN20 - Jupyter Notebook
localhost:8888/notebooks/Desktop/DSN20.ipynb 14/14
In [29]:
In [33]:
In [34]:
In [ ]:
Out[29]:
Applicant_ID default_status
0 Apcnt_1000032 1
1 Apcnt_1000048 1
2 Apcnt_1000052 1
3 Apcnt_1000076 1
4 Apcnt_1000080 1
... ... ...
23995 Apcnt_999940 1
23996 Apcnt_999956 1
23997 Apcnt_999976 1
23998 Apcnt_999984 1
23999 Apcnt_999992 1
24000 rows × 2 columns
submission= pd.read_csv('Datasets/2020 Qualifying Competition/SampleSubmission.cs
submission
# Average the per-fold test predictions element-wise (axis 0 runs over folds),
# store them in the submission frame and write it to 'grace.csv' without the index.
test_pred = np.asarray(testscore).mean(axis=0)
submission['default_status'] = test_pred
submission.to_csv('grace.csv', index=False)

More Related Content

What's hot

FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3Syahriha Ruslan
 
Computer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paperComputer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paperDeepak Singh
 
Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021 Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021 FarhanAhmade
 
Sp 1418794917
Sp 1418794917Sp 1418794917
Sp 1418794917lakshmi r
 
Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)Poonam Chopra
 
FINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEMFINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEMAmira Dolce Farhana
 
C Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTREC Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTREjatin batra
 
Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009Deepak Singh
 
Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)Poonam Chopra
 
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...FarhanAhmade
 
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEMFINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEMAmira Dolce Farhana
 
Computer Science Sample Paper 2015
Computer Science Sample Paper 2015Computer Science Sample Paper 2015
Computer Science Sample Paper 2015Poonam Chopra
 
Fp304 DATABASE SYSTEM JUNE 2012
Fp304   DATABASE SYSTEM JUNE 2012Fp304   DATABASE SYSTEM JUNE 2012
Fp304 DATABASE SYSTEM JUNE 2012Syahriha Ruslan
 
FP304 DATABASE SYSTEM FINAL PAPER
FP304    DATABASE SYSTEM FINAL PAPERFP304    DATABASE SYSTEM FINAL PAPER
FP304 DATABASE SYSTEM FINAL PAPERSyahriha Ruslan
 
Computer science ms
Computer science msComputer science ms
Computer science msB Bhuvanesh
 

What's hot (20)

FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3FP305 data structure PAPER FINAL SEM 3
FP305 data structure PAPER FINAL SEM 3
 
Computer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paperComputer science-2010-cbse-question-paper
Computer science-2010-cbse-question-paper
 
Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021 Question Paper Code 065 informatic Practice New CBSE - 2021
Question Paper Code 065 informatic Practice New CBSE - 2021
 
Sp 1418794917
Sp 1418794917Sp 1418794917
Sp 1418794917
 
Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)Sample Paper 2 Class XI (Computer Science)
Sample Paper 2 Class XI (Computer Science)
 
FINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEMFINAL PAPER FP304 DATABASE SYSTEM
FINAL PAPER FP304 DATABASE SYSTEM
 
C Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTREC Programming Training In Ambala ! BATRA COMPUTER CENTRE
C Programming Training In Ambala ! BATRA COMPUTER CENTRE
 
Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009Cbse question-paper-computer-science-2009
Cbse question-paper-computer-science-2009
 
Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)Sample Paper Class XI (Informatics Practices)
Sample Paper Class XI (Informatics Practices)
 
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...Informatics Practices (new) solution CBSE  2021, Compartment,  improvement ex...
Informatics Practices (new) solution CBSE 2021, Compartment, improvement ex...
 
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEMFINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
FINAL PAPER FP501 OPEN SOURCE OPERATING SYSTEM
 
Computer Science Sample Paper 2015
Computer Science Sample Paper 2015Computer Science Sample Paper 2015
Computer Science Sample Paper 2015
 
Fp304 DATABASE SYSTEM JUNE 2012
Fp304   DATABASE SYSTEM JUNE 2012Fp304   DATABASE SYSTEM JUNE 2012
Fp304 DATABASE SYSTEM JUNE 2012
 
FP304 DATABASE SYSTEM FINAL PAPER
FP304    DATABASE SYSTEM FINAL PAPERFP304    DATABASE SYSTEM FINAL PAPER
FP304 DATABASE SYSTEM FINAL PAPER
 
Computer science ms
Computer science msComputer science ms
Computer science ms
 
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
7th Semester (Dec-2015; Jan-2016) Computer Science and Information Science En...
 
7th Semester Information Science (2013-June) Question Papers
7th Semester Information Science (2013-June) Question Papers 7th Semester Information Science (2013-June) Question Papers
7th Semester Information Science (2013-June) Question Papers
 
Oops qb cse
Oops qb cseOops qb cse
Oops qb cse
 
7th semester Computer Science and Information Science Engg (2013 December) Qu...
7th semester Computer Science and Information Science Engg (2013 December) Qu...7th semester Computer Science and Information Science Engg (2013 December) Qu...
7th semester Computer Science and Information Science Engg (2013 December) Qu...
 
5th Semester (June; July-2015) Computer Science and Information Science Engin...
5th Semester (June; July-2015) Computer Science and Information Science Engin...5th Semester (June; July-2015) Computer Science and Information Science Engin...
5th Semester (June; July-2015) Computer Science and Information Science Engin...
 

Similar to Loan-defaulters-predictions(Python codes)

Open SQL & Internal Table
Open SQL & Internal TableOpen SQL & Internal Table
Open SQL & Internal Tablesapdocs. info
 
03 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp0203 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp02tabish
 
03 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp0103 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp01wingsrai
 
Debug Information And Where They Come From
Debug Information And Where They Come FromDebug Information And Where They Come From
Debug Information And Where They Come FromMin-Yih Hsu
 
R getting spatial
R getting spatialR getting spatial
R getting spatialFAO
 
Cics cheat sheet
Cics cheat sheetCics cheat sheet
Cics cheat sheetRafi Shaik
 
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...Kamiya Toshihiro
 
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeksBeginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeksJinTaek Seo
 
JVM code reading -- C2
JVM code reading -- C2JVM code reading -- C2
JVM code reading -- C2ytoshima
 
Pandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codePandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codeTim Hong
 
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docxSpring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docxrafbolet0
 
Declaring friend function with inline code
Declaring friend function with inline codeDeclaring friend function with inline code
Declaring friend function with inline codeRajeev Sharan
 
Cassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr KołaczkowskiCassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr KołaczkowskiModern Data Stack France
 
Part II: LLVM Intermediate Representation
Part II: LLVM Intermediate RepresentationPart II: LLVM Intermediate Representation
Part II: LLVM Intermediate RepresentationWei-Ren Chen
 

Similar to Loan-defaulters-predictions(Python codes) (20)

Open SQL & Internal Table
Open SQL & Internal TableOpen SQL & Internal Table
Open SQL & Internal Table
 
03 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp0203 abap3-090715081232-phpapp01-100511101016-phpapp02
03 abap3-090715081232-phpapp01-100511101016-phpapp02
 
03 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp0103 abap3-090715081232-phpapp01
03 abap3-090715081232-phpapp01
 
Debug Information And Where They Come From
Debug Information And Where They Come FromDebug Information And Where They Come From
Debug Information And Where They Come From
 
alexnet.pdf
alexnet.pdfalexnet.pdf
alexnet.pdf
 
R getting spatial
R getting spatialR getting spatial
R getting spatial
 
Cics cheat sheet
Cics cheat sheetCics cheat sheet
Cics cheat sheet
 
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
An Execution-Semantic and Content-and-Context-Based Code-Clone Detection and ...
 
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeksBeginning direct3d gameprogramming05_thebasics_20160421_jintaeks
Beginning direct3d gameprogramming05_thebasics_20160421_jintaeks
 
10. R getting spatial
10.  R getting spatial10.  R getting spatial
10. R getting spatial
 
JVM code reading -- C2
JVM code reading -- C2JVM code reading -- C2
JVM code reading -- C2
 
Cs practical file
Cs practical fileCs practical file
Cs practical file
 
Pandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with codePandas+postgre sql 實作 with code
Pandas+postgre sql 實作 with code
 
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docxSpring 2014 CSCI 111 Final exam   of 1 61. (2 points) Fl.docx
Spring 2014 CSCI 111 Final exam of 1 61. (2 points) Fl.docx
 
Boosting Developer Productivity with Clang
Boosting Developer Productivity with ClangBoosting Developer Productivity with Clang
Boosting Developer Productivity with Clang
 
KPMG - TASK 1.pdf
KPMG - TASK 1.pdfKPMG - TASK 1.pdf
KPMG - TASK 1.pdf
 
CBSE 12 ip 2018 sample paper
CBSE 12 ip 2018 sample paperCBSE 12 ip 2018 sample paper
CBSE 12 ip 2018 sample paper
 
Declaring friend function with inline code
Declaring friend function with inline codeDeclaring friend function with inline code
Declaring friend function with inline code
 
Cassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr KołaczkowskiCassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
Cassandra Hadoop Integration at HUG France by Piotr Kołaczkowski
 
Part II: LLVM Intermediate Representation
Part II: LLVM Intermediate RepresentationPart II: LLVM Intermediate Representation
Part II: LLVM Intermediate Representation
 

Recently uploaded

Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...MyIntelliSource, Inc.
 
DNT_Corporate presentation know about us
DNT_Corporate presentation know about usDNT_Corporate presentation know about us
DNT_Corporate presentation know about usDynamic Netsoft
 
Building Real-Time Data Pipelines: Stream & Batch Processing workshop Slide
Building Real-Time Data Pipelines: Stream & Batch Processing workshop SlideBuilding Real-Time Data Pipelines: Stream & Batch Processing workshop Slide
Building Real-Time Data Pipelines: Stream & Batch Processing workshop SlideChristina Lin
 
5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdfWave PLM
 
HR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comHR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comFatema Valibhai
 
Unlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language ModelsUnlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language Modelsaagamshah0812
 
Software Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsSoftware Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsArshad QA
 
Hand gesture recognition PROJECT PPT.pptx
Hand gesture recognition PROJECT PPT.pptxHand gesture recognition PROJECT PPT.pptx
Hand gesture recognition PROJECT PPT.pptxbodapatigopi8531
 
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...kellynguyen01
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsAndolasoft Inc
 
Advancing Engineering with AI through the Next Generation of Strategic Projec...
Advancing Engineering with AI through the Next Generation of Strategic Projec...Advancing Engineering with AI through the Next Generation of Strategic Projec...
Advancing Engineering with AI through the Next Generation of Strategic Projec...OnePlan Solutions
 
Cloud Management Software Platforms: OpenStack
Cloud Management Software Platforms: OpenStackCloud Management Software Platforms: OpenStack
Cloud Management Software Platforms: OpenStackVICTOR MAESTRE RAMIREZ
 
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...harshavardhanraghave
 
Project Based Learning (A.I).pptx detail explanation
Project Based Learning (A.I).pptx detail explanationProject Based Learning (A.I).pptx detail explanation
Project Based Learning (A.I).pptx detail explanationkaushalgiri8080
 
Der Spagat zwischen BIAS und FAIRNESS (2024)
Der Spagat zwischen BIAS und FAIRNESS (2024)Der Spagat zwischen BIAS und FAIRNESS (2024)
Der Spagat zwischen BIAS und FAIRNESS (2024)OPEN KNOWLEDGE GmbH
 
Salesforce Certified Field Service Consultant
Salesforce Certified Field Service ConsultantSalesforce Certified Field Service Consultant
Salesforce Certified Field Service ConsultantAxelRicardoTrocheRiq
 
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...ICS
 
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...MyIntelliSource, Inc.
 

Recently uploaded (20)

Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
Try MyIntelliAccount Cloud Accounting Software As A Service Solution Risk Fre...
 
DNT_Corporate presentation know about us
DNT_Corporate presentation know about usDNT_Corporate presentation know about us
DNT_Corporate presentation know about us
 
Building Real-Time Data Pipelines: Stream & Batch Processing workshop Slide
Building Real-Time Data Pipelines: Stream & Batch Processing workshop SlideBuilding Real-Time Data Pipelines: Stream & Batch Processing workshop Slide
Building Real-Time Data Pipelines: Stream & Batch Processing workshop Slide
 
5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf5 Signs You Need a Fashion PLM Software.pdf
5 Signs You Need a Fashion PLM Software.pdf
 
HR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.comHR Software Buyers Guide in 2024 - HRSoftware.com
HR Software Buyers Guide in 2024 - HRSoftware.com
 
Unlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language ModelsUnlocking the Future of AI Agents with Large Language Models
Unlocking the Future of AI Agents with Large Language Models
 
Software Quality Assurance Interview Questions
Software Quality Assurance Interview QuestionsSoftware Quality Assurance Interview Questions
Software Quality Assurance Interview Questions
 
Hand gesture recognition PROJECT PPT.pptx
Hand gesture recognition PROJECT PPT.pptxHand gesture recognition PROJECT PPT.pptx
Hand gesture recognition PROJECT PPT.pptx
 
Exploring iOS App Development: Simplifying the Process
Exploring iOS App Development: Simplifying the ProcessExploring iOS App Development: Simplifying the Process
Exploring iOS App Development: Simplifying the Process
 
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
Short Story: Unveiling the Reasoning Abilities of Large Language Models by Ke...
 
How To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.jsHow To Use Server-Side Rendering with Nuxt.js
How To Use Server-Side Rendering with Nuxt.js
 
Advancing Engineering with AI through the Next Generation of Strategic Projec...
Advancing Engineering with AI through the Next Generation of Strategic Projec...Advancing Engineering with AI through the Next Generation of Strategic Projec...
Advancing Engineering with AI through the Next Generation of Strategic Projec...
 
Cloud Management Software Platforms: OpenStack
Cloud Management Software Platforms: OpenStackCloud Management Software Platforms: OpenStack
Cloud Management Software Platforms: OpenStack
 
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
Reassessing the Bedrock of Clinical Function Models: An Examination of Large ...
 
Project Based Learning (A.I).pptx detail explanation
Project Based Learning (A.I).pptx detail explanationProject Based Learning (A.I).pptx detail explanation
Project Based Learning (A.I).pptx detail explanation
 
Der Spagat zwischen BIAS und FAIRNESS (2024)
Der Spagat zwischen BIAS und FAIRNESS (2024)Der Spagat zwischen BIAS und FAIRNESS (2024)
Der Spagat zwischen BIAS und FAIRNESS (2024)
 
Salesforce Certified Field Service Consultant
Salesforce Certified Field Service ConsultantSalesforce Certified Field Service Consultant
Salesforce Certified Field Service Consultant
 
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
The Real-World Challenges of Medical Device Cybersecurity- Mitigating Vulnera...
 
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
Steps To Getting Up And Running Quickly With MyTimeClock Employee Scheduling ...
 
Call Girls In Mukherjee Nagar 📱 9999965857 🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SE...
Call Girls In Mukherjee Nagar 📱  9999965857  🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SE...Call Girls In Mukherjee Nagar 📱  9999965857  🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SE...
Call Girls In Mukherjee Nagar 📱 9999965857 🤩 Delhi 🫦 HOT AND SEXY VVIP 🍎 SE...
 

Loan-defaulters-predictions(Python codes)

  • 1. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 1/14 In [1]: In [2]: In [3]: train set shape: (56000, 52) test set shape: (24000, 51) import pandas as pd import numpy as np from pandas import DataFrame from sklearn.metrics import roc_auc_score from sklearn.preprocessing import LabelEncoder, MinMaxScaler import catboost from rgf.sklearn import RGFClassifier from lightgbm import LGBMClassifier from sklearn.ensemble import VotingClassifier, RandomForestClassifier from sklearn.model_selection import cross_val_score, StratifiedKFold train=pd.read_csv("Datasets/2020 Qualifying competition/Train.csv") test=pd.read_csv("Datasets/2020 Qualifying competition/Test.csv") print("train set shape:",train.shape) print("test set shape:",test.shape)
  • 2. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 2/14 In [4]: <class 'pandas.core.frame.DataFrame'> RangeIndex: 56000 entries, 0 to 55999 Data columns (total 52 columns): Applicant_ID 56000 non-null object form_field1 53471 non-null float64 form_field2 52156 non-null float64 form_field3 55645 non-null float64 form_field4 55645 non-null float64 form_field5 55645 non-null float64 form_field6 42640 non-null float64 form_field7 50837 non-null float64 form_field8 42640 non-null float64 form_field9 47992 non-null float64 form_field10 55645 non-null float64 form_field11 24579 non-null float64 form_field12 46105 non-null float64 form_field13 50111 non-null float64 form_field14 56000 non-null int64 form_field15 33525 non-null float64 form_field16 42964 non-null float64 form_field17 44849 non-null float64 form_field18 45598 non-null float64 form_field19 55996 non-null float64 form_field20 55645 non-null float64 form_field21 40146 non-null float64 form_field22 35600 non-null float64 form_field23 27877 non-null float64 form_field24 42703 non-null float64 form_field25 50550 non-null float64 form_field26 48562 non-null float64 form_field27 46701 non-null float64 form_field28 55645 non-null float64 form_field29 55645 non-null float64 form_field30 30491 non-null float64 form_field31 16592 non-null float64 form_field32 50550 non-null float64 form_field33 54744 non-null float64 form_field34 55645 non-null float64 form_field35 32852 non-null float64 form_field36 54005 non-null float64 form_field37 50550 non-null float64 form_field38 55645 non-null float64 form_field39 51789 non-null float64 form_field40 12271 non-null float64 form_field41 17771 non-null float64 form_field42 54677 non-null float64 form_field43 55432 non-null float64 form_field44 50617 non-null float64 form_field45 24683 non-null float64 form_field46 40096 non-null float64 form_field47 56000 non-null object form_field48 35111 non-null float64 form_field49 55645 non-null float64 # 
Information on each of the columns in the dataset train.info()
  • 3. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 3/14 form_field50 44944 non-null float64 default_status 56000 non-null object dtypes: float64(48), int64(1), object(3) memory usage: 22.2+ MB
  • 4. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 4/14 In [5]: Out[5]: Applicant_ID object form_field1 float64 form_field2 float64 form_field3 float64 form_field4 float64 form_field5 float64 form_field6 float64 form_field7 float64 form_field8 float64 form_field9 float64 form_field10 float64 form_field11 float64 form_field12 float64 form_field13 float64 form_field14 int64 form_field15 float64 form_field16 float64 form_field17 float64 form_field18 float64 form_field19 float64 form_field20 float64 form_field21 float64 form_field22 float64 form_field23 float64 form_field24 float64 form_field25 float64 form_field26 float64 form_field27 float64 form_field28 float64 form_field29 float64 form_field30 float64 form_field31 float64 form_field32 float64 form_field33 float64 form_field34 float64 form_field35 float64 form_field36 float64 form_field37 float64 form_field38 float64 form_field39 float64 form_field40 float64 form_field41 float64 form_field42 float64 form_field43 float64 form_field44 float64 form_field45 float64 form_field46 float64 form_field47 object form_field48 float64 form_field49 float64 form_field50 float64 dtype: object test.dtypes
  • 5. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 5/14 In [6]: Out[6]: Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f 0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689 1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898 2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 NaN 956 3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044 4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214 5 rows × 52 columns train.head()
  • 6. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 6/14 In [7]: Out[7]: Applicant_ID object form_field1 float64 form_field2 float64 form_field3 float64 form_field4 float64 form_field5 float64 form_field6 float64 form_field7 float64 form_field8 float64 form_field9 float64 form_field10 float64 form_field11 float64 form_field12 float64 form_field13 float64 form_field14 int64 form_field15 float64 form_field16 float64 form_field17 float64 form_field18 float64 form_field19 float64 form_field20 float64 form_field21 float64 form_field22 float64 form_field23 float64 form_field24 float64 form_field25 float64 form_field26 float64 form_field27 float64 form_field28 float64 form_field29 float64 form_field30 float64 form_field31 float64 form_field32 float64 form_field33 float64 form_field34 float64 form_field35 float64 form_field36 float64 form_field37 float64 form_field38 float64 form_field39 float64 form_field40 float64 form_field41 float64 form_field42 float64 form_field43 float64 form_field44 float64 form_field45 float64 form_field46 float64 form_field47 object form_field48 float64 form_field49 float64 form_field50 float64 default_status object dtype: object train.dtypes
  • 7. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 7/14 In [8]: Out[8]: Applicant_ID 0 form_field1 2529 form_field2 3844 form_field3 355 form_field4 355 form_field5 355 form_field6 13360 form_field7 5163 form_field8 13360 form_field9 8008 form_field10 355 form_field11 31421 form_field12 9895 form_field13 5889 form_field14 0 form_field15 22475 form_field16 13036 form_field17 11151 form_field18 10402 form_field19 4 form_field20 355 form_field21 15854 form_field22 20400 form_field23 28123 form_field24 13297 form_field25 5450 form_field26 7438 form_field27 9299 form_field28 355 form_field29 355 form_field30 25509 form_field31 39408 form_field32 5450 form_field33 1256 form_field34 355 form_field35 23148 form_field36 1995 form_field37 5450 form_field38 355 form_field39 4211 form_field40 43729 form_field41 38229 form_field42 1323 form_field43 568 form_field44 5383 form_field45 31317 form_field46 15904 form_field47 0 form_field48 20889 form_field49 355 form_field50 11056 default_status 0 dtype: int64 # Exploring the number of missing values(NaN) in each column of the dataset train.isnull().sum()
  • 8. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 8/14 In [9]: In [10]: Out[10]: Applicant_ID 0 form_field1 0 form_field2 0 form_field3 0 form_field4 0 form_field5 0 form_field6 0 form_field7 0 form_field8 0 form_field9 0 form_field10 0 form_field11 0 form_field12 0 form_field13 0 form_field14 0 form_field15 0 form_field16 0 form_field17 0 form_field18 0 form_field19 0 #fill empty cell(NaN data)with -999 train=train.fillna(-999) test= test.fillna(-999) test.isnull().sum()
  • 9. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 9/14 In [11]: Out[11]: Applicant_ID 0 form_field1 0 form_field2 0 form_field3 0 form_field4 0 form_field5 0 form_field6 0 form_field7 0 form_field8 0 form_field9 0 form_field10 0 form_field11 0 form_field12 0 form_field13 0 form_field14 0 form_field15 0 form_field16 0 form_field17 0 form_field18 0 form_field19 0 form_field20 0 form_field21 0 form_field22 0 form_field23 0 form_field24 0 form_field25 0 form_field26 0 form_field27 0 form_field28 0 form_field29 0 form_field30 0 form_field31 0 form_field32 0 form_field33 0 form_field34 0 form_field35 0 form_field36 0 form_field37 0 form_field38 0 form_field39 0 form_field40 0 form_field41 0 form_field42 0 form_field43 0 form_field44 0 form_field45 0 form_field46 0 form_field47 0 form_field48 0 form_field49 0 form_field50 0 default_status 0 dtype: int64 train.isnull().sum()
  • 10. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 10/14 In [12]: In [13]: In [14]: In [15]: Out[12]: Applicant_ID form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_f 0 Apcnt_1000000 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689 1 Apcnt_1000004 3456.0 0.67400 0.2342 0.0 0.000 0.0 898 2 Apcnt_1000008 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956 3 Apcnt_1000012 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044 4 Apcnt_1000016 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214 5 rows × 52 columns Out[15]: 0 0 1 0 2 1 3 0 4 0 .. 55995 0 55996 1 55997 0 55998 0 55999 0 Name: default_status, Length: 56000, dtype: int32 train.head() # Encoding Categorical features encoder= ('form_field47', 'default_status') for x in encoder: le= LabelEncoder() train[x] =le.fit_transform(train[x].values) test['form_field47']=le.fit_transform(test['form_field47'].values) y= train["default_status"] y
  • 11. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 11/14 In [16]: In [17]: In [18]: In [19]: In [20]: In [21]: In [23]: In [24]: Out[16]: form_field1 form_field2 form_field3 form_field4 form_field5 form_field6 form_field7 form_field 0 3436.0 0.28505 1.6560 0.0 0.000 0.0 10689720.0 252072 1 3456.0 0.67400 0.2342 0.0 0.000 0.0 898979.0 497531 2 3276.0 0.53845 3.1510 0.0 6.282 -9999.0 956940.0 -9999 3 3372.0 0.17005 0.5050 0.0 0.000 192166.0 3044703.0 385499 4 3370.0 0.77270 1.1010 0.0 0.000 1556.0 214728.0 214728 5 rows × 50 columns # drop appropriate column train= train.drop(["Applicant_ID","default_status"], axis=1) test= test.drop(["Applicant_ID"], axis=1) train.head() robust= MinMaxScaler() scaledtrain= robust.fit_transform(train) scaledtrain= DataFrame(scaledtrain) scaledtest= robust.transform(test) scaledtest= DataFrame(scaledtest) # split input(x) and output(y) data #from sklearn.model_selection import train_test_split #X_train, X_test, y_train, y_test=train_test_split(X,y,test_size=0.3, #stratify=y,random_state=42) params= {'n_estimators': 2500,'learning_rate':0.01,'objective': 'CrossEntropy','e 'random_seed': 3500,'verbose': False} fold= StratifiedKFold(n_splits= 15, shuffle= True,random_state= 999) from catboost import CatBoostClassifier model1= CatBoostClassifier(random_state= 499, verbose= False) model2= RGFClassifier(max_leaf= 1500) model3= LGBMClassifier(n_estimators= 900, random_state= 499) localscore, testscore= [], [] model= VotingClassifier(estimators= [('model1', model1), ('model2', model2), ('mo
  • 12. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 12/14 In [25]: #########################fold0 [Voting] ................... (1 of 3) Processing model1, total= 36.0s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.1s #########################fold1 [Voting] ................... (1 of 3) Processing model1, total= 37.4s [Voting] ................... (2 of 3) Processing model2, total= 1.8min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold2 [Voting] ................... (1 of 3) Processing model1, total= 36.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold3 [Voting] ................... (1 of 3) Processing model1, total= 38.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold4 [Voting] ................... (1 of 3) Processing model1, total= 37.6s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 12.2s #########################fold5 [Voting] ................... (1 of 3) Processing model1, total= 35.5s [Voting] ................... (2 of 3) Processing model2, total= 1.8min [Voting] ................... (3 of 3) Processing model3, total= 10.1s #########################fold6 [Voting] ................... (1 of 3) Processing model1, total= 37.6s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold7 [Voting] ................... (1 of 3) Processing model1, total= 35.9s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... 
(3 of 3) Processing model3, total= 10.3s #########################fold8 [Voting] ................... (1 of 3) Processing model1, total= 37.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.2s #########################fold9 [Voting] ................... (1 of 3) Processing model1, total= 37.4s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.1s #########################fold10 for i, (trainval, valvalue) in enumerate(fold.split(scaledtrain, y)): print( '#' * 25+ 'fold'+ str(i)) train_x, val_x= scaledtrain.iloc[trainval], scaledtrain.iloc[valvalue] train_y, val_y= y.iloc[trainval], y.iloc[valvalue] model.fit(train_x, train_y) pred= model.predict_proba(val_x)[:,-1] result= roc_auc_score(val_y, pred) localscore.append(result) testpred= model.predict_proba(scaledtest)[:,-1] testscore.append(testpred)
  • 13. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 13/14 In [26]: In [28]: [Voting] ................... (1 of 3) Processing model1, total= 35.9s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.3s #########################fold11 [Voting] ................... (1 of 3) Processing model1, total= 37.7s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 12.6s #########################fold12 [Voting] ................... (1 of 3) Processing model1, total= 35.8s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.4s #########################fold13 [Voting] ................... (1 of 3) Processing model1, total= 37.1s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.0s #########################fold14 [Voting] ................... (1 of 3) Processing model1, total= 36.2s [Voting] ................... (2 of 3) Processing model2, total= 1.7min [Voting] ................... (3 of 3) Processing model3, total= 10.6s 0.84036499050012 Out[28]: 0 1 2 3 4 5 6 7 8 0 0.837618 0.835608 0.837453 0.856249 0.829797 0.84765 0.836926 0.84438 0.823843 0.85009 print(np.mean(localscore)) output= pd.DataFrame(localscore).T output.head()
  • 14. 03/10/2020 DSN20 - Jupyter Notebook localhost:8888/notebooks/Desktop/DSN20.ipynb 14/14 In [29]: In [33]: In [34]: In [ ]: Out[29]: Applicant_ID default_status 0 Apcnt_1000032 1 1 Apcnt_1000048 1 2 Apcnt_1000052 1 3 Apcnt_1000076 1 4 Apcnt_1000080 1 ... ... ... 23995 Apcnt_999940 1 23996 Apcnt_999956 1 23997 Apcnt_999976 1 23998 Apcnt_999984 1 23999 Apcnt_999992 1 24000 rows × 2 columns submission= pd.read_csv('Datasets/2020 Qualifying Competition/SampleSubmission.cs submission test_pred= np.mean(testscore, axis=0) submission['default_status']= test_pred submission.to_csv('grace.csv', index= False)