# Directory prefix for every file this script reads or writes (pickled
# '.model' files in; '.fmap', '.xlsx' and the importance CSV out).
# Paths are built by plain string concatenation, so the trailing slash is required.
ModelsDir = '/home/kate/Research/Property/Models/'

import pandas as pd
import numpy as np
import pickle
import xgboost as xgb
import os
import xgbfir

# Base names of the models to process; the fold index is appended later to
# form the actual file name. Commented-out entries are currently disabled.
# Every entry ends with a comma so that enabling a neighbour cannot silently
# concatenate two adjacent string literals.
Models = [
    'basemodel0_class_XGB',
    # 'basemodel1_class_XGB',
    # 'wc_Poisson_bm_ecy_XGB',
    # 'wc_Poisson_f_ecy_XGB',
    # 'wc_class_f_ecy_XGB',
    # 'wc_class_bm_ecy_XGB',
    # 'wc_Linear_Reg_XGB_mae',
    # 'wc_LogRegObj_Reg_XGB_mae',
    # 'wc_Gamma_Reg_XGB_mae',
]

# Feature lists, one per model family. The position of a name in a list is
# the feature index written to the .fmap file, so order is significant.

# Smallest set: the seven shared property/policy attributes.
featureset_basemodel0 = [
    'roofcd_encd',
    'sqft',
    'usagetype_encd',
    'yearbuilt',
    'water_risk_3_blk',
    'landlordind',
    'multipolicyind',
]

# basemodel0 plus the two coverage-A columns.
featureset_basemodel1 = featureset_basemodel0 + ['cova_deductible', 'cova_limit']

# Same columns as basemodel1, kept as an independent list object.
featureset_bm = list(featureset_basemodel1)

# The '_bm' columns followed by 'ecy'.
featureset_f = featureset_bm + ['ecy']

# Separate set with its own column order (selected for '_Reg_' models).
featureset_s = [
    'cova_deductible',
    'roofcd_encd',
    'water_risk_sev_3_blk',
    'sqft',
    'rep_cost_3_blk',
    'yearbuilt',
    'ecy',
    'usagetype_encd',
]

# Number of folds per model: fold indices 0 .. kfold-1 are processed, and each
# fold is loaded from its own '<Model>_<fold>.model' file.
kfold = 1

def create_fmap(ModelName, featureset, models_dir=None):
    """Write a feature-map (.fmap) file for ``featureset`` and return its path.

    One line is written per feature, in list order, with three tab-separated
    fields: the zero-based feature index, the feature name, and the type
    code ``q`` (XGBoost's feature-map marker for a quantitative feature).

    Args:
        ModelName: Base name of the output file; ``.fmap`` is appended.
        featureset: Iterable of feature names. The position of a name is the
            feature index recorded in the file.
        models_dir: Directory prefix prepended verbatim to the file name, so
            it must end with a path separator. Defaults to the module-level
            ``ModelsDir``.

    Returns:
        The path of the file that was written.
    """
    if models_dir is None:
        models_dir = ModelsDir
    fmap_filename = '%s%s.fmap' % (models_dir, ModelName)
    # The context manager closes the file even if a write fails.
    with open(fmap_filename, 'w') as outfile:
        outfile.writelines(
            '{0}\t{1}\tq\n'.format(i, feat) for i, feat in enumerate(featureset)
        )
    return fmap_filename

def _select_featureset(model):
    """Return the feature list for ``model``, chosen by markers in its name."""
    # The first matching marker wins, so the order of these checks is significant.
    if '_bm_' in model:
        return featureset_bm
    if 'basemodel0' in model:
        return featureset_basemodel0
    if 'basemodel1' in model:
        return featureset_basemodel1
    if '_Reg_' in model:
        return featureset_s
    return featureset_f


# (output column, XGBoost importance_type) pairs, in output order.
_IMPORTANCE_TYPES = (('Weight', 'weight'), ('Gain', 'gain'), ('Cover', 'cover'))
# Column order of the CSV written at the end.
_OUTPUT_COLUMNS = ['ModelName', 'fold', 'FeatureName', 'Weight', 'Gain', 'Cover']

# Collect one frame per (model, fold) and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 and copies the whole
# accumulated frame on every call.
feat_imp_frames = []
for Model in Models:
    for i in range(0, kfold):
        ModelName = Model + "_%s" % i
        xgb_model_file = '%s%s.model' % (ModelsDir, ModelName)
        print('Processing model %s, fold %s...' % (Model, i))
        # NOTE: unpickling executes arbitrary code; only load model files
        # from a trusted location.
        with open(xgb_model_file, 'rb') as model_file:
            xgb_model = pickle.load(model_file)

        # Feature importance: one column per importance type, left-joined
        # on the feature names reported for 'weight'.
        featureset = _select_featureset(Model)
        fmap_filename = create_fmap(ModelName, featureset)
        feat_imp = None
        for column, importance_type in _IMPORTANCE_TYPES:
            scores = pd.Series(
                xgb_model.get_score(fmap=fmap_filename, importance_type=importance_type),
                name=column,
            ).to_frame()
            feat_imp = scores if feat_imp is None else feat_imp.join(scores)
        feat_imp['fold'] = i
        feat_imp['FeatureName'] = feat_imp.index
        feat_imp['ModelName'] = Model
        feat_imp_frames.append(feat_imp)

        # Feature interactions: xgbfir writes its report to one xlsx per model/fold.
        xgbfir.saveXgbFI(xgb_model, feature_names=featureset,  TopK = 500,  MaxTrees = 500, MaxInteractionDepth = 2, OutputXlsxFile = '%s%s_%s.xlsx'%(ModelsDir,Model,i))

if feat_imp_frames:
    feat_imp_all = pd.concat(feat_imp_frames, ignore_index=True)
else:
    # Nothing was processed (empty Models or kfold == 0): write a header-only
    # CSV instead of failing on the column selection below.
    feat_imp_all = pd.DataFrame(columns=_OUTPUT_COLUMNS)
feat_imp_all = feat_imp_all[_OUTPUT_COLUMNS]
feat_imp_all.to_csv('%swc_feature_importance.csv' % ModelsDir, header=True, index=False)

# Output of the run above:
# Processing model basemodel0_class_XGB, fold 0...