In [1]:
# Run configuration for this notebook.
# DataDir keeps its trailing slash: the data-loading and write-back cells
# build file names by plain string concatenation onto it.
# ModelsDir, ModelName and UseSavedIfExists are only defined here; the cells
# in this notebook do not reference them.
ModelsDir        <- "/home/kate/Research/Property/Models/"
DataDir          <- "/home/kate/Research/Property/Data/"
ModelName        <- "wc_gamma_glm"
UseSavedIfExists <- TRUE
In [2]:
library(Metrics)
Warning message:
“package ‘Metrics’ was built under R version 3.6.3”
In [3]:
source('/home/kate/code/Utils/MyFunctions.R')
In [4]:
# Load the three CSV inputs from DataDir: the sample the Gamma model is fitted
# on, the testing sample, and the data set that will be scored.
# DataDir is expected to end with a path separator (file names are appended
# directly).
training_dataset <- read.csv(
  file = paste0(DataDir, "property_wcs_training_for_gamma.csv"),
  header = TRUE
)
testing_dataset <- read.csv(
  file = paste0(DataDir, "property_wcf_testing.csv"),
  header = TRUE
)
prediction_dataset <- read.csv(
  file = paste0(DataDir, "property_water_claims_non_cat_fs.csv"),
  header = TRUE
)
In [5]:
# Model specification: cova_il_nc_water is the response, regressed on seven
# main-effect predictors (no interactions).
formula <- cova_il_nc_water ~
  ecy +
  cova_deductible +
  log_yearbuilt +
  log_sqft +
  log_water_risk_sev_3_blk +
  rep_cost_3_blk +
  usagetype_encd
In [6]:
# Fit a Gamma GLM with a log link to the training sample, then display the
# coefficient table and deviance statistics.
Model <- glm(
  formula = formula,
  family = Gamma(link = "log"),
  data = training_dataset
)
summary(Model)
Call:
glm(formula = formula, family = Gamma(link = "log"), data = training_dataset)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.7706  -1.0235  -0.3837   0.3505   2.6138  

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               1.414e+01  7.675e+00   1.842 0.065524 .  
ecy                       1.514e-02  3.798e-02   0.399 0.690079    
cova_deductible           1.174e-04  1.315e-05   8.923  < 2e-16 ***
log_yearbuilt            -9.440e-01  1.016e+00  -0.929 0.352931    
log_sqft                  9.682e-02  2.857e-02   3.389 0.000704 ***
log_water_risk_sev_3_blk  2.449e-01  4.427e-02   5.531 3.25e-08 ***
rep_cost_3_blk            3.614e-02  1.358e-02   2.660 0.007820 ** 
usagetype_encd            1.036e-03  1.851e-02   0.056 0.955377    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for Gamma family taken to be 0.9426904)

    Null deviance: 12323  on 11413  degrees of freedom
Residual deviance: 12120  on 11406  degrees of freedom
AIC: 233106

Number of Fisher Scoring iterations: 6
In [7]:
# Score all three data sets with the fitted model and store the result in a
# new gamma_glm column. type = "response" returns predictions on the scale of
# the response (the inverse link is applied), not the linear predictor.
# se.fit is left at its default (FALSE): the standard errors were never used,
# only the fitted values, so computing them for every row was wasted work.
training_dataset$gamma_glm <- predict(
  Model,
  newdata = training_dataset,
  type = "response"
)
testing_dataset$gamma_glm <- predict(
  Model,
  newdata = testing_dataset,
  type = "response"
)
prediction_dataset$gamma_glm <- predict(
  Model,
  newdata = prediction_dataset,
  type = "response"
)

Train Dataset Scores

In [8]:
# Normalized weighted Gini of the model predictions against the observed
# response on the training sample.
# NOTE(review): NormalizedWeightedGini is not defined in this notebook
# (presumably it comes from the sourced MyFunctions.R); the third argument,
# ecy, is presumably the weight -- confirm against the helper's definition.
with(
  training_dataset,
  NormalizedWeightedGini(cova_il_nc_water, gamma_glm, ecy)
)
0.156121766672429
In [9]:
# Mean absolute error on the training sample (observed first, predicted second).
with(training_dataset, mae(cova_il_nc_water, gamma_glm))
7471.69373527092
In [10]:
# Root mean squared error on the training sample (observed first, predicted
# second).
with(training_dataset, rmse(cova_il_nc_water, gamma_glm))
9716.65206063733

Test Dataset Scores

In [11]:
# Normalized weighted Gini on the testing sample, restricted to rows where
# cova_ic_nc_water > 0. The row subset is evaluated once and shared by all
# three arguments.
# NOTE(review): the filter column (cova_ic_nc_water) is not the response
# column (cova_il_nc_water) -- confirm the "ic" column is the intended filter.
with(
  testing_dataset[testing_dataset$cova_ic_nc_water > 0, ],
  NormalizedWeightedGini(cova_il_nc_water, gamma_glm, ecy)
)
0.247633972906366
In [12]:
# Mean absolute error on the testing sample, restricted to rows where
# cova_ic_nc_water > 0 (observed first, predicted second).
with(
  testing_dataset[testing_dataset$cova_ic_nc_water > 0, ],
  mae(cova_il_nc_water, gamma_glm)
)
11551.6952148738
In [13]:
# Root mean squared error on the testing sample, restricted to rows where
# cova_ic_nc_water > 0 (observed first, predicted second).
with(
  testing_dataset[testing_dataset$cova_ic_nc_water > 0, ],
  rmse(cova_il_nc_water, gamma_glm)
)
21480.4463578529
In [14]:
# Persist the data sets, now carrying the gamma_glm prediction column, back to
# the same CSV files they were loaded from (the inputs are overwritten in
# place).
# write.csv is used instead of write.table(sep = ","): it writes the same
# comma-separated layout with a header row, but doubles embedded quote
# characters (qmethod = "double") rather than backslash-escaping them, which is
# the form read.csv expects when these files are loaded again.
write.csv(
  training_dataset,
  file = paste0(DataDir, "property_wcs_training_for_gamma.csv"),
  row.names = FALSE
)
write.csv(
  testing_dataset,
  file = paste0(DataDir, "property_wcf_testing.csv"),
  row.names = FALSE
)
write.csv(
  prediction_dataset,
  file = paste0(DataDir, "property_water_claims_non_cat_fs.csv"),
  row.names = FALSE
)