Membuka Package

library(foreign)
library(tidyverse)
library(caret)
library(Hmisc)
library(corrplot)
library(lmtest)
library(car)
library(rattle)
library(dplyr)
library(cowplot)
library(readxl)

Membuka File Pengolahan data

Membuka File

setwd("C:/Users/dhihr/Downloads/dengue phlc")
data_fix <- read_excel("data_fix.xlsx")

Mengolah Data Awal

data_fix$ABJ <- as.factor(data_fix$ABJ)
data_fix$Date <- as.Date(data_fix$Date)
str(data_fix)
## tibble [60 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Month-Years: chr [1:60] "2018-01" "2018-02" "2018-03" "2018-04" ...
##  $ Date       : Date[1:60], format: "2018-01-01" "2018-02-01" ...
##  $ Tavg       : num [1:60] 23.2 23.7 23.6 23.5 23.7 ...
##  $ RH_avg     : num [1:60] 77.5 77.1 77.3 78.6 76.9 ...
##  $ RR         : num [1:60] 3.51 9.72 5.72 11.59 5.09 ...
##  $ ss         : num [1:60] 3.94 5.37 5.79 4.59 5.93 ...
##  $ ff_avg     : num [1:60] 2.52 1.79 1.81 1.73 1.45 ...
##  $ kasus      : num [1:60] 267 160 219 232 297 225 193 107 0 0 ...
##  $ ABJ        : Factor w/ 2 levels "Berisiko","Tidak Berisiko": 1 1 1 1 1 1 2 1 2 2 ...

Membagi Data

lastDate<- as.Date("2022-12-30")
startDate <- data_fix$Date[49]
x <- ggplot(data_fix) +
  geom_line(aes(x = Date, y = kasus), size = 1.3, colour = "darkcyan") + labs(title ="Bandung City Dengue Cases 2018-2022", y = " ", x = " ") +
  annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1')  +
  scale_x_date(date_breaks = "5 month", date_labels = "%b-%y")+ geom_text(x=as.Date("2022-06-01"), y=1250, label="Testing", color = "firebrick") +
  geom_text(x=as.Date("2019-06-01"), y=1250, label="Training", color = "gray4")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
x

training<-data_fix[1:48,]
testing<-data_fix[49:60,]

EDA

Dimensi dan Summary Data

dim(training)
## [1] 48  9
summary(training)
##  Month-Years             Date                 Tavg           RH_avg     
##  Length:48          Min.   :2018-01-01   Min.   :23.04   Min.   :68.39  
##  Class :character   1st Qu.:2018-12-24   1st Qu.:23.49   1st Qu.:74.31  
##  Mode  :character   Median :2019-12-16   Median :23.70   Median :77.42  
##                     Mean   :2019-12-16   Mean   :23.69   Mean   :76.63  
##                     3rd Qu.:2020-12-08   3rd Qu.:23.88   3rd Qu.:79.62  
##                     Max.   :2021-12-01   Max.   :24.32   Max.   :81.76  
##        RR               ss            ff_avg          kasus      
##  Min.   : 1.081   Min.   :3.352   Min.   :1.129   Min.   :  0.0  
##  1st Qu.: 3.787   1st Qu.:4.580   1st Qu.:1.677   1st Qu.:104.5  
##  Median : 6.054   Median :5.365   Median :1.806   Median :222.0  
##  Mean   : 6.554   Mean   :5.298   Mean   :1.798   Mean   :222.5  
##  3rd Qu.: 9.458   3rd Qu.:5.906   3rd Qu.:1.976   3rd Qu.:331.2  
##  Max.   :12.822   Max.   :7.287   Max.   :2.516   Max.   :695.0  
##              ABJ    
##  Berisiko      :36  
##  Tidak Berisiko:12  
##                     
##                     
##                     
## 
varnum <- select_if(training, is.numeric)
head(varnum)
## # A tibble: 6 × 6
##    Tavg RH_avg    RR    ss ff_avg kasus
##   <dbl>  <dbl> <dbl> <dbl>  <dbl> <dbl>
## 1  23.2   77.5  3.51  3.94   2.52   267
## 2  23.7   77.1  9.72  5.37   1.79   160
## 3  23.6   77.3  5.72  5.79   1.81   219
## 4  23.5   78.6 11.6   4.59   1.73   232
## 5  23.7   76.9  5.09  5.93   1.45   297
## 6  23.3   75.4  2.84  5.90   1.97   225

Data Numerik

Distribusi

varnum %>% 
  gather() %>% 
  ggplot(aes(x=key, y=value)) + 
  geom_boxplot() + 
  facet_wrap( ~ key, scales="free")

pairs(~ . , data=varnum, lower.panel=NULL)

Korelasi

korelasi <- rcorr(as.matrix(varnum))
korelasi_r <- korelasi$r
korelasi_r
##                 Tavg     RH_avg         RR          ss      ff_avg
## Tavg    1.0000000000  0.2355626  0.3669569 -0.24499596 -0.12870758
## RH_avg  0.2355626360  1.0000000  0.7131046 -0.80386745 -0.41503946
## RR      0.3669569338  0.7131046  1.0000000 -0.61558121 -0.37556764
## ss     -0.2449959585 -0.8038675 -0.6155812  1.00000000  0.05660159
## ff_avg -0.1287075845 -0.4150395 -0.3755676  0.05660159  1.00000000
## kasus   0.0001353229  0.4078794  0.1401594 -0.21287477 -0.47123532
##                kasus
## Tavg    0.0001353229
## RH_avg  0.4078794111
## RR      0.1401593563
## ss     -0.2128747742
## ff_avg -0.4712353184
## kasus   1.0000000000
korelasi_p <- korelasi$P
korelasi_p
##              Tavg       RH_avg           RR           ss       ff_avg
## Tavg           NA 1.070101e-01 1.030195e-02 9.328367e-02 0.3832950053
## RH_avg 0.10701011           NA 1.292286e-08 6.019185e-12 0.0033543647
## RR     0.01030195 1.292286e-08           NA 3.215187e-06 0.0085262540
## ss     0.09328367 6.019185e-12 3.215187e-06           NA 0.7023745562
## ff_avg 0.38329501 3.354365e-03 8.526254e-03 7.023746e-01           NA
## kasus  0.99927167 4.006041e-03 3.420325e-01 1.463146e-01 0.0007231472
##               kasus
## Tavg   0.9992716666
## RH_avg 0.0040060412
## RR     0.3420324521
## ss     0.1463146106
## ff_avg 0.0007231472
## kasus            NA
diag(korelasi_p) <- 0
corrplot(korelasi_r, method="circle", type='upper', 
         p.mat=korelasi_p, sig.level = 0.05, addCoef.col='black')

Data Kategorik

varkat <- select_if(training, is.factor)
head(varkat)
## # A tibble: 6 × 1
##   ABJ     
##   <fct>   
## 1 Berisiko
## 2 Berisiko
## 3 Berisiko
## 4 Berisiko
## 5 Berisiko
## 6 Berisiko
varkat %>% 
  gather() %>% 
  ggplot(aes(y=value)) + 
  geom_bar() + 
  coord_flip() +
  facet_wrap( ~ key, scales="free")

Permodelan

Metode Validasi

fit.control <- trainControl(method = "cv", number = 5)

Linear Regresi

lr <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, 
            data = training, method = "lm", trControl = fit.control)
lr
## Linear Regression 
## 
## 48 samples
##  6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 38, 38, 39, 38, 39 
## Resampling results:
## 
##   RMSE      Rsquared  MAE     
##   113.6312  0.447231  98.89542
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
# cek asumsi linieritas
plot(lr$finalModel, 1)

# cek asumsi homogenitas
bptest(lr$finalModel)
## 
##  studentized Breusch-Pagan test
## 
## data:  lr$finalModel
## BP = 7.5118, df = 6, p-value = 0.2761
# cek asumsi autokorelasi
durbinWatsonTest(lr$finalModel)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.2509618      1.371495   0.012
##  Alternative hypothesis: rho != 0
# cek asumsi multikolinier
vif(lr$finalModel)
##                Tavg              RH_avg                  RR                  ss 
##            1.247265            6.789773            2.368484            4.551645 
##              ff_avg `ABJTidak Berisiko` 
##            1.744278            1.506678
# cek asumsi normalitas
residual = training$kasus - predict(lr, training)
shapiro.test(residual)
## 
##  Shapiro-Wilk normality test
## 
## data:  residual
## W = 0.97973, p-value = 0.5673
ks.test(residual, "pnorm")
## 
##  Exact one-sample Kolmogorov-Smirnov test
## 
## data:  residual
## D = 0.54167, p-value = 1.029e-13
## alternative hypothesis: two-sided
# deksripsi model yang terbentuk
summary(lr)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -207.86  -70.63  -18.82   91.23  228.56 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          418.026   1850.326   0.226 0.822386    
## Tavg                  44.265     65.620   0.675 0.503734    
## RH_avg                -4.428     11.397  -0.388 0.699669    
## RR                   -12.217      7.554  -1.617 0.113480    
## ss                   -56.603     35.011  -1.617 0.113610    
## ff_avg              -266.808     76.378  -3.493 0.001159 ** 
## `ABJTidak Berisiko` -180.346     45.032  -4.005 0.000255 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 110.1 on 41 degrees of freedom
## Multiple R-squared:  0.5294, Adjusted R-squared:  0.4606 
## F-statistic: 7.688 on 6 and 41 DF,  p-value: 1.431e-05

Prediksi dan Testing Linear Regresi

# melakukan prediksi terhadap data testing
testing$prediksiLR <- predict(lr, testing)
DT::datatable(head(testing,20))
# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiLR, testing$kasus)
##        RMSE    Rsquared         MAE 
## 268.3230508   0.1205471 164.1061436
# melihat variabel importance
varImp(lr)
## lm variable importance
## 
##                     Overall
## `ABJTidak Berisiko` 100.000
## ff_avg               85.854
## RR                   33.980
## ss                   33.963
## Tavg                  7.911
## RH_avg                0.000

Ridge Regression

# set range lambda (antara 0.001 sampai 1000)
lambda <- 10^seq(-3, 3, length = 100)

ridge <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, 
               data = training, method = "glmnet", 
               trControl = fit.control,
               tuneGrid = expand.grid(alpha = 0, lambda = lambda))
ridge
## glmnet 
## 
## 48 samples
##  6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 37, 39, 39, 38, 39 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE      Rsquared   MAE      
##   1.000000e-03  119.2576  0.4434593  102.25443
##   1.149757e-03  119.2576  0.4434593  102.25443
##   1.321941e-03  119.2576  0.4434593  102.25443
##   1.519911e-03  119.2576  0.4434593  102.25443
##   1.747528e-03  119.2576  0.4434593  102.25443
##   2.009233e-03  119.2576  0.4434593  102.25443
##   2.310130e-03  119.2576  0.4434593  102.25443
##   2.656088e-03  119.2576  0.4434593  102.25443
##   3.053856e-03  119.2576  0.4434593  102.25443
##   3.511192e-03  119.2576  0.4434593  102.25443
##   4.037017e-03  119.2576  0.4434593  102.25443
##   4.641589e-03  119.2576  0.4434593  102.25443
##   5.336699e-03  119.2576  0.4434593  102.25443
##   6.135907e-03  119.2576  0.4434593  102.25443
##   7.054802e-03  119.2576  0.4434593  102.25443
##   8.111308e-03  119.2576  0.4434593  102.25443
##   9.326033e-03  119.2576  0.4434593  102.25443
##   1.072267e-02  119.2576  0.4434593  102.25443
##   1.232847e-02  119.2576  0.4434593  102.25443
##   1.417474e-02  119.2576  0.4434593  102.25443
##   1.629751e-02  119.2576  0.4434593  102.25443
##   1.873817e-02  119.2576  0.4434593  102.25443
##   2.154435e-02  119.2576  0.4434593  102.25443
##   2.477076e-02  119.2576  0.4434593  102.25443
##   2.848036e-02  119.2576  0.4434593  102.25443
##   3.274549e-02  119.2576  0.4434593  102.25443
##   3.764936e-02  119.2576  0.4434593  102.25443
##   4.328761e-02  119.2576  0.4434593  102.25443
##   4.977024e-02  119.2576  0.4434593  102.25443
##   5.722368e-02  119.2576  0.4434593  102.25443
##   6.579332e-02  119.2576  0.4434593  102.25443
##   7.564633e-02  119.2576  0.4434593  102.25443
##   8.697490e-02  119.2576  0.4434593  102.25443
##   1.000000e-01  119.2576  0.4434593  102.25443
##   1.149757e-01  119.2576  0.4434593  102.25443
##   1.321941e-01  119.2576  0.4434593  102.25443
##   1.519911e-01  119.2576  0.4434593  102.25443
##   1.747528e-01  119.2576  0.4434593  102.25443
##   2.009233e-01  119.2576  0.4434593  102.25443
##   2.310130e-01  119.2576  0.4434593  102.25443
##   2.656088e-01  119.2576  0.4434593  102.25443
##   3.053856e-01  119.2576  0.4434593  102.25443
##   3.511192e-01  119.2576  0.4434593  102.25443
##   4.037017e-01  119.2576  0.4434593  102.25443
##   4.641589e-01  119.2576  0.4434593  102.25443
##   5.336699e-01  119.2576  0.4434593  102.25443
##   6.135907e-01  119.2576  0.4434593  102.25443
##   7.054802e-01  119.2576  0.4434593  102.25443
##   8.111308e-01  119.2576  0.4434593  102.25443
##   9.326033e-01  119.2576  0.4434593  102.25443
##   1.072267e+00  119.2576  0.4434593  102.25443
##   1.232847e+00  119.2576  0.4434593  102.25443
##   1.417474e+00  119.2576  0.4434593  102.25443
##   1.629751e+00  119.2576  0.4434593  102.25443
##   1.873817e+00  119.2576  0.4434593  102.25443
##   2.154435e+00  119.2576  0.4434593  102.25443
##   2.477076e+00  119.2576  0.4434593  102.25443
##   2.848036e+00  119.2576  0.4434593  102.25443
##   3.274549e+00  119.2576  0.4434593  102.25443
##   3.764936e+00  119.2576  0.4434593  102.25443
##   4.328761e+00  119.2576  0.4434593  102.25443
##   4.977024e+00  119.2576  0.4434593  102.25443
##   5.722368e+00  119.2576  0.4434593  102.25443
##   6.579332e+00  119.2576  0.4434593  102.25443
##   7.564633e+00  119.2576  0.4434593  102.25443
##   8.697490e+00  119.1981  0.4434367  102.21297
##   1.000000e+01  118.9290  0.4432413  101.97749
##   1.149757e+01  118.6547  0.4428810  101.71287
##   1.321941e+01  118.3810  0.4423707  101.42335
##   1.519911e+01  118.1124  0.4416922  101.10724
##   1.747528e+01  117.8530  0.4408220  100.76386
##   2.009233e+01  117.6075  0.4397704  100.39063
##   2.310130e+01  117.3793  0.4385468   99.98532
##   2.656088e+01  117.1769  0.4371328   99.54897
##   3.053856e+01  117.0068  0.4355233   99.07938
##   3.511192e+01  116.8755  0.4337359   98.57608
##   4.037017e+01  116.7925  0.4317642   98.13683
##   4.641589e+01  116.7634  0.4296510   97.71000
##   5.336699e+01  116.8000  0.4273535   97.26408
##   6.135907e+01  116.9053  0.4249421   96.85440
##   7.054802e+01  117.0929  0.4223701   96.62571
##   8.111308e+01  117.3620  0.4197330   96.56000
##   9.326033e+01  117.7273  0.4169485   96.56063
##   1.072267e+02  118.1823  0.4141386   96.82248
##   1.232847e+02  118.7419  0.4112065   97.11218
##   1.417474e+02  119.3918  0.4082965   97.79120
##   1.629751e+02  120.1463  0.4052903   98.67835
##   1.873817e+02  120.9825  0.4023471   99.54069
##   2.154435e+02  121.9135  0.3993367  100.39238
##   2.477076e+02  122.9077  0.3964302  101.28193
##   2.848036e+02  123.9771  0.3934889  102.25488
##   3.274549e+02  125.0835  0.3906936  103.17630
##   3.764936e+02  126.2387  0.3878948  104.06533
##   4.328761e+02  127.4011  0.3852745  104.89419
##   4.977024e+02  128.5835  0.3826861  105.91833
##   5.722368e+02  129.7441  0.3803018  106.93288
##   6.579332e+02  130.8976  0.3779772  107.88799
##   7.564633e+02  132.0053  0.3758698  108.76895
##   8.697490e+02  133.0841  0.3738411  109.63172
##   1.000000e+03  134.1001  0.3720290  110.41305
## 
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 46.41589.
# Model coefficients
coef(ridge$finalModel, ridge$bestTune$lambda)
## 7 x 1 sparse Matrix of class "dgCMatrix"
##                            s1
## (Intercept)         27.327073
## Tavg                12.484195
## RH_avg               4.540485
## RR                  -5.459452
## ss                 -16.501802
## ff_avg            -163.445043
## ABJTidak Berisiko -125.686956

Prediksi dan Testing Ridge Regression

# melakukan prediksi terhadap data testing
testing$prediksiRidge <- predict(ridge, testing)
DT::datatable(head(testing,20))
# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiRidge, testing$kasus)
##         RMSE     Rsquared          MAE 
## 2.973676e+02 7.458723e-05 1.789785e+02
# melihat variabel importance
varImp(ridge)
## glmnet variable importance
## 
##                    Overall
## ff_avg            100.0000
## ABJTidak Berisiko  76.2385
## ss                  7.5274
## Tavg                4.9990
## RR                  0.5783
## RH_avg              0.0000

Lasso Regression

lasso <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, 
               data = training, method = "glmnet", 
               trControl = fit.control,
               tuneGrid = expand.grid(alpha = 1, lambda = lambda))
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
lasso
## glmnet 
## 
## 48 samples
##  6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 40, 39, 37, 38, 38 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE      Rsquared    MAE      
##   1.000000e-03  120.3470  0.39128335  101.48556
##   1.149757e-03  120.3470  0.39128335  101.48556
##   1.321941e-03  120.3470  0.39128335  101.48556
##   1.519911e-03  120.3470  0.39128335  101.48556
##   1.747528e-03  120.3470  0.39128335  101.48556
##   2.009233e-03  120.3470  0.39128335  101.48556
##   2.310130e-03  120.3470  0.39128335  101.48556
##   2.656088e-03  120.3470  0.39128335  101.48556
##   3.053856e-03  120.3470  0.39128335  101.48556
##   3.511192e-03  120.3470  0.39128335  101.48556
##   4.037017e-03  120.3470  0.39128335  101.48556
##   4.641589e-03  120.3470  0.39128335  101.48556
##   5.336699e-03  120.3470  0.39128335  101.48556
##   6.135907e-03  120.3470  0.39128335  101.48556
##   7.054802e-03  120.3470  0.39128335  101.48556
##   8.111308e-03  120.3470  0.39128335  101.48556
##   9.326033e-03  120.3470  0.39128335  101.48556
##   1.072267e-02  120.3470  0.39128335  101.48556
##   1.232847e-02  120.3470  0.39128335  101.48556
##   1.417474e-02  120.3470  0.39128335  101.48556
##   1.629751e-02  120.3470  0.39128335  101.48556
##   1.873817e-02  120.3470  0.39128335  101.48556
##   2.154435e-02  120.3470  0.39128335  101.48556
##   2.477076e-02  120.3470  0.39128335  101.48556
##   2.848036e-02  120.3470  0.39128335  101.48556
##   3.274549e-02  120.3470  0.39128335  101.48556
##   3.764936e-02  120.3470  0.39128335  101.48556
##   4.328761e-02  120.3470  0.39128335  101.48556
##   4.977024e-02  120.3470  0.39128335  101.48556
##   5.722368e-02  120.3470  0.39128335  101.48556
##   6.579332e-02  120.3470  0.39128335  101.48556
##   7.564633e-02  120.3470  0.39128335  101.48556
##   8.697490e-02  120.3470  0.39128335  101.48556
##   1.000000e-01  120.3470  0.39128335  101.48556
##   1.149757e-01  120.3504  0.39125087  101.48621
##   1.321941e-01  120.3611  0.39114982  101.48831
##   1.519911e-01  120.3746  0.39102198  101.49105
##   1.747528e-01  120.3857  0.39087951  101.48887
##   2.009233e-01  120.3784  0.39073427  101.46680
##   2.310130e-01  120.3321  0.39077808  101.41511
##   2.656088e-01  120.2645  0.39094523  101.34626
##   3.053856e-01  120.1884  0.39114484  101.26822
##   3.511192e-01  120.1025  0.39136189  101.17868
##   4.037017e-01  120.0039  0.39160342  101.07431
##   4.641589e-01  119.8839  0.39195489  100.94837
##   5.336699e-01  119.7453  0.39237452  100.80199
##   6.135907e-01  119.5883  0.39285328  100.63412
##   7.054802e-01  119.4067  0.39342149  100.43823
##   8.111308e-01  119.2020  0.39407660  100.21512
##   9.326033e-01  118.9735  0.39479063   99.95863
##   1.072267e+00  118.7164  0.39558720   99.66352
##   1.232847e+00  118.4272  0.39649182   99.32291
##   1.417474e+00  118.1039  0.39752212   98.93114
##   1.629751e+00  117.7482  0.39862061   98.47961
##   1.873817e+00  117.3569  0.39982512   97.96348
##   2.154435e+00  116.9046  0.40130327   97.36242
##   2.477076e+00  116.4207  0.40282363   96.68922
##   2.848036e+00  115.9934  0.40338881   96.02579
##   3.274549e+00  115.7066  0.40197817   95.44443
##   3.764936e+00  115.4516  0.40046844   94.85984
##   4.328761e+00  115.1500  0.40120997   94.42150
##   4.977024e+00  114.8586  0.40177020   94.13111
##   5.722368e+00  114.5842  0.40219519   93.85075
##   6.579332e+00  114.3913  0.40194667   93.54882
##   7.564633e+00  114.3158  0.40081395   93.23505
##   8.697490e+00  114.3832  0.39907593   92.93207
##   1.000000e+01  114.3270  0.39931433   92.54670
##   1.149757e+01  114.1762  0.40080265   92.00384
##   1.321941e+01  114.1165  0.40142545   91.57500
##   1.519911e+01  114.2133  0.40068777   91.49544
##   1.747528e+01  114.5807  0.39833212   91.70689
##   2.009233e+01  115.1017  0.39628911   91.98444
##   2.310130e+01  115.8635  0.39380981   92.56529
##   2.656088e+01  116.9108  0.39097111   93.41853
##   3.053856e+01  118.3731  0.38667432   94.72036
##   3.511192e+01  120.3962  0.37940310   96.94358
##   4.037017e+01  123.1195  0.36766205   99.97948
##   4.641589e+01  126.2748  0.35599813  103.12005
##   5.336699e+01  129.7960  0.35063499  106.23887
##   6.135907e+01  134.5364  0.34332318  110.32946
##   7.054802e+01  139.9144  0.33990573  114.51046
##   8.111308e+01  144.3200  0.16565880  118.14366
##   9.326033e+01  145.3157  0.03884456  119.04683
##   1.072267e+02  145.4718         NaN  119.10121
##   1.232847e+02  145.4718         NaN  119.10121
##   1.417474e+02  145.4718         NaN  119.10121
##   1.629751e+02  145.4718         NaN  119.10121
##   1.873817e+02  145.4718         NaN  119.10121
##   2.154435e+02  145.4718         NaN  119.10121
##   2.477076e+02  145.4718         NaN  119.10121
##   2.848036e+02  145.4718         NaN  119.10121
##   3.274549e+02  145.4718         NaN  119.10121
##   3.764936e+02  145.4718         NaN  119.10121
##   4.328761e+02  145.4718         NaN  119.10121
##   4.977024e+02  145.4718         NaN  119.10121
##   5.722368e+02  145.4718         NaN  119.10121
##   6.579332e+02  145.4718         NaN  119.10121
##   7.564633e+02  145.4718         NaN  119.10121
##   8.697490e+02  145.4718         NaN  119.10121
##   1.000000e+03  145.4718         NaN  119.10121
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 13.21941.
# Model coefficients
coef(lasso$finalModel, lasso$bestTune$lambda)
## 7 x 1 sparse Matrix of class "dgCMatrix"
##                          s1
## (Intercept)        611.0503
## Tavg                 .     
## RH_avg               .     
## RR                   .     
## ss                 -10.5608
## ff_avg            -164.9893
## ABJTidak Berisiko -144.0374

Prediksi dengan Lasso Regression

# melakukan prediksi terhadap data testing
testing$prediksiLasso <- predict(lasso, testing)
DT::datatable(head(testing,20))
# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiLasso, testing$kasus)
##         RMSE     Rsquared          MAE 
## 3.064909e+02 9.529632e-03 1.809761e+02
# melihat variabel importance
varImp(lasso)
## glmnet variable importance
## 
##                   Overall
## ff_avg            100.000
## ABJTidak Berisiko  87.301
## ss                  6.401
## Tavg                0.000
## RR                  0.000
## RH_avg              0.000

Regression Tree

regtree <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, 
                 data = training, method = "rpart", trControl = fit.control)
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
regtree
## CART 
## 
## 48 samples
##  6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 39, 38, 37, 39, 39 
## Resampling results across tuning parameters:
## 
##   cp         RMSE      Rsquared   MAE     
##   0.0301086  148.4326  0.2295502  115.7412
##   0.1218599  149.1598  0.2139198  111.9254
##   0.3233249  156.2884  0.1315086  120.8747
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.0301086.
# menampilkan plot decision tree
fancyRpartPlot(regtree$finalModel)

Prediksi dengan Regression Tree

# melakukan prediksi terhadap data testing
testing$prediksiTree <- predict(regtree, testing)
DT::datatable(head(testing,20))
# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiTree, testing$kasus)
##         RMSE     Rsquared          MAE 
## 3.264468e+02 7.088687e-05 2.060725e+02
# melihat variabel importance
varImp(regtree)
## rpart variable importance
## 
##                     Overall
## ff_avg               100.00
## RH_avg                85.43
## ABJTidak Berisiko     73.83
## ss                    59.47
## RR                    32.94
## Tavg                  28.40
## `ABJTidak Berisiko`    0.00

Random Forest

rf <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, data = training, method = "rf", trControl = fit.control)
rf
## Random Forest 
## 
## 48 samples
##  6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 38, 39, 39, 37, 39 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE      
##   2     125.4204  0.3366489  100.19985
##   4     124.0048  0.3546408   97.93969
##   6     123.1344  0.3766345   98.09341
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 6.

Prediksi dengan Random Forest

# melakukan prediksi terhadap data testing
testing$prediksiForest <- predict(rf, testing)
DT::datatable(head(testing,20))
# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiForest, testing$kasus)
##         RMSE     Rsquared          MAE 
## 2.937971e+02 4.352322e-03 1.732728e+02
# melihat variabel importance
varImp(rf)
## rf variable importance
## 
##                   Overall
## ff_avg             100.00
## ABJTidak Berisiko   76.83
## Tavg                31.51
## RH_avg              17.07
## ss                  11.94
## RR                   0.00

Support Vector Regression (SVM)

svr <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, data = training, method = "svmLinear", trControl = fit.control)
svr
## Support Vector Machines with Linear Kernel 
## 
## 48 samples
##  6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 38, 37, 38, 40, 39 
## Resampling results:
## 
##   RMSE      Rsquared   MAE    
##   124.4817  0.3096192  102.978
## 
## Tuning parameter 'C' was held constant at a value of 1

Prediksi dengan SVM

# melakukan prediksi terhadap data testing
testing$prediksiSVR <- predict(svr, testing)
DT::datatable(head(testing,20))
# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiSVR, testing$kasus)
##       RMSE   Rsquared        MAE 
## 280.194893   0.184741 172.621785

Perbandingan Model

# menampilkan perbandingan hasil antar model
model_list <- list(LinearRegression = lr,
                   RidgeRegression = ridge,
                   LassoRegression = lasso,
                   RegressionTree = regtree, 
                   RandomForestReg = rf, 
                   SupportVectorReg = svr)
res <- resamples(model_list)
summary(res)
## 
## Call:
## summary.resamples(object = res)
## 
## Models: LinearRegression, RidgeRegression, LassoRegression, RegressionTree, RandomForestReg, SupportVectorReg 
## Number of resamples: 5 
## 
## MAE 
##                       Min.   1st Qu.    Median      Mean   3rd Qu.     Max.
## LinearRegression  64.56357  68.95197 108.68633  98.89542 110.13093 142.1443
## RidgeRegression   61.70995  77.56574 116.13732  97.71000 116.50914 116.6279
## LassoRegression   73.15082  87.78440  89.84569  91.57500  92.89956 114.1945
## RegressionTree   101.54656 105.02751 114.15455 115.74116 114.50444 143.4727
## RandomForestReg   77.58886  81.60344  92.56650  98.09341 119.27213 119.4361
## SupportVectorReg  77.08317  93.55125 103.72574 102.97805 104.13163 136.3985
##                  NA's
## LinearRegression    0
## RidgeRegression     0
## LassoRegression     0
## RegressionTree      0
## RandomForestReg     0
## SupportVectorReg    0
## 
## RMSE 
##                       Min.   1st Qu.   Median     Mean  3rd Qu.     Max. NA's
## LinearRegression  76.20058  77.15340 121.3971 113.6312 134.7006 158.7041    0
## RidgeRegression   70.84932  89.33320 132.8882 116.7634 135.5371 155.2092    0
## LassoRegression   86.09101 102.00838 110.8140 114.1165 111.9685 159.7007    0
## RegressionTree   132.77916 138.73148 138.7877 148.4326 155.8879 175.9767    0
## RandomForestReg   92.48192  99.50272 111.7852 123.1344 144.6301 167.2719    0
## SupportVectorReg 103.82612 113.23867 119.0237 124.4817 121.9898 164.3301    0
## 
## Rsquared 
##                          Min.   1st Qu.    Median      Mean   3rd Qu.      Max.
## LinearRegression 0.2109021137 0.2775890 0.3180561 0.4472310 0.7100149 0.7195929
## RidgeRegression  0.2788380791 0.2807190 0.4799432 0.4296510 0.5045099 0.6042446
## LassoRegression  0.1707081283 0.3120274 0.3619458 0.4014254 0.5354107 0.6270353
## RegressionTree   0.0078928159 0.1752574 0.2232302 0.2295502 0.3696945 0.3716764
## RandomForestReg  0.0163983519 0.1672217 0.2663317 0.3766345 0.5905772 0.8426435
## SupportVectorReg 0.0002824777 0.2764985 0.2866044 0.3096192 0.4138583 0.5708524
##                  NA's
## LinearRegression    0
## RidgeRegression     0
## LassoRegression     0
## RegressionTree      0
## RandomForestReg     0
## SupportVectorReg    0
# bandingkan semua model yang sudah dibuat
perbandingan <- data.frame(Linear = round(postResample(testing$prediksiLR, testing$kasus),3),
                           Ridge = round(postResample(testing$prediksiRidge, testing$kasus),3),
                           Lasso = round(postResample(testing$prediksiLasso, testing$kasus),3),
                           RegTree = round(postResample(testing$prediksiTree, testing$kasus),3),
                           RandomForest = round(postResample(testing$prediksiForest, testing$kasus),3),
                           SVR = round(postResample(testing$prediksiSVR, testing$kasus),3))
DT::datatable(perbandingan)
perbandingan
##           Linear   Ridge   Lasso RegTree RandomForest     SVR
## RMSE     268.323 297.368 306.491 326.447      293.797 280.195
## Rsquared   0.121   0.000   0.010   0.000        0.004   0.185
## MAE      164.106 178.979 180.976 206.073      173.273 172.622

Grafik Perbandingan

perbandingan
##           Linear   Ridge   Lasso RegTree RandomForest     SVR
## RMSE     268.323 297.368 306.491 326.447      293.797 280.195
## Rsquared   0.121   0.000   0.010   0.000        0.004   0.185
## MAE      164.106 178.979 180.976 206.073      173.273 172.622
training$prediksiLR <- NA
training$prediksiRidge <- NA
training$prediksiLasso <- NA
training$prediksiTree <- NA
training$prediksiForest <- NA
training$prediksiSVR <- NA
final <- rbind(training, testing)

x1 <- ggplot()+
  geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
  geom_line(data=final,aes(y=prediksiLR,x= Date,colour="Prediction LR"),size=1) +
  scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction LR" = "orchid4")) +
  annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
  labs(title ="Observation-LR", y = " ", x = " ") +
  theme(legend.position="bottom", legend.box = "horizontal") +
  scale_x_date(date_breaks = "12 month", date_labels = "%Y") 
x2 <- ggplot()+
  geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
  geom_line(data=final,aes(y=prediksiTree,x= Date,colour="Prediction Reg Tree"),size=1) +
  scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction Reg Tree" = "violetred3")) +
  annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
  labs(title ="Observation-Reg Tree", y = " ", x = " ") +
  theme(legend.position="bottom", legend.box = "horizontal") +
  scale_x_date(date_breaks = "12 month", date_labels = "%Y") 
x3 <- ggplot()+
  geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
  geom_line(data=final,aes(y=prediksiLasso,x= Date,colour="Prediction Lasso"),size=1) +
  scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction Lasso" = "darkorange4")) +
  annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
  labs(title ="Observation-Lasso", y = " ", x = " ") +
  theme(legend.position="bottom", legend.box = "horizontal")+
  scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x4 <- ggplot()+
  geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
  geom_line(data=final,aes(y=prediksiForest,x= Date,colour="Prediction RForest"),size=1) +
  scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction RForest" = "midnightblue")) +
  annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
  labs(title ="Observation-RForest", y = " ", x = " ")+
  theme(legend.position="bottom", legend.box = "horizontal")+
  scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x5 <- ggplot()+
  geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
  geom_line(data=final,aes(y=prediksiSVR,x= Date,colour="Prediction SVR"),size=1) +
  scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction SVR" = "green4")) +
  annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
  labs(title ="Observation-SVR", y = " ", x = " ")+
  theme(legend.position="bottom", legend.box = "horizontal")+
  scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x6 <- ggplot()+
  geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
  geom_line(data=final,aes(y=prediksiSVR,x= Date,colour="Prediction Ridge Reg"),size=1) +
  scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction Ridge Reg" = "blue")) +
  annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
  labs(title ="Observation-Ridge Reg", y = " ", x = " ")+
  theme(legend.position="bottom", legend.box = "horizontal")+
  scale_x_date(date_breaks = "12 month", date_labels = "%Y")
plot_grid(x1, x3, x6, x2, x4, x5, labels = "AUTO")