library(foreign)
library(tidyverse)
library(caret)
library(Hmisc)
library(corrplot)
library(lmtest)
library(car)
library(rattle)
library(dplyr)
library(cowplot)
library(readxl)setwd("C:/Users/dhihr/Downloads/dengue phlc")
data_fix <- read_excel("data_fix.xlsx")data_fix$ABJ <- as.factor(data_fix$ABJ)
data_fix$Date <- as.Date(data_fix$Date)
str(data_fix)## tibble [60 × 9] (S3: tbl_df/tbl/data.frame)
## $ Month-Years: chr [1:60] "2018-01" "2018-02" "2018-03" "2018-04" ...
## $ Date : Date[1:60], format: "2018-01-01" "2018-02-01" ...
## $ Tavg : num [1:60] 23.2 23.7 23.6 23.5 23.7 ...
## $ RH_avg : num [1:60] 77.5 77.1 77.3 78.6 76.9 ...
## $ RR : num [1:60] 3.51 9.72 5.72 11.59 5.09 ...
## $ ss : num [1:60] 3.94 5.37 5.79 4.59 5.93 ...
## $ ff_avg : num [1:60] 2.52 1.79 1.81 1.73 1.45 ...
## $ kasus : num [1:60] 267 160 219 232 297 225 193 107 0 0 ...
## $ ABJ : Factor w/ 2 levels "Berisiko","Tidak Berisiko": 1 1 1 1 1 1 2 1 2 2 ...
lastDate<- as.Date("2022-12-30")
startDate <- data_fix$Date[49]
x <- ggplot(data_fix) +
geom_line(aes(x = Date, y = kasus), size = 1.3, colour = "darkcyan") + labs(title ="Bandung City Dengue Cases 2018-2022", y = " ", x = " ") +
annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
scale_x_date(date_breaks = "5 month", date_labels = "%b-%y")+ geom_text(x=as.Date("2022-06-01"), y=1250, label="Testing", color = "firebrick") +
geom_text(x=as.Date("2019-06-01"), y=1250, label="Training", color = "gray4")## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
xtraining<-data_fix[1:48,]
testing<-data_fix[49:60,]dim(training)## [1] 48 9
summary(training)## Month-Years Date Tavg RH_avg
## Length:48 Min. :2018-01-01 Min. :23.04 Min. :68.39
## Class :character 1st Qu.:2018-12-24 1st Qu.:23.49 1st Qu.:74.31
## Mode :character Median :2019-12-16 Median :23.70 Median :77.42
## Mean :2019-12-16 Mean :23.69 Mean :76.63
## 3rd Qu.:2020-12-08 3rd Qu.:23.88 3rd Qu.:79.62
## Max. :2021-12-01 Max. :24.32 Max. :81.76
## RR ss ff_avg kasus
## Min. : 1.081 Min. :3.352 Min. :1.129 Min. : 0.0
## 1st Qu.: 3.787 1st Qu.:4.580 1st Qu.:1.677 1st Qu.:104.5
## Median : 6.054 Median :5.365 Median :1.806 Median :222.0
## Mean : 6.554 Mean :5.298 Mean :1.798 Mean :222.5
## 3rd Qu.: 9.458 3rd Qu.:5.906 3rd Qu.:1.976 3rd Qu.:331.2
## Max. :12.822 Max. :7.287 Max. :2.516 Max. :695.0
## ABJ
## Berisiko :36
## Tidak Berisiko:12
##
##
##
##
varnum <- select_if(training, is.numeric)
head(varnum)## # A tibble: 6 × 6
## Tavg RH_avg RR ss ff_avg kasus
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 23.2 77.5 3.51 3.94 2.52 267
## 2 23.7 77.1 9.72 5.37 1.79 160
## 3 23.6 77.3 5.72 5.79 1.81 219
## 4 23.5 78.6 11.6 4.59 1.73 232
## 5 23.7 76.9 5.09 5.93 1.45 297
## 6 23.3 75.4 2.84 5.90 1.97 225
varnum %>%
gather() %>%
ggplot(aes(x=key, y=value)) +
geom_boxplot() +
facet_wrap( ~ key, scales="free")pairs(~ . , data=varnum, lower.panel=NULL)korelasi <- rcorr(as.matrix(varnum))
korelasi_r <- korelasi$r
korelasi_r## Tavg RH_avg RR ss ff_avg
## Tavg 1.0000000000 0.2355626 0.3669569 -0.24499596 -0.12870758
## RH_avg 0.2355626360 1.0000000 0.7131046 -0.80386745 -0.41503946
## RR 0.3669569338 0.7131046 1.0000000 -0.61558121 -0.37556764
## ss -0.2449959585 -0.8038675 -0.6155812 1.00000000 0.05660159
## ff_avg -0.1287075845 -0.4150395 -0.3755676 0.05660159 1.00000000
## kasus 0.0001353229 0.4078794 0.1401594 -0.21287477 -0.47123532
## kasus
## Tavg 0.0001353229
## RH_avg 0.4078794111
## RR 0.1401593563
## ss -0.2128747742
## ff_avg -0.4712353184
## kasus 1.0000000000
korelasi_p <- korelasi$P
korelasi_p## Tavg RH_avg RR ss ff_avg
## Tavg NA 1.070101e-01 1.030195e-02 9.328367e-02 0.3832950053
## RH_avg 0.10701011 NA 1.292286e-08 6.019185e-12 0.0033543647
## RR 0.01030195 1.292286e-08 NA 3.215187e-06 0.0085262540
## ss 0.09328367 6.019185e-12 3.215187e-06 NA 0.7023745562
## ff_avg 0.38329501 3.354365e-03 8.526254e-03 7.023746e-01 NA
## kasus 0.99927167 4.006041e-03 3.420325e-01 1.463146e-01 0.0007231472
## kasus
## Tavg 0.9992716666
## RH_avg 0.0040060412
## RR 0.3420324521
## ss 0.1463146106
## ff_avg 0.0007231472
## kasus NA
diag(korelasi_p) <- 0
corrplot(korelasi_r, method="circle", type='upper',
p.mat=korelasi_p, sig.level = 0.05, addCoef.col='black')varkat <- select_if(training, is.factor)
head(varkat)## # A tibble: 6 × 1
## ABJ
## <fct>
## 1 Berisiko
## 2 Berisiko
## 3 Berisiko
## 4 Berisiko
## 5 Berisiko
## 6 Berisiko
varkat %>%
gather() %>%
ggplot(aes(y=value)) +
geom_bar() +
coord_flip() +
facet_wrap( ~ key, scales="free")fit.control <- trainControl(method = "cv", number = 5)lr <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ,
data = training, method = "lm", trControl = fit.control)
lr## Linear Regression
##
## 48 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 38, 38, 39, 38, 39
## Resampling results:
##
## RMSE Rsquared MAE
## 113.6312 0.447231 98.89542
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# cek asumsi linieritas
plot(lr$finalModel, 1)# cek asumsi homogenitas
bptest(lr$finalModel)##
## studentized Breusch-Pagan test
##
## data: lr$finalModel
## BP = 7.5118, df = 6, p-value = 0.2761
# cek asumsi autokorelasi
durbinWatsonTest(lr$finalModel)## lag Autocorrelation D-W Statistic p-value
## 1 0.2509618 1.371495 0.012
## Alternative hypothesis: rho != 0
# cek asumsi multikolinier
vif(lr$finalModel)## Tavg RH_avg RR ss
## 1.247265 6.789773 2.368484 4.551645
## ff_avg `ABJTidak Berisiko`
## 1.744278 1.506678
# cek asumsi normalitas
residual = training$kasus - predict(lr, training)
shapiro.test(residual)##
## Shapiro-Wilk normality test
##
## data: residual
## W = 0.97973, p-value = 0.5673
ks.test(residual, "pnorm")##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: residual
## D = 0.54167, p-value = 1.029e-13
## alternative hypothesis: two-sided
# deksripsi model yang terbentuk
summary(lr)##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -207.86 -70.63 -18.82 91.23 228.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 418.026 1850.326 0.226 0.822386
## Tavg 44.265 65.620 0.675 0.503734
## RH_avg -4.428 11.397 -0.388 0.699669
## RR -12.217 7.554 -1.617 0.113480
## ss -56.603 35.011 -1.617 0.113610
## ff_avg -266.808 76.378 -3.493 0.001159 **
## `ABJTidak Berisiko` -180.346 45.032 -4.005 0.000255 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 110.1 on 41 degrees of freedom
## Multiple R-squared: 0.5294, Adjusted R-squared: 0.4606
## F-statistic: 7.688 on 6 and 41 DF, p-value: 1.431e-05
# melakukan prediksi terhadap data testing
testing$prediksiLR <- predict(lr, testing)
DT::datatable(head(testing,20))# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiLR, testing$kasus)## RMSE Rsquared MAE
## 268.3230508 0.1205471 164.1061436
# melihat variabel importance
varImp(lr)## lm variable importance
##
## Overall
## `ABJTidak Berisiko` 100.000
## ff_avg 85.854
## RR 33.980
## ss 33.963
## Tavg 7.911
## RH_avg 0.000
# set range lambda (antara 0.001 sampai 1000)
lambda <- 10^seq(-3, 3, length = 100)
ridge <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ,
data = training, method = "glmnet",
trControl = fit.control,
tuneGrid = expand.grid(alpha = 0, lambda = lambda))
ridge## glmnet
##
## 48 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 37, 39, 39, 38, 39
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 1.000000e-03 119.2576 0.4434593 102.25443
## 1.149757e-03 119.2576 0.4434593 102.25443
## 1.321941e-03 119.2576 0.4434593 102.25443
## 1.519911e-03 119.2576 0.4434593 102.25443
## 1.747528e-03 119.2576 0.4434593 102.25443
## 2.009233e-03 119.2576 0.4434593 102.25443
## 2.310130e-03 119.2576 0.4434593 102.25443
## 2.656088e-03 119.2576 0.4434593 102.25443
## 3.053856e-03 119.2576 0.4434593 102.25443
## 3.511192e-03 119.2576 0.4434593 102.25443
## 4.037017e-03 119.2576 0.4434593 102.25443
## 4.641589e-03 119.2576 0.4434593 102.25443
## 5.336699e-03 119.2576 0.4434593 102.25443
## 6.135907e-03 119.2576 0.4434593 102.25443
## 7.054802e-03 119.2576 0.4434593 102.25443
## 8.111308e-03 119.2576 0.4434593 102.25443
## 9.326033e-03 119.2576 0.4434593 102.25443
## 1.072267e-02 119.2576 0.4434593 102.25443
## 1.232847e-02 119.2576 0.4434593 102.25443
## 1.417474e-02 119.2576 0.4434593 102.25443
## 1.629751e-02 119.2576 0.4434593 102.25443
## 1.873817e-02 119.2576 0.4434593 102.25443
## 2.154435e-02 119.2576 0.4434593 102.25443
## 2.477076e-02 119.2576 0.4434593 102.25443
## 2.848036e-02 119.2576 0.4434593 102.25443
## 3.274549e-02 119.2576 0.4434593 102.25443
## 3.764936e-02 119.2576 0.4434593 102.25443
## 4.328761e-02 119.2576 0.4434593 102.25443
## 4.977024e-02 119.2576 0.4434593 102.25443
## 5.722368e-02 119.2576 0.4434593 102.25443
## 6.579332e-02 119.2576 0.4434593 102.25443
## 7.564633e-02 119.2576 0.4434593 102.25443
## 8.697490e-02 119.2576 0.4434593 102.25443
## 1.000000e-01 119.2576 0.4434593 102.25443
## 1.149757e-01 119.2576 0.4434593 102.25443
## 1.321941e-01 119.2576 0.4434593 102.25443
## 1.519911e-01 119.2576 0.4434593 102.25443
## 1.747528e-01 119.2576 0.4434593 102.25443
## 2.009233e-01 119.2576 0.4434593 102.25443
## 2.310130e-01 119.2576 0.4434593 102.25443
## 2.656088e-01 119.2576 0.4434593 102.25443
## 3.053856e-01 119.2576 0.4434593 102.25443
## 3.511192e-01 119.2576 0.4434593 102.25443
## 4.037017e-01 119.2576 0.4434593 102.25443
## 4.641589e-01 119.2576 0.4434593 102.25443
## 5.336699e-01 119.2576 0.4434593 102.25443
## 6.135907e-01 119.2576 0.4434593 102.25443
## 7.054802e-01 119.2576 0.4434593 102.25443
## 8.111308e-01 119.2576 0.4434593 102.25443
## 9.326033e-01 119.2576 0.4434593 102.25443
## 1.072267e+00 119.2576 0.4434593 102.25443
## 1.232847e+00 119.2576 0.4434593 102.25443
## 1.417474e+00 119.2576 0.4434593 102.25443
## 1.629751e+00 119.2576 0.4434593 102.25443
## 1.873817e+00 119.2576 0.4434593 102.25443
## 2.154435e+00 119.2576 0.4434593 102.25443
## 2.477076e+00 119.2576 0.4434593 102.25443
## 2.848036e+00 119.2576 0.4434593 102.25443
## 3.274549e+00 119.2576 0.4434593 102.25443
## 3.764936e+00 119.2576 0.4434593 102.25443
## 4.328761e+00 119.2576 0.4434593 102.25443
## 4.977024e+00 119.2576 0.4434593 102.25443
## 5.722368e+00 119.2576 0.4434593 102.25443
## 6.579332e+00 119.2576 0.4434593 102.25443
## 7.564633e+00 119.2576 0.4434593 102.25443
## 8.697490e+00 119.1981 0.4434367 102.21297
## 1.000000e+01 118.9290 0.4432413 101.97749
## 1.149757e+01 118.6547 0.4428810 101.71287
## 1.321941e+01 118.3810 0.4423707 101.42335
## 1.519911e+01 118.1124 0.4416922 101.10724
## 1.747528e+01 117.8530 0.4408220 100.76386
## 2.009233e+01 117.6075 0.4397704 100.39063
## 2.310130e+01 117.3793 0.4385468 99.98532
## 2.656088e+01 117.1769 0.4371328 99.54897
## 3.053856e+01 117.0068 0.4355233 99.07938
## 3.511192e+01 116.8755 0.4337359 98.57608
## 4.037017e+01 116.7925 0.4317642 98.13683
## 4.641589e+01 116.7634 0.4296510 97.71000
## 5.336699e+01 116.8000 0.4273535 97.26408
## 6.135907e+01 116.9053 0.4249421 96.85440
## 7.054802e+01 117.0929 0.4223701 96.62571
## 8.111308e+01 117.3620 0.4197330 96.56000
## 9.326033e+01 117.7273 0.4169485 96.56063
## 1.072267e+02 118.1823 0.4141386 96.82248
## 1.232847e+02 118.7419 0.4112065 97.11218
## 1.417474e+02 119.3918 0.4082965 97.79120
## 1.629751e+02 120.1463 0.4052903 98.67835
## 1.873817e+02 120.9825 0.4023471 99.54069
## 2.154435e+02 121.9135 0.3993367 100.39238
## 2.477076e+02 122.9077 0.3964302 101.28193
## 2.848036e+02 123.9771 0.3934889 102.25488
## 3.274549e+02 125.0835 0.3906936 103.17630
## 3.764936e+02 126.2387 0.3878948 104.06533
## 4.328761e+02 127.4011 0.3852745 104.89419
## 4.977024e+02 128.5835 0.3826861 105.91833
## 5.722368e+02 129.7441 0.3803018 106.93288
## 6.579332e+02 130.8976 0.3779772 107.88799
## 7.564633e+02 132.0053 0.3758698 108.76895
## 8.697490e+02 133.0841 0.3738411 109.63172
## 1.000000e+03 134.1001 0.3720290 110.41305
##
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 46.41589.
# Model coefficients
coef(ridge$finalModel, ridge$bestTune$lambda)## 7 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 27.327073
## Tavg 12.484195
## RH_avg 4.540485
## RR -5.459452
## ss -16.501802
## ff_avg -163.445043
## ABJTidak Berisiko -125.686956
# melakukan prediksi terhadap data testing
testing$prediksiRidge <- predict(ridge, testing)
DT::datatable(head(testing,20))# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiRidge, testing$kasus)## RMSE Rsquared MAE
## 2.973676e+02 7.458723e-05 1.789785e+02
# melihat variabel importance
varImp(ridge)## glmnet variable importance
##
## Overall
## ff_avg 100.0000
## ABJTidak Berisiko 76.2385
## ss 7.5274
## Tavg 4.9990
## RR 0.5783
## RH_avg 0.0000
lasso <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ,
data = training, method = "glmnet",
trControl = fit.control,
tuneGrid = expand.grid(alpha = 1, lambda = lambda))## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
lasso## glmnet
##
## 48 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 40, 39, 37, 38, 38
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 1.000000e-03 120.3470 0.39128335 101.48556
## 1.149757e-03 120.3470 0.39128335 101.48556
## 1.321941e-03 120.3470 0.39128335 101.48556
## 1.519911e-03 120.3470 0.39128335 101.48556
## 1.747528e-03 120.3470 0.39128335 101.48556
## 2.009233e-03 120.3470 0.39128335 101.48556
## 2.310130e-03 120.3470 0.39128335 101.48556
## 2.656088e-03 120.3470 0.39128335 101.48556
## 3.053856e-03 120.3470 0.39128335 101.48556
## 3.511192e-03 120.3470 0.39128335 101.48556
## 4.037017e-03 120.3470 0.39128335 101.48556
## 4.641589e-03 120.3470 0.39128335 101.48556
## 5.336699e-03 120.3470 0.39128335 101.48556
## 6.135907e-03 120.3470 0.39128335 101.48556
## 7.054802e-03 120.3470 0.39128335 101.48556
## 8.111308e-03 120.3470 0.39128335 101.48556
## 9.326033e-03 120.3470 0.39128335 101.48556
## 1.072267e-02 120.3470 0.39128335 101.48556
## 1.232847e-02 120.3470 0.39128335 101.48556
## 1.417474e-02 120.3470 0.39128335 101.48556
## 1.629751e-02 120.3470 0.39128335 101.48556
## 1.873817e-02 120.3470 0.39128335 101.48556
## 2.154435e-02 120.3470 0.39128335 101.48556
## 2.477076e-02 120.3470 0.39128335 101.48556
## 2.848036e-02 120.3470 0.39128335 101.48556
## 3.274549e-02 120.3470 0.39128335 101.48556
## 3.764936e-02 120.3470 0.39128335 101.48556
## 4.328761e-02 120.3470 0.39128335 101.48556
## 4.977024e-02 120.3470 0.39128335 101.48556
## 5.722368e-02 120.3470 0.39128335 101.48556
## 6.579332e-02 120.3470 0.39128335 101.48556
## 7.564633e-02 120.3470 0.39128335 101.48556
## 8.697490e-02 120.3470 0.39128335 101.48556
## 1.000000e-01 120.3470 0.39128335 101.48556
## 1.149757e-01 120.3504 0.39125087 101.48621
## 1.321941e-01 120.3611 0.39114982 101.48831
## 1.519911e-01 120.3746 0.39102198 101.49105
## 1.747528e-01 120.3857 0.39087951 101.48887
## 2.009233e-01 120.3784 0.39073427 101.46680
## 2.310130e-01 120.3321 0.39077808 101.41511
## 2.656088e-01 120.2645 0.39094523 101.34626
## 3.053856e-01 120.1884 0.39114484 101.26822
## 3.511192e-01 120.1025 0.39136189 101.17868
## 4.037017e-01 120.0039 0.39160342 101.07431
## 4.641589e-01 119.8839 0.39195489 100.94837
## 5.336699e-01 119.7453 0.39237452 100.80199
## 6.135907e-01 119.5883 0.39285328 100.63412
## 7.054802e-01 119.4067 0.39342149 100.43823
## 8.111308e-01 119.2020 0.39407660 100.21512
## 9.326033e-01 118.9735 0.39479063 99.95863
## 1.072267e+00 118.7164 0.39558720 99.66352
## 1.232847e+00 118.4272 0.39649182 99.32291
## 1.417474e+00 118.1039 0.39752212 98.93114
## 1.629751e+00 117.7482 0.39862061 98.47961
## 1.873817e+00 117.3569 0.39982512 97.96348
## 2.154435e+00 116.9046 0.40130327 97.36242
## 2.477076e+00 116.4207 0.40282363 96.68922
## 2.848036e+00 115.9934 0.40338881 96.02579
## 3.274549e+00 115.7066 0.40197817 95.44443
## 3.764936e+00 115.4516 0.40046844 94.85984
## 4.328761e+00 115.1500 0.40120997 94.42150
## 4.977024e+00 114.8586 0.40177020 94.13111
## 5.722368e+00 114.5842 0.40219519 93.85075
## 6.579332e+00 114.3913 0.40194667 93.54882
## 7.564633e+00 114.3158 0.40081395 93.23505
## 8.697490e+00 114.3832 0.39907593 92.93207
## 1.000000e+01 114.3270 0.39931433 92.54670
## 1.149757e+01 114.1762 0.40080265 92.00384
## 1.321941e+01 114.1165 0.40142545 91.57500
## 1.519911e+01 114.2133 0.40068777 91.49544
## 1.747528e+01 114.5807 0.39833212 91.70689
## 2.009233e+01 115.1017 0.39628911 91.98444
## 2.310130e+01 115.8635 0.39380981 92.56529
## 2.656088e+01 116.9108 0.39097111 93.41853
## 3.053856e+01 118.3731 0.38667432 94.72036
## 3.511192e+01 120.3962 0.37940310 96.94358
## 4.037017e+01 123.1195 0.36766205 99.97948
## 4.641589e+01 126.2748 0.35599813 103.12005
## 5.336699e+01 129.7960 0.35063499 106.23887
## 6.135907e+01 134.5364 0.34332318 110.32946
## 7.054802e+01 139.9144 0.33990573 114.51046
## 8.111308e+01 144.3200 0.16565880 118.14366
## 9.326033e+01 145.3157 0.03884456 119.04683
## 1.072267e+02 145.4718 NaN 119.10121
## 1.232847e+02 145.4718 NaN 119.10121
## 1.417474e+02 145.4718 NaN 119.10121
## 1.629751e+02 145.4718 NaN 119.10121
## 1.873817e+02 145.4718 NaN 119.10121
## 2.154435e+02 145.4718 NaN 119.10121
## 2.477076e+02 145.4718 NaN 119.10121
## 2.848036e+02 145.4718 NaN 119.10121
## 3.274549e+02 145.4718 NaN 119.10121
## 3.764936e+02 145.4718 NaN 119.10121
## 4.328761e+02 145.4718 NaN 119.10121
## 4.977024e+02 145.4718 NaN 119.10121
## 5.722368e+02 145.4718 NaN 119.10121
## 6.579332e+02 145.4718 NaN 119.10121
## 7.564633e+02 145.4718 NaN 119.10121
## 8.697490e+02 145.4718 NaN 119.10121
## 1.000000e+03 145.4718 NaN 119.10121
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 13.21941.
# Model coefficients
coef(lasso$finalModel, lasso$bestTune$lambda)## 7 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 611.0503
## Tavg .
## RH_avg .
## RR .
## ss -10.5608
## ff_avg -164.9893
## ABJTidak Berisiko -144.0374
# melakukan prediksi terhadap data testing
testing$prediksiLasso <- predict(lasso, testing)
DT::datatable(head(testing,20))# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiLasso, testing$kasus)## RMSE Rsquared MAE
## 3.064909e+02 9.529632e-03 1.809761e+02
# melihat variabel importance
varImp(lasso)## glmnet variable importance
##
## Overall
## ff_avg 100.000
## ABJTidak Berisiko 87.301
## ss 6.401
## Tavg 0.000
## RR 0.000
## RH_avg 0.000
regtree <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ,
data = training, method = "rpart", trControl = fit.control)## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
regtree## CART
##
## 48 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 39, 38, 37, 39, 39
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.0301086 148.4326 0.2295502 115.7412
## 0.1218599 149.1598 0.2139198 111.9254
## 0.3233249 156.2884 0.1315086 120.8747
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.0301086.
# menampilkan plot decision tree
fancyRpartPlot(regtree$finalModel)# melakukan prediksi terhadap data testing
testing$prediksiTree <- predict(regtree, testing)
DT::datatable(head(testing,20))# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiTree, testing$kasus)## RMSE Rsquared MAE
## 3.264468e+02 7.088687e-05 2.060725e+02
# melihat variabel importance
varImp(regtree)## rpart variable importance
##
## Overall
## ff_avg 100.00
## RH_avg 85.43
## ABJTidak Berisiko 73.83
## ss 59.47
## RR 32.94
## Tavg 28.40
## `ABJTidak Berisiko` 0.00
rf <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, data = training, method = "rf", trControl = fit.control)
rf## Random Forest
##
## 48 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 38, 39, 39, 37, 39
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 2 125.4204 0.3366489 100.19985
## 4 124.0048 0.3546408 97.93969
## 6 123.1344 0.3766345 98.09341
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 6.
# melakukan prediksi terhadap data testing
testing$prediksiForest <- predict(rf, testing)
DT::datatable(head(testing,20))# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiForest, testing$kasus)## RMSE Rsquared MAE
## 2.937971e+02 4.352322e-03 1.732728e+02
# melihat variabel importance
varImp(rf)## rf variable importance
##
## Overall
## ff_avg 100.00
## ABJTidak Berisiko 76.83
## Tavg 31.51
## RH_avg 17.07
## ss 11.94
## RR 0.00
svr <- train(kasus ~ Tavg + RH_avg + RR + ss + ff_avg + ABJ, data = training, method = "svmLinear", trControl = fit.control)
svr## Support Vector Machines with Linear Kernel
##
## 48 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 38, 37, 38, 40, 39
## Resampling results:
##
## RMSE Rsquared MAE
## 124.4817 0.3096192 102.978
##
## Tuning parameter 'C' was held constant at a value of 1
# melakukan prediksi terhadap data testing
testing$prediksiSVR <- predict(svr, testing)
DT::datatable(head(testing,20))# melihat tingkat error atau akurasi hasil prediksi
postResample(testing$prediksiSVR, testing$kasus)## RMSE Rsquared MAE
## 280.194893 0.184741 172.621785
# menampilkan perbandingan hasil antar model
model_list <- list(LinearRegression = lr,
RidgeRegression = ridge,
LassoRegression = lasso,
RegressionTree = regtree,
RandomForestReg = rf,
SupportVectorReg = svr)
res <- resamples(model_list)
summary(res)##
## Call:
## summary.resamples(object = res)
##
## Models: LinearRegression, RidgeRegression, LassoRegression, RegressionTree, RandomForestReg, SupportVectorReg
## Number of resamples: 5
##
## MAE
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## LinearRegression 64.56357 68.95197 108.68633 98.89542 110.13093 142.1443
## RidgeRegression 61.70995 77.56574 116.13732 97.71000 116.50914 116.6279
## LassoRegression 73.15082 87.78440 89.84569 91.57500 92.89956 114.1945
## RegressionTree 101.54656 105.02751 114.15455 115.74116 114.50444 143.4727
## RandomForestReg 77.58886 81.60344 92.56650 98.09341 119.27213 119.4361
## SupportVectorReg 77.08317 93.55125 103.72574 102.97805 104.13163 136.3985
## NA's
## LinearRegression 0
## RidgeRegression 0
## LassoRegression 0
## RegressionTree 0
## RandomForestReg 0
## SupportVectorReg 0
##
## RMSE
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## LinearRegression 76.20058 77.15340 121.3971 113.6312 134.7006 158.7041 0
## RidgeRegression 70.84932 89.33320 132.8882 116.7634 135.5371 155.2092 0
## LassoRegression 86.09101 102.00838 110.8140 114.1165 111.9685 159.7007 0
## RegressionTree 132.77916 138.73148 138.7877 148.4326 155.8879 175.9767 0
## RandomForestReg 92.48192 99.50272 111.7852 123.1344 144.6301 167.2719 0
## SupportVectorReg 103.82612 113.23867 119.0237 124.4817 121.9898 164.3301 0
##
## Rsquared
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## LinearRegression 0.2109021137 0.2775890 0.3180561 0.4472310 0.7100149 0.7195929
## RidgeRegression 0.2788380791 0.2807190 0.4799432 0.4296510 0.5045099 0.6042446
## LassoRegression 0.1707081283 0.3120274 0.3619458 0.4014254 0.5354107 0.6270353
## RegressionTree 0.0078928159 0.1752574 0.2232302 0.2295502 0.3696945 0.3716764
## RandomForestReg 0.0163983519 0.1672217 0.2663317 0.3766345 0.5905772 0.8426435
## SupportVectorReg 0.0002824777 0.2764985 0.2866044 0.3096192 0.4138583 0.5708524
## NA's
## LinearRegression 0
## RidgeRegression 0
## LassoRegression 0
## RegressionTree 0
## RandomForestReg 0
## SupportVectorReg 0
# bandingkan semua model yang sudah dibuat
perbandingan <- data.frame(Linear = round(postResample(testing$prediksiLR, testing$kasus),3),
Ridge = round(postResample(testing$prediksiRidge, testing$kasus),3),
Lasso = round(postResample(testing$prediksiLasso, testing$kasus),3),
RegTree = round(postResample(testing$prediksiTree, testing$kasus),3),
RandomForest = round(postResample(testing$prediksiForest, testing$kasus),3),
SVR = round(postResample(testing$prediksiSVR, testing$kasus),3))
DT::datatable(perbandingan)perbandingan## Linear Ridge Lasso RegTree RandomForest SVR
## RMSE 268.323 297.368 306.491 326.447 293.797 280.195
## Rsquared 0.121 0.000 0.010 0.000 0.004 0.185
## MAE 164.106 178.979 180.976 206.073 173.273 172.622
perbandingan## Linear Ridge Lasso RegTree RandomForest SVR
## RMSE 268.323 297.368 306.491 326.447 293.797 280.195
## Rsquared 0.121 0.000 0.010 0.000 0.004 0.185
## MAE 164.106 178.979 180.976 206.073 173.273 172.622
training$prediksiLR <- NA
training$prediksiRidge <- NA
training$prediksiLasso <- NA
training$prediksiTree <- NA
training$prediksiForest <- NA
training$prediksiSVR <- NA
final <- rbind(training, testing)
x1 <- ggplot()+
geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
geom_line(data=final,aes(y=prediksiLR,x= Date,colour="Prediction LR"),size=1) +
scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction LR" = "orchid4")) +
annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
labs(title ="Observation-LR", y = " ", x = " ") +
theme(legend.position="bottom", legend.box = "horizontal") +
scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x2 <- ggplot()+
geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
geom_line(data=final,aes(y=prediksiTree,x= Date,colour="Prediction Reg Tree"),size=1) +
scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction Reg Tree" = "violetred3")) +
annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
labs(title ="Observation-Reg Tree", y = " ", x = " ") +
theme(legend.position="bottom", legend.box = "horizontal") +
scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x3 <- ggplot()+
geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
geom_line(data=final,aes(y=prediksiLasso,x= Date,colour="Prediction Lasso"),size=1) +
scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction Lasso" = "darkorange4")) +
annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
labs(title ="Observation-Lasso", y = " ", x = " ") +
theme(legend.position="bottom", legend.box = "horizontal")+
scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x4 <- ggplot()+
geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
geom_line(data=final,aes(y=prediksiForest,x= Date,colour="Prediction RForest"),size=1) +
scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction RForest" = "midnightblue")) +
annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
labs(title ="Observation-RForest", y = " ", x = " ")+
theme(legend.position="bottom", legend.box = "horizontal")+
scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x5 <- ggplot()+
geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
geom_line(data=final,aes(y=prediksiSVR,x= Date,colour="Prediction SVR"),size=1) +
scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction SVR" = "green4")) +
annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
labs(title ="Observation-SVR", y = " ", x = " ")+
theme(legend.position="bottom", legend.box = "horizontal")+
scale_x_date(date_breaks = "12 month", date_labels = "%Y")
x6 <- ggplot()+
geom_line(data=final,aes(y=kasus,x= Date,colour="Observation"),size=1, alpha = 0.6) +
geom_line(data=final,aes(y=prediksiSVR,x= Date,colour="Prediction Ridge Reg"),size=1) +
scale_color_manual(name = " ", values = c("Observation" = "darkcyan", "Prediction Ridge Reg" = "blue")) +
annotate("rect", xmin = startDate, xmax = lastDate, ymin=-Inf, ymax=Inf, alpha = 0.2, fill = 'salmon1') +
labs(title ="Observation-Ridge Reg", y = " ", x = " ")+
theme(legend.position="bottom", legend.box = "horizontal")+
scale_x_date(date_breaks = "12 month", date_labels = "%Y")
plot_grid(x1, x3, x6, x2, x4, x5, labels = "AUTO")