Data Preparation

Libraries

In this section, we would like to show of all packages that we use:

library(survival)
## Warning: package 'survival' was built under R version 4.2.1
library(survminer)
## Warning: package 'survminer' was built under R version 4.2.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.1
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date()        masks base::date()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ lubridate::intersect()   masks base::intersect()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ lubridate::setdiff()     masks base::setdiff()
## ✖ lubridate::union()       masks base::union()
library(dplyr)
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.2.1
library(ranger)
## Warning: package 'ranger' was built under R version 4.2.1
library(readxl)
library(tidyr)

Importing Dataset

This dataset was from Jakarta Health Office from March to September 2020

setwd("C:/Users/dhihr/Downloads")
NEWWW_Anlsiis_artikel2 <- read_excel("NEWWW_Anlsiis_artikel2(2).xls")
survival <- NEWWW_Anlsiis_artikel2
survival <- data.frame(no = survival$No, gender = survival$JenisKelamin, location = survival$Wilayah, date_PCR_result = survival$TanggalHasilLabKeluarTangga, date_end_treat = survival$TglPulangIsolasiPerawatanMe, status = survival$STATUSRawatSembuhMeninggal, age_group = survival$KelompokUmur, ICU = survival$ICUya1tidak0, heart_diseases = survival$Jantung, hypertension = survival$Hipertensi, obesity = survival$Obesitas, malignancy = survival$Keganasan, kidney_failure = survival$GagalGinjalKronis, immunocompromise = survival$GangguanImmunologi, pneumonia = survival$Pneumonia)
survival$location <- recode(survival$location, "JAKARTA SELATAN" = "South Jakarta", "JAKARTA TIMUR" = "East Jakarta", "JAKARTA PUSAT" = "Central Jakarta", "JAKARTA BARAT" = "West Jakarta", "JAKARTA UTARA" = "North Jakarta", "KEP. SERIBU" = "Thousand Islands")
head(survival)
##      no gender        location date_PCR_result date_end_treat status age_group
## 1 10397      P   South Jakarta      2020-05-18     2020-05-27 Sembuh      6-19
## 2  5929      P    West Jakarta      2020-05-06     2020-05-19 Sembuh       <=5
## 3  5930      L    West Jakarta      2020-05-06     2020-05-19 Sembuh      6-19
## 4  5932      L    West Jakarta      2020-05-06     2020-05-19 Sembuh       <=5
## 5 15777      P    West Jakarta      2020-05-06     2020-07-18 Sembuh      6-19
## 6  7505      L Central Jakarta      2020-06-05     2020-06-30 Sembuh      6-19
##   ICU heart_diseases hypertension obesity malignancy kidney_failure
## 1   0              0            0       0          0              0
## 2  NA              0            0       0          0              0
## 3   0              0            0       0          0              0
## 4   0              0            0       0          0              0
## 5   1              1            1       1          1              1
## 6  NA              0            0       0          0              0
##   immunocompromise pneumonia
## 1                0         0
## 2                0         0
## 3                0         0
## 4                0         0
## 5                1         1
## 6                0         0

Rename Data

All of data were in Indonesia, so I would like to change into English

survival$ICU <- ifelse(survival$ICU == 1, "Yes", "No")
survival$gender <- ifelse(survival$gender == "P", "Female", "Male")
survival$heart_diseases <- ifelse(survival$heart_diseases == "1", "Yes", "No")
survival$hypertension <- ifelse(survival$hypertension == "1", "Yes", "No")
survival$obesity <- ifelse(survival$obesity == "1", "Yes", "No")
survival$malignancy <- ifelse(survival$malignancy == "1", "Yes", "No")
survival$kidney_failure <- ifelse(survival$kidney_failure == "1", "Yes", "No")
survival$immunocompromise <- ifelse(survival$immunocompromise == "1", "Yes", "No")
survival$pneumonia <- ifelse(survival$pneumonia == "1", "Yes", "No")
survival$status <- ifelse(survival$status == "Sembuh", "Recovered", "Death")
survival$no <- as.character(survival$no)
head(survival)
##      no gender        location date_PCR_result date_end_treat    status
## 1 10397 Female   South Jakarta      2020-05-18     2020-05-27 Recovered
## 2  5929 Female    West Jakarta      2020-05-06     2020-05-19 Recovered
## 3  5930   Male    West Jakarta      2020-05-06     2020-05-19 Recovered
## 4  5932   Male    West Jakarta      2020-05-06     2020-05-19 Recovered
## 5 15777 Female    West Jakarta      2020-05-06     2020-07-18 Recovered
## 6  7505   Male Central Jakarta      2020-06-05     2020-06-30 Recovered
##   age_group  ICU heart_diseases hypertension obesity malignancy kidney_failure
## 1      6-19   No             No           No      No         No             No
## 2       <=5 <NA>             No           No      No         No             No
## 3      6-19   No             No           No      No         No             No
## 4       <=5   No             No           No      No         No             No
## 5      6-19  Yes            Yes          Yes     Yes        Yes            Yes
## 6      6-19 <NA>             No           No      No         No             No
##   immunocompromise pneumonia
## 1               No        No
## 2               No        No
## 3               No        No
## 4               No        No
## 5              Yes       Yes
## 6               No        No

Survival Analysis

Finding survival time

The survival time is date_end_treat negative date_PCR_result. We also made the dummy variable of status, where 1 for event (death) and 0 for censored (recovered). We also cleaned the -1 days to 1 day, due to misinformation in dataset.

survival$date_end_treat <- as.Date(survival$date_end_treat)
survival$date_PCR_result <- as.Date(survival$date_PCR_result)
survival$time <- difftime(survival$date_end_treat, survival$date_PCR_result, units = "days")
table(survival$time)
## 
##  -1   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
##   2   3   5   8   5   5  23  10  10  16  28  12  16  32  41  48  50  70  92 207 
##  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38 
## 247 182  93  41  36  36  31  23  35  14  26  27  20  18  18  21   7  17   2   2 
##  39  40  41  42  43  44  47  48  49  52  53  56  71  72  73  75  79  80  81  88 
##   4   5   2   5   1   2   2   2   5   2   2   2   2   2  17   1   1   4   1   1 
##  89  90 
##   1   1
survival$time[survival$time < 0] <- 1
table(survival$time)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
##   3   7   8   5   5  23  10  10  16  28  12  16  32  41  48  50  70  92 207 247 
##  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39 
## 182  93  41  36  36  31  23  35  14  26  27  20  18  18  21   7  17   2   2   4 
##  40  41  42  43  44  47  48  49  52  53  56  71  72  73  75  79  80  81  88  89 
##   5   2   5   1   2   2   2   5   2   2   2   2   2  17   1   1   4   1   1   1 
##  90 
##   1
survival$status_n <- ifelse(survival$status == "Death", 1, 0)

Dummy Variables

Before we did the bivariate, we would like to make the dummy variables.

survival$ICU_n <- ifelse(survival$ICU == "Yes", 1, 0)
survival$gender_n <- ifelse(survival$gender == "Male", 0, 1)
survival$heart_diseases_n <- ifelse(survival$heart_diseases == "Yes", 1, 0)
survival$hypertension_n <- ifelse(survival$hypertension == "Yes", 1, 0)
survival$obesity_n <- ifelse(survival$obesity == "Yes", 1, 0)
survival$malignancy_n <- ifelse(survival$malignancy == "Yes", 1, 0)
survival$kidney_failure_n <- ifelse(survival$kidney_failure == "Yes", 1, 0)
survival$immunocompromise_n <- ifelse(survival$immunocompromise == "Yes", 1, 0)
survival$pneumonia_n <- ifelse(survival$pneumonia == "Yes", 1, 0)
survival$age_group_n <- ifelse(survival$age_group == "<=5",1,0)

Bivariate Analysis

covariates <- c("gender_n", "ICU_n",  "heart_diseases_n", "hypertension_n", "obesity_n", "kidney_failure_n", "malignancy_n", "immunocompromise_n", "age_group_n", "pneumonia_n")
univ_formulas <- sapply(covariates,
                        function(x) as.formula(paste('Surv(time, status_n)~', x)))

univ_models <- lapply( univ_formulas, function(x){coxph(x, data = survival)})
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.

## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.

## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.
univ_results <- lapply(univ_models,
                       function(x){ 
                         x <- summary(x)
                         p.value<-signif(x$wald["pvalue"], digits=2)
                         wald.test<-signif(x$wald["test"], digits=2)
                         beta<-signif(x$coef[1], digits=2);#coeficient beta
                         HR <-signif(x$coef[2], digits=2);#exp(beta)
                         HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
                         HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
                         HR <- paste0(HR, " (", 
                                      HR.confint.lower, "-", HR.confint.upper, ")")
                         res<-c(beta, HR, wald.test, p.value)
                         names(res)<-c("beta", "HR (95% CI for HR)", "wald.test", 
                                       "p.value")
                         return(res)
                         #return(exp(cbind(coef(x),confint(x))))
                       })
res <- t(as.data.frame(univ_results, check.names = FALSE))
res2 <- as.data.frame(res)
res2
##                      beta HR (95% CI for HR) wald.test p.value
## gender_n           -0.045    0.96 (0.37-2.5)      0.01    0.93
## ICU_n                 -13    2.3e-06 (0-Inf)         0       1
## heart_diseases_n      2.6       14 (1.8-100)       6.4   0.011
## hypertension_n        2.9       18 (2.3-140)       7.7  0.0056
## obesity_n             -13    2.2e-06 (0-Inf)         0       1
## kidney_failure_n        4       56 (6.9-460)        14 0.00017
## malignancy_n          -13    2.2e-06 (0-Inf)         0       1
## immunocompromise_n    3.9       52 (6.4-420)        14 0.00022
## age_group_n           1.2      3.3 (1.3-8.5)       5.8   0.016
## pneumonia_n           3.1        23 (8.5-64)        37   1e-09

Kaplan-Meier Analysis

We would analysis the factors that were a statistically significant. There will be a difference about 0.005 from the bivariate analysis above.

sfit_heart <- survfit(Surv(time, status_n)~heart_diseases, data=survival)
summary(sfit_heart)
## Call: survfit(formula = Surv(time, status_n) ~ heart_diseases, data = survival)
## 
##                 heart_diseases=No 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##     0   1636       1    0.999 0.000611        0.998        1.000
##     1   1633       3    0.998 0.001222        0.995        1.000
##     2   1626       2    0.996 0.001497        0.993        0.999
##     3   1618       3    0.994 0.001835        0.991        0.998
##     4   1613       1    0.994 0.001935        0.990        0.998
##     8   1565       1    0.993 0.002035        0.989        0.997
##    10   1522       1    0.993 0.002136        0.988        0.997
##    18   1163       1    0.992 0.002298        0.987        0.996
##    23    397       1    0.989 0.003388        0.983        0.996
##    33    131       1    0.982 0.008240        0.966        0.998
##    89      2       1    0.491 0.347098        0.123        1.000
## 
##                 heart_diseases=Yes 
##         time       n.risk      n.event     survival      std.err lower 95% CI 
##        8.000        8.000        1.000        0.875        0.117        0.673 
## upper 95% CI 
##        1.000
ggsurvplot(sfit_heart, pval=TRUE, conf.int = TRUE, surv.median.line = "hv",  ggtheme = theme_bw())

sfit_kidney_failure <- survfit(Surv(time, status_n)~kidney_failure, data=survival)
summary(sfit_kidney_failure)
## Call: survfit(formula = Surv(time, status_n) ~ kidney_failure, data = survival)
## 
##                 kidney_failure=No 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##     0   1642       1    0.999 0.000609        0.998        1.000
##     1   1639       2    0.998 0.001055        0.996        1.000
##     2   1633       2    0.997 0.001362        0.994        1.000
##     3   1625       3    0.995 0.001725        0.992        0.998
##     4   1620       1    0.994 0.001830        0.991        0.998
##     8   1572       2    0.993 0.002035        0.989        0.997
##    10   1528       1    0.993 0.002135        0.988        0.997
##    18   1167       1    0.992 0.002296        0.987        0.996
##    23    397       1    0.989 0.003387        0.983        0.996
##    33    131       1    0.982 0.008239        0.966        0.998
##    89      2       1    0.491 0.347100        0.123        1.000
## 
##                 kidney_failure=Yes 
##         time       n.risk      n.event     survival      std.err lower 95% CI 
##        1.000        2.000        1.000        0.500        0.354        0.125 
## upper 95% CI 
##        1.000
ggsurvplot(sfit_kidney_failure, pval=TRUE, conf.int = TRUE, surv.median.line = "hv",  ggtheme = theme_bw())

sfit_age_group <- survfit(Surv(time, status_n)~age_group, data=survival)
summary(sfit_age_group)
## Call: survfit(formula = Surv(time, status_n) ~ age_group, data = survival)
## 
##                 age_group=<=5 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1    395       1    0.997 0.00253        0.993        1.000
##     2    394       2    0.992 0.00437        0.984        1.000
##     3    390       2    0.987 0.00564        0.976        0.998
##     8    376       2    0.982 0.00672        0.969        0.995
##    18    300       1    0.979 0.00745        0.964        0.994
##    23    106       1    0.970 0.01179        0.947        0.993
## 
##                 age_group=6-19 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##     0   1248       1    0.999 0.000801        0.998        1.000
##     1   1246       2    0.998 0.001387        0.995        1.000
##     3   1236       1    0.997 0.001604        0.994        1.000
##     4   1233       1    0.996 0.001795        0.992        1.000
##    10   1162       1    0.995 0.001987        0.991        0.999
##    33     95       1    0.985 0.010604        0.964        1.000
##    89      1       1    0.000      NaN           NA           NA
ggsurvplot(sfit_age_group, pval=TRUE, conf.int = TRUE, surv.median.line = "hv")

sfit_immunocompromise <- survfit(Surv(time, status_n)~immunocompromise, data=survival)
summary(sfit_immunocompromise)
## Call: survfit(formula = Surv(time, status_n) ~ immunocompromise, data = survival)
## 
##                 immunocompromise=No 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##     0   1642       1    0.999 0.000609        0.998        1.000
##     1   1639       3    0.998 0.001218        0.995        1.000
##     2   1632       1    0.997 0.001362        0.994        1.000
##     3   1625       3    0.995 0.001725        0.992        0.998
##     4   1620       1    0.994 0.001830        0.991        0.998
##     8   1572       2    0.993 0.002034        0.989        0.997
##    10   1528       1    0.993 0.002134        0.988        0.997
##    18   1167       1    0.992 0.002296        0.987        0.996
##    23    397       1    0.989 0.003387        0.983        0.996
##    33    131       1    0.982 0.008239        0.966        0.998
##    89      2       1    0.491 0.347101        0.123        1.000
## 
##                 immunocompromise=Yes 
##         time       n.risk      n.event     survival      std.err lower 95% CI 
##        2.000        2.000        1.000        0.500        0.354        0.125 
## upper 95% CI 
##        1.000
ggsurvplot(sfit_immunocompromise, pval=TRUE, conf.int = TRUE, surv.median.line = "hv")

sfit_hypertension <- survfit(Surv(time, status_n)~hypertension, data=survival)
sfit_pneumonia <- survfit(Surv(time, status_n)~pneumonia, data=survival)
summary(sfit_pneumonia)
## Call: survfit(formula = Surv(time, status_n) ~ pneumonia, data = survival)
## 
##                 pneumonia=No 
##  time n.risk n.event survival  std.err lower 95% CI upper 95% CI
##     0   1603       1    0.999 0.000624        0.998        1.000
##     1   1600       2    0.998 0.001080        0.996        1.000
##     2   1594       1    0.998 0.001248        0.995        1.000
##     3   1588       2    0.996 0.001530        0.993        0.999
##    10   1496       1    0.996 0.001668        0.992        0.999
##    18   1142       1    0.995 0.001881        0.991        0.998
##    23    385       1    0.992 0.003190        0.986        0.998
##    33    125       1    0.984 0.008515        0.968        1.000
## 
##                 pneumonia=Yes 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     1     41       1    0.976  0.0241        0.930        1.000
##     2     40       1    0.951  0.0336        0.888        1.000
##     3     38       1    0.926  0.0410        0.849        1.000
##     4     37       1    0.901  0.0469        0.814        0.998
##     8     36       2    0.851  0.0561        0.748        0.968
##    89      1       1    0.000     NaN           NA           NA
ggsurvplot(sfit_pneumonia, pval=TRUE, conf.int = TRUE, surv.median.line = "hv")

Multivariate Analysis

We made a multivariate model from pneumonia, immunocompromise, and age groups.

res.cox <- coxph(Surv(time, status_n) ~pneumonia_n + immunocompromise_n + age_group_n, data =  survival)
summary(res.cox, conf.int = FALSE)
## Call:
## coxph(formula = Surv(time, status_n) ~ pneumonia_n + immunocompromise_n + 
##     age_group_n, data = survival)
## 
##   n= 1644, number of events= 17 
## 
##                       coef exp(coef) se(coef)     z Pr(>|z|)    
## pneumonia_n         2.8319   16.9780   0.5331 5.312 1.09e-07 ***
## immunocompromise_n  2.1690    8.7491   1.1577 1.874   0.0610 .  
## age_group_n         1.2400    3.4556   0.5160 2.403   0.0163 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Concordance= 0.783  (se = 0.07 )
## Likelihood ratio test= 31.73  on 3 df,   p=6e-07
## Wald test            = 46.56  on 3 df,   p=4e-10
## Score (logrank) test = 99.14  on 3 df,   p=<2e-16
ggforest(res.cox, data=survival)

Model fitting

We fitted the model with Schoenfeld Global test.

test.ph <- cox.zph(res.cox)
test.ph
##                     chisq df    p
## pneumonia_n        0.0226  1 0.88
## immunocompromise_n 1.2278  1 0.27
## age_group_n        0.0307  1 0.86
## GLOBAL             1.2469  3 0.74
ggcoxzph(test.ph)

ggcoxdiagnostics(res.cox, type = "deviance",
                 linear.predictions = FALSE, ggtheme = theme_minimal())
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## `geom_smooth()` using formula 'y ~ x'

Epicurve

This is the additional graph for epidemic curve per cities and municipality, and also the epidemic curve from outcome categories.

agregate <- survival %>% group_by(date_PCR_result) %>% count()
survival <- survival %>% mutate(Recovered = (status == 'Recovered'), Death = (status == 'Death'))
agregate_outcome <- survival %>% group_by(date_end_treat) %>% summarise(total = n(), Recovered = sum(Recovered), Death = sum(Death))
agregate_outcome <- dplyr::rename(agregate_outcome, Date = date_end_treat)
agregate <- dplyr::rename(agregate, Date = date_PCR_result)
agregate <- rename(agregate, Confirmed = n)
agregate <- left_join(agregate, agregate_outcome, by = 'Date')
agregate[is.na(agregate)] <- 0
agregate_2 <- agregate %>% pivot_longer(cols = c('Confirmed', 'Recovered', 'Death'), names_to = 'Category',
                                                        values_to = 'n')

school_close <- as.Date('2020-03-16')
psbb2 <- as.Date('2020-04-01')
end <- as.Date(last(agregate_2$Date))
p <- ggplot(agregate_2, aes(x = Date, y = n, fill = Category)) + geom_bar(stat = "identity", color = 'black') + 
  scale_fill_manual(values = c("lightcyan", "red", "lightblue")) + theme_minimal()
p<- p +  geom_vline(xintercept= school_close, linetype="dashed", color = "blue", size = 1.5) +
  annotate(geom="text", x= psbb2, y=150, label = "School Closure: 16 Mar 2020", color = "blue")
p2 <- ggplot(survival, aes(x = date_PCR_result)) +geom_bar(color = "black", fill = "firebrick") + facet_wrap(~location, ncol = 2) + 
  theme_minimal() + theme(strip.text = element_text( size = 9, face = "bold" ))
p

p2