In this section, we would like to show of all packages that we use:
library(survival)
## Warning: package 'survival' was built under R version 4.2.1
library(survminer)
## Warning: package 'survminer' was built under R version 4.2.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.1
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::as.difftime() masks base::as.difftime()
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ lubridate::intersect() masks base::intersect()
## ✖ dplyr::lag() masks stats::lag()
## ✖ lubridate::setdiff() masks base::setdiff()
## ✖ lubridate::union() masks base::union()
library(dplyr)
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.2.1
library(ranger)
## Warning: package 'ranger' was built under R version 4.2.1
library(readxl)
library(tidyr)
This dataset was from Jakarta Health Office from March to September 2020
setwd("C:/Users/dhihr/Downloads")
NEWWW_Anlsiis_artikel2 <- read_excel("NEWWW_Anlsiis_artikel2(2).xls")
survival <- NEWWW_Anlsiis_artikel2
survival <- data.frame(no = survival$No, gender = survival$JenisKelamin, location = survival$Wilayah, date_PCR_result = survival$TanggalHasilLabKeluarTangga, date_end_treat = survival$TglPulangIsolasiPerawatanMe, status = survival$STATUSRawatSembuhMeninggal, age_group = survival$KelompokUmur, ICU = survival$ICUya1tidak0, heart_diseases = survival$Jantung, hypertension = survival$Hipertensi, obesity = survival$Obesitas, malignancy = survival$Keganasan, kidney_failure = survival$GagalGinjalKronis, immunocompromise = survival$GangguanImmunologi, pneumonia = survival$Pneumonia)
survival$location <- recode(survival$location, "JAKARTA SELATAN" = "South Jakarta", "JAKARTA TIMUR" = "East Jakarta", "JAKARTA PUSAT" = "Central Jakarta", "JAKARTA BARAT" = "West Jakarta", "JAKARTA UTARA" = "North Jakarta", "KEP. SERIBU" = "Thousand Islands")
head(survival)
## no gender location date_PCR_result date_end_treat status age_group
## 1 10397 P South Jakarta 2020-05-18 2020-05-27 Sembuh 6-19
## 2 5929 P West Jakarta 2020-05-06 2020-05-19 Sembuh <=5
## 3 5930 L West Jakarta 2020-05-06 2020-05-19 Sembuh 6-19
## 4 5932 L West Jakarta 2020-05-06 2020-05-19 Sembuh <=5
## 5 15777 P West Jakarta 2020-05-06 2020-07-18 Sembuh 6-19
## 6 7505 L Central Jakarta 2020-06-05 2020-06-30 Sembuh 6-19
## ICU heart_diseases hypertension obesity malignancy kidney_failure
## 1 0 0 0 0 0 0
## 2 NA 0 0 0 0 0
## 3 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 1 1 1 1 1 1
## 6 NA 0 0 0 0 0
## immunocompromise pneumonia
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 1 1
## 6 0 0
All of data were in Indonesia, so I would like to change into English
survival$ICU <- ifelse(survival$ICU == 1, "Yes", "No")
survival$gender <- ifelse(survival$gender == "P", "Female", "Male")
survival$heart_diseases <- ifelse(survival$heart_diseases == "1", "Yes", "No")
survival$hypertension <- ifelse(survival$hypertension == "1", "Yes", "No")
survival$obesity <- ifelse(survival$obesity == "1", "Yes", "No")
survival$malignancy <- ifelse(survival$malignancy == "1", "Yes", "No")
survival$kidney_failure <- ifelse(survival$kidney_failure == "1", "Yes", "No")
survival$immunocompromise <- ifelse(survival$immunocompromise == "1", "Yes", "No")
survival$pneumonia <- ifelse(survival$pneumonia == "1", "Yes", "No")
survival$status <- ifelse(survival$status == "Sembuh", "Recovered", "Death")
survival$no <- as.character(survival$no)
head(survival)
## no gender location date_PCR_result date_end_treat status
## 1 10397 Female South Jakarta 2020-05-18 2020-05-27 Recovered
## 2 5929 Female West Jakarta 2020-05-06 2020-05-19 Recovered
## 3 5930 Male West Jakarta 2020-05-06 2020-05-19 Recovered
## 4 5932 Male West Jakarta 2020-05-06 2020-05-19 Recovered
## 5 15777 Female West Jakarta 2020-05-06 2020-07-18 Recovered
## 6 7505 Male Central Jakarta 2020-06-05 2020-06-30 Recovered
## age_group ICU heart_diseases hypertension obesity malignancy kidney_failure
## 1 6-19 No No No No No No
## 2 <=5 <NA> No No No No No
## 3 6-19 No No No No No No
## 4 <=5 No No No No No No
## 5 6-19 Yes Yes Yes Yes Yes Yes
## 6 6-19 <NA> No No No No No
## immunocompromise pneumonia
## 1 No No
## 2 No No
## 3 No No
## 4 No No
## 5 Yes Yes
## 6 No No
The survival time is date_end_treat negative date_PCR_result. We also made the dummy variable of status, where 1 for event (death) and 0 for censored (recovered). We also cleaned the -1 days to 1 day, due to misinformation in dataset.
survival$date_end_treat <- as.Date(survival$date_end_treat)
survival$date_PCR_result <- as.Date(survival$date_PCR_result)
survival$time <- difftime(survival$date_end_treat, survival$date_PCR_result, units = "days")
table(survival$time)
##
## -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 2 3 5 8 5 5 23 10 10 16 28 12 16 32 41 48 50 70 92 207
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
## 247 182 93 41 36 36 31 23 35 14 26 27 20 18 18 21 7 17 2 2
## 39 40 41 42 43 44 47 48 49 52 53 56 71 72 73 75 79 80 81 88
## 4 5 2 5 1 2 2 2 5 2 2 2 2 2 17 1 1 4 1 1
## 89 90
## 1 1
survival$time[survival$time < 0] <- 1
table(survival$time)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 3 7 8 5 5 23 10 10 16 28 12 16 32 41 48 50 70 92 207 247
## 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 182 93 41 36 36 31 23 35 14 26 27 20 18 18 21 7 17 2 2 4
## 40 41 42 43 44 47 48 49 52 53 56 71 72 73 75 79 80 81 88 89
## 5 2 5 1 2 2 2 5 2 2 2 2 2 17 1 1 4 1 1 1
## 90
## 1
survival$status_n <- ifelse(survival$status == "Death", 1, 0)
Before we did the bivariate, we would like to make the dummy variables.
survival$ICU_n <- ifelse(survival$ICU == "Yes", 1, 0)
survival$gender_n <- ifelse(survival$gender == "Male", 0, 1)
survival$heart_diseases_n <- ifelse(survival$heart_diseases == "Yes", 1, 0)
survival$hypertension_n <- ifelse(survival$hypertension == "Yes", 1, 0)
survival$obesity_n <- ifelse(survival$obesity == "Yes", 1, 0)
survival$malignancy_n <- ifelse(survival$malignancy == "Yes", 1, 0)
survival$kidney_failure_n <- ifelse(survival$kidney_failure == "Yes", 1, 0)
survival$immunocompromise_n <- ifelse(survival$immunocompromise == "Yes", 1, 0)
survival$pneumonia_n <- ifelse(survival$pneumonia == "Yes", 1, 0)
survival$age_group_n <- ifelse(survival$age_group == "<=5",1,0)
covariates <- c("gender_n", "ICU_n", "heart_diseases_n", "hypertension_n", "obesity_n", "kidney_failure_n", "malignancy_n", "immunocompromise_n", "age_group_n", "pneumonia_n")
univ_formulas <- sapply(covariates,
function(x) as.formula(paste('Surv(time, status_n)~', x)))
univ_models <- lapply( univ_formulas, function(x){coxph(x, data = survival)})
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.
## Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
## Loglik converged before variable 1 ; coefficient may be infinite.
univ_results <- lapply(univ_models,
function(x){
x <- summary(x)
p.value<-signif(x$wald["pvalue"], digits=2)
wald.test<-signif(x$wald["test"], digits=2)
beta<-signif(x$coef[1], digits=2);#coeficient beta
HR <-signif(x$coef[2], digits=2);#exp(beta)
HR.confint.lower <- signif(x$conf.int[,"lower .95"], 2)
HR.confint.upper <- signif(x$conf.int[,"upper .95"],2)
HR <- paste0(HR, " (",
HR.confint.lower, "-", HR.confint.upper, ")")
res<-c(beta, HR, wald.test, p.value)
names(res)<-c("beta", "HR (95% CI for HR)", "wald.test",
"p.value")
return(res)
#return(exp(cbind(coef(x),confint(x))))
})
res <- t(as.data.frame(univ_results, check.names = FALSE))
res2 <- as.data.frame(res)
res2
## beta HR (95% CI for HR) wald.test p.value
## gender_n -0.045 0.96 (0.37-2.5) 0.01 0.93
## ICU_n -13 2.3e-06 (0-Inf) 0 1
## heart_diseases_n 2.6 14 (1.8-100) 6.4 0.011
## hypertension_n 2.9 18 (2.3-140) 7.7 0.0056
## obesity_n -13 2.2e-06 (0-Inf) 0 1
## kidney_failure_n 4 56 (6.9-460) 14 0.00017
## malignancy_n -13 2.2e-06 (0-Inf) 0 1
## immunocompromise_n 3.9 52 (6.4-420) 14 0.00022
## age_group_n 1.2 3.3 (1.3-8.5) 5.8 0.016
## pneumonia_n 3.1 23 (8.5-64) 37 1e-09
We would analysis the factors that were a statistically significant. There will be a difference about 0.005 from the bivariate analysis above.
sfit_heart <- survfit(Surv(time, status_n)~heart_diseases, data=survival)
summary(sfit_heart)
## Call: survfit(formula = Surv(time, status_n) ~ heart_diseases, data = survival)
##
## heart_diseases=No
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 1636 1 0.999 0.000611 0.998 1.000
## 1 1633 3 0.998 0.001222 0.995 1.000
## 2 1626 2 0.996 0.001497 0.993 0.999
## 3 1618 3 0.994 0.001835 0.991 0.998
## 4 1613 1 0.994 0.001935 0.990 0.998
## 8 1565 1 0.993 0.002035 0.989 0.997
## 10 1522 1 0.993 0.002136 0.988 0.997
## 18 1163 1 0.992 0.002298 0.987 0.996
## 23 397 1 0.989 0.003388 0.983 0.996
## 33 131 1 0.982 0.008240 0.966 0.998
## 89 2 1 0.491 0.347098 0.123 1.000
##
## heart_diseases=Yes
## time n.risk n.event survival std.err lower 95% CI
## 8.000 8.000 1.000 0.875 0.117 0.673
## upper 95% CI
## 1.000
ggsurvplot(sfit_heart, pval=TRUE, conf.int = TRUE, surv.median.line = "hv", ggtheme = theme_bw())
sfit_kidney_failure <- survfit(Surv(time, status_n)~kidney_failure, data=survival)
summary(sfit_kidney_failure)
## Call: survfit(formula = Surv(time, status_n) ~ kidney_failure, data = survival)
##
## kidney_failure=No
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 1642 1 0.999 0.000609 0.998 1.000
## 1 1639 2 0.998 0.001055 0.996 1.000
## 2 1633 2 0.997 0.001362 0.994 1.000
## 3 1625 3 0.995 0.001725 0.992 0.998
## 4 1620 1 0.994 0.001830 0.991 0.998
## 8 1572 2 0.993 0.002035 0.989 0.997
## 10 1528 1 0.993 0.002135 0.988 0.997
## 18 1167 1 0.992 0.002296 0.987 0.996
## 23 397 1 0.989 0.003387 0.983 0.996
## 33 131 1 0.982 0.008239 0.966 0.998
## 89 2 1 0.491 0.347100 0.123 1.000
##
## kidney_failure=Yes
## time n.risk n.event survival std.err lower 95% CI
## 1.000 2.000 1.000 0.500 0.354 0.125
## upper 95% CI
## 1.000
ggsurvplot(sfit_kidney_failure, pval=TRUE, conf.int = TRUE, surv.median.line = "hv", ggtheme = theme_bw())
sfit_age_group <- survfit(Surv(time, status_n)~age_group, data=survival)
summary(sfit_age_group)
## Call: survfit(formula = Surv(time, status_n) ~ age_group, data = survival)
##
## age_group=<=5
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 395 1 0.997 0.00253 0.993 1.000
## 2 394 2 0.992 0.00437 0.984 1.000
## 3 390 2 0.987 0.00564 0.976 0.998
## 8 376 2 0.982 0.00672 0.969 0.995
## 18 300 1 0.979 0.00745 0.964 0.994
## 23 106 1 0.970 0.01179 0.947 0.993
##
## age_group=6-19
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 1248 1 0.999 0.000801 0.998 1.000
## 1 1246 2 0.998 0.001387 0.995 1.000
## 3 1236 1 0.997 0.001604 0.994 1.000
## 4 1233 1 0.996 0.001795 0.992 1.000
## 10 1162 1 0.995 0.001987 0.991 0.999
## 33 95 1 0.985 0.010604 0.964 1.000
## 89 1 1 0.000 NaN NA NA
ggsurvplot(sfit_age_group, pval=TRUE, conf.int = TRUE, surv.median.line = "hv")
sfit_immunocompromise <- survfit(Surv(time, status_n)~immunocompromise, data=survival)
summary(sfit_immunocompromise)
## Call: survfit(formula = Surv(time, status_n) ~ immunocompromise, data = survival)
##
## immunocompromise=No
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 1642 1 0.999 0.000609 0.998 1.000
## 1 1639 3 0.998 0.001218 0.995 1.000
## 2 1632 1 0.997 0.001362 0.994 1.000
## 3 1625 3 0.995 0.001725 0.992 0.998
## 4 1620 1 0.994 0.001830 0.991 0.998
## 8 1572 2 0.993 0.002034 0.989 0.997
## 10 1528 1 0.993 0.002134 0.988 0.997
## 18 1167 1 0.992 0.002296 0.987 0.996
## 23 397 1 0.989 0.003387 0.983 0.996
## 33 131 1 0.982 0.008239 0.966 0.998
## 89 2 1 0.491 0.347101 0.123 1.000
##
## immunocompromise=Yes
## time n.risk n.event survival std.err lower 95% CI
## 2.000 2.000 1.000 0.500 0.354 0.125
## upper 95% CI
## 1.000
ggsurvplot(sfit_immunocompromise, pval=TRUE, conf.int = TRUE, surv.median.line = "hv")
sfit_hypertension <- survfit(Surv(time, status_n)~hypertension, data=survival)
sfit_pneumonia <- survfit(Surv(time, status_n)~pneumonia, data=survival)
summary(sfit_pneumonia)
## Call: survfit(formula = Surv(time, status_n) ~ pneumonia, data = survival)
##
## pneumonia=No
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 0 1603 1 0.999 0.000624 0.998 1.000
## 1 1600 2 0.998 0.001080 0.996 1.000
## 2 1594 1 0.998 0.001248 0.995 1.000
## 3 1588 2 0.996 0.001530 0.993 0.999
## 10 1496 1 0.996 0.001668 0.992 0.999
## 18 1142 1 0.995 0.001881 0.991 0.998
## 23 385 1 0.992 0.003190 0.986 0.998
## 33 125 1 0.984 0.008515 0.968 1.000
##
## pneumonia=Yes
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 1 41 1 0.976 0.0241 0.930 1.000
## 2 40 1 0.951 0.0336 0.888 1.000
## 3 38 1 0.926 0.0410 0.849 1.000
## 4 37 1 0.901 0.0469 0.814 0.998
## 8 36 2 0.851 0.0561 0.748 0.968
## 89 1 1 0.000 NaN NA NA
ggsurvplot(sfit_pneumonia, pval=TRUE, conf.int = TRUE, surv.median.line = "hv")
We made a multivariate model from pneumonia, immunocompromise, and age groups.
res.cox <- coxph(Surv(time, status_n) ~pneumonia_n + immunocompromise_n + age_group_n, data = survival)
summary(res.cox, conf.int = FALSE)
## Call:
## coxph(formula = Surv(time, status_n) ~ pneumonia_n + immunocompromise_n +
## age_group_n, data = survival)
##
## n= 1644, number of events= 17
##
## coef exp(coef) se(coef) z Pr(>|z|)
## pneumonia_n 2.8319 16.9780 0.5331 5.312 1.09e-07 ***
## immunocompromise_n 2.1690 8.7491 1.1577 1.874 0.0610 .
## age_group_n 1.2400 3.4556 0.5160 2.403 0.0163 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Concordance= 0.783 (se = 0.07 )
## Likelihood ratio test= 31.73 on 3 df, p=6e-07
## Wald test = 46.56 on 3 df, p=4e-10
## Score (logrank) test = 99.14 on 3 df, p=<2e-16
ggforest(res.cox, data=survival)
We fitted the model with Schoenfeld Global test.
test.ph <- cox.zph(res.cox)
test.ph
## chisq df p
## pneumonia_n 0.0226 1 0.88
## immunocompromise_n 1.2278 1 0.27
## age_group_n 0.0307 1 0.86
## GLOBAL 1.2469 3 0.74
ggcoxzph(test.ph)
ggcoxdiagnostics(res.cox, type = "deviance",
linear.predictions = FALSE, ggtheme = theme_minimal())
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## `geom_smooth()` using formula 'y ~ x'
This is the additional graph for epidemic curve per cities and municipality, and also the epidemic curve from outcome categories.
agregate <- survival %>% group_by(date_PCR_result) %>% count()
survival <- survival %>% mutate(Recovered = (status == 'Recovered'), Death = (status == 'Death'))
agregate_outcome <- survival %>% group_by(date_end_treat) %>% summarise(total = n(), Recovered = sum(Recovered), Death = sum(Death))
agregate_outcome <- dplyr::rename(agregate_outcome, Date = date_end_treat)
agregate <- dplyr::rename(agregate, Date = date_PCR_result)
agregate <- rename(agregate, Confirmed = n)
agregate <- left_join(agregate, agregate_outcome, by = 'Date')
agregate[is.na(agregate)] <- 0
agregate_2 <- agregate %>% pivot_longer(cols = c('Confirmed', 'Recovered', 'Death'), names_to = 'Category',
values_to = 'n')
school_close <- as.Date('2020-03-16')
psbb2 <- as.Date('2020-04-01')
end <- as.Date(last(agregate_2$Date))
p <- ggplot(agregate_2, aes(x = Date, y = n, fill = Category)) + geom_bar(stat = "identity", color = 'black') +
scale_fill_manual(values = c("lightcyan", "red", "lightblue")) + theme_minimal()
p<- p + geom_vline(xintercept= school_close, linetype="dashed", color = "blue", size = 1.5) +
annotate(geom="text", x= psbb2, y=150, label = "School Closure: 16 Mar 2020", color = "blue")
p2 <- ggplot(survival, aes(x = date_PCR_result)) +geom_bar(color = "black", fill = "firebrick") + facet_wrap(~location, ncol = 2) +
theme_minimal() + theme(strip.text = element_text( size = 9, face = "bold" ))
p
p2