Controlled variables

Published

January 13, 2026

Introduction

Code
source("Survival/utils.R")

# Setup analysis environment
analysis_setup <- setup_analysis()
paths <- analysis_setup$paths
demo <- analysis_setup$demo

flare.df <- readRDS(paste0(paths$outdir, "flares-overview.RDS"))
flare.cd.df <- subset(flare.df, diagnosis2 == "CD")

flare.cd.df$Treatment <- factor(flare.cd.df$Treatment,
                                levels = c("Mono immunotherapy",
                                           "Mono biologic",
                                           "Combo therapy",
                                           "5-ASA",
                                           "None reported"))

flare.uc.df <- subset(flare.df, diagnosis2 == "UC/IBDU")
flare.uc.df$Treatment <- factor(flare.uc.df$Treatment,
                                levels = c("Mono immunotherapy",
                                           "Mono biologic",
                                           "Combo therapy",
                                           "5-ASA",
                                           "None reported"))

The variables being controlled for in later cox models analyses are first investigated.

Although smoking status was originally planned to be controlled for, the high degree of missingness for these data and the lack of significant associations with time to flare has resulted in smoking status not being controlled for in later analyses.

Of the variables considered here, only sex is associated with disease flare.

Sex

Crohn’s disease

Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(softflare_time, softflare) ~ Sex,
  legend_title = "Sex",
  legend_labs = c("Male", "Female"),
  palette = c("#00A6ED", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/cd/soft-flare/controlled/sex"
)
saveRDS(p, paste0(paths$outdir, "sex-cd-soft.RDS"))
knitr::include_graphics("plots/cd/soft-flare/controlled/sex.png")
Figure 1: Time to patient-reported flare in Crohn’s disease by sex.
Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(hardflare_time, hardflare) ~ Sex,
  legend_title = "Sex",
  legend_labs = c("Male", "Female"),
  palette = c("#00A6ED", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/cd/hard-flare/controlled/sex"
)
saveRDS(p, paste0(paths$outdir, "sex-cd-hard.RDS"))
knitr::include_graphics("plots/cd/hard-flare/controlled/sex.png")
Figure 2: Time to objective flare in Crohn’s disease by sex.

Ulcerative colitis

Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(softflare_time, softflare) ~ Sex,
  legend_title = "Sex",
  legend_labs = c("Male", "Female"),
  palette = c("#00A6ED", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/uc/soft-flare/controlled/sex"
)
saveRDS(p, paste0(paths$outdir, "sex-uc-soft.RDS"))
knitr::include_graphics("plots/uc/soft-flare/controlled/sex.png")
Figure 3: Time to patient-reported flare in ulcerative colitis by sex.
Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(hardflare_time, hardflare) ~ Sex,
  legend_title = "Sex",
  legend_labs = c("Male", "Female"),
  palette = c("#00A6ED", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/uc/hard-flare/controlled/sex"
)
saveRDS(p, paste0(paths$outdir, "sex-uc-hard.RDS"))
knitr::include_graphics("plots/uc/hard-flare/controlled/sex.png")
Figure 4: Time to objective flare in ulcerative colitis by sex.

Smoking status

Crohn’s disease

Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(softflare_time, softflare) ~ Smoke,
  legend_title = "Smoking status",
  legend_labs = c("Current", "Previous", "Never"),
  palette = c("#00A6ED", "#FFB400", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/cd/soft-flare/controlled/smoke"
)
saveRDS(p, paste0(paths$outdir, "smoke-cd-soft.RDS"))
knitr::include_graphics("plots/cd/soft-flare/controlled/smoke.png")

Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(hardflare_time, hardflare) ~ Smoke,
  legend_title = "Smoking status",
  legend_labs = c("Current", "Previous", "Never"),
  palette = c("#00A6ED", "#FFB400", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/cd/hard-flare/controlled/smoke"
)
saveRDS(p, paste0(paths$outdir, "smoke-cd-hard.RDS"))
knitr::include_graphics("plots/cd/hard-flare/controlled/smoke.png")

Ulcerative colitis

Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(softflare_time, softflare) ~ Smoke,
  legend_title = "Smoking status",
  legend_labs = c("Current", "Previous", "Never"),
  palette = c("#00A6ED", "#FFB400", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/uc/soft-flare/controlled/smoke"
)
saveRDS(p, paste0(paths$outdir, "smoke-uc-soft.RDS"))
knitr::include_graphics("plots/uc/soft-flare/controlled/smoke.png")

Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(hardflare_time, hardflare) ~ Smoke,
  legend_title = "Smoking status",
  legend_labs = c("Current", "Previous", "Never"),
  palette = c("#00A6ED", "#FFB400", "#F6511D"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/uc/hard-flare/controlled/smoke"
)

saveRDS(p, paste0(paths$outdir, "smoke-uc-hard.RDS"))
knitr::include_graphics("plots/uc/hard-flare/controlled/smoke.png")

Social deprivation

Crohn’s disease

Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(softflare_time, softflare) ~ IMD,
  legend_title = "IMD",
  legend_labs = c("1 (least deprived)", "2", "3", "4", "5 (most deprived)"),
  palette = c("#F05D5E", "#00C2D1", "#FFBA49", "#EDC9FF", "#034C3C"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/cd/soft-flare/controlled/imd"
)
saveRDS(p, paste0(paths$outdir, "imd-cd-soft.RDS"))
knitr::include_graphics("plots/cd/soft-flare/controlled/imd.png")

Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(hardflare_time, hardflare) ~ IMD,
  legend_title = "IMD",
  legend_labs = c("1 (least deprived)", "2", "3", "4", "5 (most deprived)"),
  palette = c("#F05D5E", "#00C2D1", "#FFBA49", "#EDC9FF", "#034C3C"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/cd/hard-flare/controlled/imd"
)
saveRDS(p, paste0(paths$outdir, "imd-cd-hard.RDS"))
knitr::include_graphics("plots/cd/hard-flare/controlled/imd.png")

Ulcerative colitis

Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(softflare_time, softflare) ~ IMD,
  legend_title = "IMD",
  legend_labs = c("1 (least deprived)", "2", "3", "4", "5 (most deprived)"),
  palette = c("#F05D5E", "#00C2D1", "#FFBA49", "#EDC9FF", "#034C3C"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/uc/soft-flare/controlled/imd"
)
saveRDS(p, paste0(paths$outdir, "imd-uc-soft.RDS"))
knitr::include_graphics("plots/uc/soft-flare/controlled/imd.png")

Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(hardflare_time, hardflare) ~ IMD,
  legend_title = "IMD",
  legend_labs = c("1 (least deprived)", "2", "3", "4", "5 (most deprived)"),
  palette = c("#F05D5E", "#00C2D1", "#FFBA49", "#EDC9FF", "#034C3C"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/uc/hard-flare/controlled/imd"
)
saveRDS(p, paste0(paths$outdir, "imd-uc-hard.RDS"))
knitr::include_graphics("plots/uc/hard-flare/controlled/imd.png")

Faecal calprotectin

Crohn’s disease

Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(softflare_time, softflare) ~ cat,
  legend_title = "Faecal calprotectin",
  legend_labs = c("FC < 50", "50 ≤ FC ≤ 250", "FC > 250"),
  palette = c("#2AAACE", "#FFBF1C", "#FF6726"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/cd/soft-flare/controlled/fc"
)

saveRDS(p, paste0(paths$outdir, "fc-cd-soft.RDS"))
knitr::include_graphics("plots/cd/soft-flare/controlled/fc.png")

Code
p <- generate_survival_plot(
  data = flare.cd.df,
  formula = Surv(hardflare_time, hardflare) ~ cat,
  legend_title = "Faecal calprotectin",
  legend_labs = c("FC < 50", "50 ≤ FC ≤ 250", "FC > 250"),
  palette = c("#2AAACE", "#FFBF1C", "#FF6726"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/cd/hard-flare/controlled/fc"
)

saveRDS(p, paste0(paths$outdir, "fc-cd-hard.RDS"))
knitr::include_graphics("plots/cd/hard-flare/controlled/fc.png")

Ulcerative colitis

Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(softflare_time, softflare) ~ cat,
  legend_title = "Faecal calprotectin",
  legend_labs = c("FC < 50", "50 ≤ FC ≤ 250", "FC > 250"),
  palette = c("#2AAACE", "#FFBF1C", "#FF6726"),
  xlab = "Time from study recruitment (days)",
  title = "Time to patient-reported flare",
  break_time_by = 200,
  plot_path = "plots/uc/soft-flare/controlled/fc"
)

saveRDS(p, paste0(paths$outdir, "fc-uc-soft.RDS"))
knitr::include_graphics("plots/uc/soft-flare/controlled/fc.png")

Code
p <- generate_survival_plot(
  data = flare.uc.df,
  formula = Surv(hardflare_time, hardflare) ~ cat,
  legend_title = "Faecal calprotectin",
  legend_labs = c("FC < 50", "50 ≤ FC ≤ 250", "FC > 250"),
  palette = c("#2AAACE", "#FFBF1C", "#FF6726"),
  xlab = "Time from study recruitment (days)",
  title = "Time to objective flare",
  break_time_by = 500,
  plot_path = "plots/uc/hard-flare/controlled/fc"
)

saveRDS(p, paste0(paths$outdir, "fc-uc-hard.RDS"))
knitr::include_graphics("plots/uc/hard-flare/controlled/fc.png")

Cox models

Crohn’s disease

Code
fit.me <- coxph(
  Surv(softflare_time, softflare) ~
    Sex + IMD + cat + Smoke + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.cd.df,
  model = TRUE
)

cd.clin.forest <- get_HR(
  fit.me,
  c("SmokePrevious", "SmokeNever")
)

fit.me <- coxph(
  Surv(softflare_time, softflare) ~
    Sex + IMD + cat + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.cd.df,
  model = TRUE
)

cd.clin.forest <- rbind(
  cd.clin.forest,
  get_HR(
    fit.me,
    c(
      "SexFemale",
      paste0("IMD", seq(2, 5)),
      "catFC 50-250",
      "catFC > 250"
    )
  )
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.9989 1.5753 2.5364 0.0000
IMD2 0.9364 0.5970 1.4686 0.7746
IMD3 0.8868 0.5609 1.4021 0.6074
IMD4 0.9417 0.6062 1.4631 0.7894
IMD5 0.9857 0.6443 1.5078 0.9469
catFC 50-250 1.5844 1.2278 2.0445 0.0004
catFC > 250 2.4138 1.8192 3.2028 0.0000
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 0.3091 0.9923 0.5750
IMD 5.8397 3.9497 0.2063
cat 2.3232 1.9815 0.3093
GLOBAL 8.4076 13.9400 0.8643
Warning: `gather_()` was deprecated in tidyr 1.2.0.
ℹ Please use `gather()` instead.
ℹ The deprecated feature was likely used in the survminer package.
  Please report the issue at <https://github.com/kassambara/survminer/issues>.
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Code
newdata <- flare.cd.df %>%
  mutate(cat = "FC < 50",
         softflare_time = 365.25,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.1798635
Code
newdata <- flare.cd.df %>%
  mutate(cat = "FC < 50",
         softflare_time = 730,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.251028
Code
# FC 50-250
newdata <- flare.cd.df %>%
  mutate(cat = "FC 50-250",
         softflare_time = 365.25,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.2849682
Code
newdata <- flare.cd.df %>%
  mutate(cat = "FC 50-250",
         softflare_time = 730,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.3977182
Code
# FC > 250
newdata <- flare.cd.df %>%
  mutate(cat = "FC > 250",
         softflare_time = 365.25,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.4341572
Code
newdata <- flare.cd.df %>%
  mutate(cat = "FC > 250",
         softflare_time = 730,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.6059351
Code
fit.me <- coxph(
  Surv(hardflare_time, hardflare) ~
    Sex + IMD + cat + Smoke + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.cd.df
)

cd.hard.forest <- get_HR(
  fit.me,
  c("SmokePrevious", "SmokeNever")
)

fit.me <- coxph(
  Surv(hardflare_time, hardflare) ~
    Sex + IMD + cat + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.cd.df
)

cd.hard.forest <- rbind(
  cd.hard.forest,
  get_HR(
    fit.me,
    c(
      "SexFemale",
      paste0("IMD", seq(2, 5)),
      "catFC > 250"
    )
  )
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.3887 1.0579 1.8227 0.0180
IMD2 0.9220 0.5365 1.5844 0.7688
IMD3 0.9675 0.5566 1.6817 0.9068
IMD4 0.8950 0.5222 1.5338 0.6864
IMD5 0.9035 0.5370 1.5199 0.7021
catFC 50-250 2.0217 1.4730 2.7750 0.0000
catFC > 250 3.3366 2.3693 4.6989 0.0000
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 0.2571 0.9863 0.6064
IMD 4.2174 3.9407 0.3689
cat 8.8712 1.9847 0.0116
GLOBAL 13.9394 19.6668 0.8190
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Code
# FC < 50
newdata <- flare.cd.df %>%
  mutate(cat = "FC < 50",
         hardflare_time = 365.25,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.04729812
Code
newdata <- flare.cd.df %>%
  mutate(cat = "FC < 50",
         hardflare_time = 730,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.07892738
Code
# FC 50-250
newdata <- flare.cd.df %>%
  mutate(cat = "FC 50-250",
         hardflare_time = 365.25,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.09562473
Code
newdata <- flare.cd.df %>%
  mutate(cat = "FC 50-250",
         hardflare_time = 730,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.159571
Code
# FC > 250
newdata <- flare.cd.df %>%
  mutate(cat = "FC > 250",
         hardflare_time = 365.25,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.1578161
Code
newdata <- flare.cd.df %>%
  mutate(cat = "FC > 250",
         hardflare_time = 730,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.2633511

Ulcerative colitis

Code
fit.me <- coxph(
  Surv(softflare_time, softflare) ~
    Sex + IMD + cat + Smoke + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.uc.df
)

uc.clin.forest <- get_HR(
  fit.me,
  c("SmokePrevious", "SmokeNever")
)


fit.me <- coxph(
  Surv(softflare_time, softflare) ~
    Sex + IMD + cat + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.uc.df
)

uc.clin.forest <- rbind(
  uc.clin.forest,
  get_HR(
    fit.me,
    c(
      "SexFemale",
      paste0("IMD", seq(2, 5)),
      "catFC > 250"
    )
  )
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.5438 1.2475 1.9104 0.0001
IMD2 1.2433 0.7856 1.9678 0.3525
IMD3 1.1010 0.7025 1.7255 0.6748
IMD4 1.4420 0.9388 2.2151 0.0946
IMD5 1.1988 0.7858 1.8290 0.4002
catFC 50-250 1.5688 1.2269 2.0058 0.0003
catFC > 250 2.1447 1.6433 2.7991 0.0000
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 1.3013 0.9907 0.2514
IMD 4.0189 3.9418 0.3949
cat 5.7453 1.9706 0.0550
GLOBAL 11.3236 18.7016 0.9037
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Code
# FC < 50
newdata <- flare.uc.df %>%
  mutate(cat = "FC < 50",
         softflare_time = 365.25,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.2200301
Code
newdata <- flare.uc.df %>%
  mutate(cat = "FC < 50",
         softflare_time = 730,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.3427523
Code
# FC 50-250
newdata <- flare.uc.df %>%
  mutate(cat = "FC 50-250",
         softflare_time = 365.25,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.3451737
Code
newdata <- flare.uc.df %>%
  mutate(cat = "FC 50-250",
         softflare_time = 730,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.537695
Code
# FC > 250
newdata <- flare.uc.df %>%
  mutate(cat = "FC > 250",
         softflare_time = 365.25,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.4718901
Code
newdata <- flare.uc.df %>%
  mutate(cat = "FC > 250",
         softflare_time = 730,
         softflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.7350878
Code
fit.me <- coxph(
  Surv(hardflare_time, hardflare) ~
    Sex + IMD + cat + Smoke + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.uc.df
)

uc.hard.forest <- get_HR(
  fit.me,
  c("SmokePrevious", "SmokeNever")
)


fit.me <- coxph(
  Surv(hardflare_time, hardflare) ~
    Sex + IMD + cat + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.uc.df
)

uc.hard.forest <- rbind(
  uc.hard.forest,
  get_HR(
    fit.me,
    c(
      "SexFemale",
      paste0("IMD", seq(2, 5)),
      "catFC 50-250",
      "catFC > 250"
    )
  )
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.3259 1.0208 1.7221 0.0345
IMD2 1.4092 0.7861 2.5260 0.2494
IMD3 1.3774 0.7835 2.4213 0.2659
IMD4 1.7484 1.0130 3.0174 0.0448
IMD5 1.2989 0.7566 2.2298 0.3430
catFC 50-250 2.0322 1.4885 2.7744 0.0000
catFC > 250 3.2203 2.3245 4.4614 0.0000
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 0.1461 0.9849 0.6962
IMD 2.6145 3.9368 0.6145
cat 4.3647 1.9671 0.1096
GLOBAL 7.4355 23.5846 0.9994
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Code
# FC < 50
newdata <- flare.uc.df %>%
  mutate(cat = "FC < 50",
         hardflare_time = 365.25,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.05862843
Code
newdata <- flare.uc.df %>%
  mutate(cat = "FC < 50",
         hardflare_time = 730,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.1063757
Code
# FC 50-250
newdata <- flare.uc.df %>%
  mutate(cat = "FC 50-250",
         hardflare_time = 365.25,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.1191424
Code
newdata <- flare.uc.df %>%
  mutate(cat = "FC 50-250",
         hardflare_time = 730,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.2161726
Code
# FC > 250
newdata <- flare.uc.df %>%
  mutate(cat = "FC > 250",
         hardflare_time = 365.25,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.1888021
Code
newdata <- flare.uc.df %>%
  mutate(cat = "FC > 250",
         hardflare_time = 730,
         hardflare = 1)
predict(fit.me, newdata = newdata, type = "expected", se.fit = TRUE)$fit %>%
  mean(na.rm = TRUE)
[1] 0.3425636

Across IBD

Code
fit.me <- coxph(
  Surv(softflare_time, softflare) ~
    Sex + IMD + cat + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.df
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.7183 1.4676 2.0119 0.0000
IMD2 1.0417 0.7555 1.4364 0.8032
IMD3 0.9689 0.7027 1.3360 0.8474
IMD4 1.1549 0.8490 1.5712 0.3590
IMD5 1.0554 0.7814 1.4255 0.7252
catFC 50-250 1.5194 1.2743 1.8117 0.0000
catFC > 250 2.2203 1.8303 2.6934 0.0000
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 1.9368 0.9936 0.1627
IMD 8.9273 3.9484 0.0609
cat 1.1234 1.9824 0.5659
GLOBAL 12.1742 22.3691 0.9594
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Code
fit.me <- coxph(
  Surv(hardflare_time, hardflare) ~
    Sex + IMD + cat + frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.df
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.3310 1.1037 1.6051 0.0028
IMD2 1.1029 0.7416 1.6402 0.6286
IMD3 1.1157 0.7525 1.6542 0.5858
IMD4 1.2392 0.8452 1.8169 0.2720
IMD5 1.0508 0.7220 1.5293 0.7959
catFC 50-250 1.9760 1.5846 2.4641 0.0000
catFC > 250 3.2506 2.5702 4.1112 0.0000
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 0.2285 0.9928 0.6297
IMD 0.8488 3.9546 0.9289
cat 8.4343 1.9867 0.0145
GLOBAL 9.9289 29.8504 0.9998
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Code
saveRDS(flare.df, paste0(paths$outdir, "flares-controlled.RDS"))
saveRDS(flare.cd.df, paste0(paths$outdir, "flares-controlled-cd.RDS"))
saveRDS(flare.uc.df, paste0(paths$outdir, "flares-controlled-uc.RDS"))

saveRDS(cd.clin.forest, paste0(paths$outdir, "cd-clin-controlled.RDS"))
saveRDS(cd.hard.forest, paste0(paths$outdir, "cd-hard-controlled.RDS"))
saveRDS(uc.clin.forest, paste0(paths$outdir, "uc-clin-controlled.RDS"))
saveRDS(uc.hard.forest, paste0(paths$outdir, "uc-hard-controlled.RDS"))
Control for additional covariates

Crohn’s disease

Patient-reported flare

Code
fit.me <- coxph(
  Surv(softflare_time, softflare) ~
    Sex +
    IMD +
    cat +
    `IBD Duration` +
    BMI +
    Treatment +
    Age +
    Smoke +
    frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.cd.df
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 2.3665 1.7880 3.1321 0.0000
IMD2 0.8606 0.5236 1.4145 0.5538
IMD3 0.9000 0.5432 1.4909 0.6824
IMD4 0.9740 0.6039 1.5710 0.9141
IMD5 0.9757 0.6125 1.5544 0.9175
catFC 50-250 1.4066 1.0607 1.8653 0.0178
catFC > 250 2.2945 1.6660 3.1602 0.0000
IBD Duration 0.9880 0.9764 0.9998 0.0460
BMI 1.0025 0.9799 1.0255 0.8320
TreatmentMono biologic 1.0530 0.7278 1.5236 0.7840
TreatmentCombo therapy 0.7668 0.4787 1.2281 0.2692
Treatment5-ASA 1.4407 0.7997 2.5952 0.2241
TreatmentNone reported 0.9300 0.6568 1.3166 0.6823
Age 1.0083 0.9989 1.0179 0.0851
SmokePrevious 1.5225 0.9006 2.5740 0.1166
SmokeNever 1.2868 0.7664 2.1606 0.3402
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 0.0372 1.0000 0.8470
IMD 5.7872 4.0000 0.2156
cat 1.2874 2.0000 0.5254
IBD Duration 3.3065 1.0000 0.0690
BMI 2.2687 1.0000 0.1320
Treatment 6.5956 4.0000 0.1589
Age 0.6126 1.0000 0.4338
Smoke 0.4805 2.0000 0.7864
GLOBAL 20.7752 16.0001 0.1873
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Objective flare

Code
fit.me <- coxph(
  Surv(hardflare_time, hardflare) ~
    Sex +
    IMD +
    cat +
    `IBD Duration` +
    BMI +
    Treatment +
    Age +
    Smoke +
    frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.cd.df
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.6591 1.1856 2.3216 0.0031
IMD2 0.8097 0.4303 1.5234 0.5126
IMD3 0.7955 0.4172 1.5170 0.4873
IMD4 0.9072 0.4914 1.6750 0.7557
IMD5 0.9736 0.5391 1.7582 0.9293
catFC 50-250 1.9761 1.3551 2.8815 0.0004
catFC > 250 3.6593 2.4321 5.5057 0.0000
IBD Duration 0.9833 0.9676 0.9993 0.0408
BMI 1.0190 0.9907 1.0481 0.1903
TreatmentMono biologic 0.9804 0.6278 1.5309 0.9306
TreatmentCombo therapy 0.7114 0.4007 1.2627 0.2447
Treatment5-ASA 1.3718 0.5994 3.1394 0.4542
TreatmentNone reported 0.7259 0.4741 1.1114 0.1405
Age 0.9922 0.9805 1.0041 0.1978
SmokePrevious 1.3309 0.6861 2.5820 0.3978
SmokeNever 1.2458 0.6572 2.3616 0.5007
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 1.3707 0.9959 0.2406
IMD 4.6773 3.9847 0.3200
cat 8.8312 1.9978 0.0121
IBD Duration 0.0682 0.9984 0.7934
BMI 4.4479 0.9976 0.0348
Treatment 3.7406 3.9729 0.4381
Age 2.9221 0.9978 0.0871
Smoke 0.4614 1.9966 0.7933
GLOBAL 26.7768 16.7962 0.0575
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Ulcerative colitis

Patient-reported flare

Code
fit.me <- coxph(
  Surv(softflare_time, softflare) ~
    Sex +
    IMD +
    cat +
    `IBD Duration` +
    BMI +
    Treatment +
    Age +
    Smoke +
    frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.uc.df
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.5639 1.2287 1.9906 0.0003
IMD2 1.2949 0.7603 2.2053 0.3415
IMD3 1.0556 0.6368 1.7498 0.8338
IMD4 1.4290 0.8815 2.3164 0.1475
IMD5 1.1729 0.7255 1.8963 0.5152
catFC 50-250 1.6777 1.2763 2.2053 0.0002
catFC > 250 2.0136 1.4864 2.7278 0.0000
IBD Duration 0.9960 0.9827 1.0095 0.5586
BMI 0.9860 0.9631 1.0095 0.2408
TreatmentMono biologic 0.7771 0.4934 1.2238 0.2764
TreatmentCombo therapy 0.3769 0.1836 0.7739 0.0079
Treatment5-ASA 1.2407 0.8814 1.7465 0.2163
TreatmentNone reported 1.0219 0.7218 1.4468 0.9028
Age 0.9872 0.9778 0.9967 0.0083
SmokePrevious 1.2613 0.6917 2.3000 0.4488
SmokeNever 1.0499 0.5771 1.9099 0.8733
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 3.1159 0.9986 0.0774
IMD 3.9747 3.9903 0.4080
cat 3.2704 1.9961 0.1944
IBD Duration 1.5844 0.9987 0.2078
BMI 0.2759 0.9981 0.5986
Treatment 1.7348 3.9815 0.7820
Age 0.0232 0.9955 0.8776
Smoke 1.1879 1.9972 0.5515
GLOBAL 16.3694 16.8521 0.4875
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Objective flare

Code
fit.me <- coxph(
  Surv(hardflare_time, hardflare) ~
    Sex +
    IMD +
    cat +
    `IBD Duration` +
    BMI +
    Treatment +
    Age +
    Smoke +
    frailty(SiteNo),
  control = coxph.control(outer.max = 20),
  data = flare.uc.df
)

invisible(cox_summary(fit.me))

Cox model summary:

Variable HR Lower 95% Upper 95% P-value
SexFemale 1.4592 1.0646 2.0001 0.0188
IMD2 1.2474 0.5924 2.6266 0.5607
IMD3 1.3381 0.6647 2.6939 0.4146
IMD4 1.9784 1.0177 3.8458 0.0442
IMD5 1.4512 0.7400 2.8460 0.2785
catFC 50-250 2.1379 1.4800 3.0884 0.0001
catFC > 250 2.9624 2.0081 4.3702 0.0000
IBD Duration 0.9943 0.9762 1.0127 0.5398
BMI 0.9883 0.9574 1.0201 0.4652
TreatmentMono biologic 1.1294 0.6652 1.9176 0.6522
TreatmentCombo therapy 0.7832 0.3752 1.6348 0.5151
Treatment5-ASA 1.0290 0.6613 1.6011 0.8991
TreatmentNone reported 0.7403 0.4680 1.1710 0.1987
Age 0.9872 0.9746 0.9998 0.0471
SmokePrevious 1.7680 0.7079 4.4156 0.2224
SmokeNever 1.6201 0.6522 4.0246 0.2987
Proportional hazards assumption test
Chi-squared statistic DF P-value
Sex 0.0956 0.9877 0.7526
IMD 1.9114 3.9373 0.7435
cat 3.3536 1.9695 0.1827
IBD Duration 0.6163 0.9842 0.4264
BMI 0.0101 0.9823 0.9162
Treatment 21.3704 3.8531 0.0002
Age 0.8918 0.9752 0.3365
Smoke 1.1111 1.9748 0.5675
GLOBAL 30.6267 26.5528 0.2665
`geom_smooth()` using formula = 'y ~ x'

`geom_smooth()` using formula = 'y ~ x'

Reproduction and reproducibility

Session info

R version 4.4.0 (2024-04-24)

Platform: aarch64-unknown-linux-gnu

locale: LC_CTYPE=en_US.UTF-8, LC_NUMERIC=C, LC_TIME=en_US.UTF-8, LC_COLLATE=en_US.UTF-8, LC_MONETARY=en_US.UTF-8, LC_MESSAGES=en_US.UTF-8, LC_PAPER=en_US.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.UTF-8 and LC_IDENTIFICATION=C

attached base packages: stats, graphics, grDevices, utils, datasets, methods and base

other attached packages: gtsummary(v.1.7.2), DescTools(v.0.99.54), finalfit(v.1.0.7), coxme(v.2.2-20), bdsmatrix(v.1.3-7), pander(v.0.6.5), survminer(v.0.4.9), ggpubr(v.0.6.0), survival(v.3.5-8), datefixR(v.1.6.1), lubridate(v.1.9.3), forcats(v.1.0.0), stringr(v.1.5.1), dplyr(v.1.1.4), purrr(v.1.0.2), readr(v.2.1.5), tidyr(v.1.3.1), tibble(v.3.2.1), ggplot2(v.3.5.1), tidyverse(v.2.0.0) and readxl(v.1.4.3)

loaded via a namespace (and not attached): gridExtra(v.2.3), gld(v.2.6.6), rlang(v.1.1.3), magrittr(v.2.0.3), e1071(v.1.7-14), compiler(v.4.4.0), mgcv(v.1.9-1), vctrs(v.0.6.5), pkgconfig(v.2.0.3), shape(v.1.4.6.1), fastmap(v.1.2.0), backports(v.1.5.0), labeling(v.0.4.3), KMsurv(v.0.1-5), utf8(v.1.2.4), rmarkdown(v.2.27), markdown(v.1.12), tzdb(v.0.4.0), nloptr(v.2.0.3), xfun(v.0.44), glmnet(v.4.1-8), jomo(v.2.7-6), jsonlite(v.1.8.8), pan(v.1.9), broom(v.1.0.6), R6(v.2.5.1), stringi(v.1.8.4), car(v.3.1-2), boot(v.1.3-30), rpart(v.4.1.23), cellranger(v.1.1.0), Rcpp(v.1.0.12), iterators(v.1.0.14), knitr(v.1.47), zoo(v.1.8-12), Matrix(v.1.7-0), splines(v.4.4.0), nnet(v.7.3-19), timechange(v.0.3.0), tidyselect(v.1.2.1), rstudioapi(v.0.16.0), abind(v.1.4-5), yaml(v.2.3.8), ggtext(v.0.1.2), codetools(v.0.2-20), lattice(v.0.22-6), withr(v.3.0.0), evaluate(v.0.23), proxy(v.0.4-27), xml2(v.1.3.6), survMisc(v.0.5.6), pillar(v.1.9.0), carData(v.3.0-5), mice(v.3.16.0), foreach(v.1.5.2), generics(v.0.1.3), hms(v.1.1.3), commonmark(v.1.9.1), munsell(v.0.5.1), scales(v.1.3.0), rootSolve(v.1.8.2.4), minqa(v.1.2.7), xtable(v.1.8-4), class(v.7.3-22), glue(v.1.7.0), lmom(v.3.0), tools(v.4.4.0), data.table(v.1.15.4), lme4(v.1.1-35.3), ggsignif(v.0.6.4), Exact(v.3.2), mvtnorm(v.1.2-5), grid(v.4.4.0), colorspace(v.2.1-0), nlme(v.3.1-164), cli(v.3.6.2), km.ci(v.0.5-6), fansi(v.1.0.6), expm(v.0.999-9), broom.helpers(v.1.15.0), gt(v.0.10.1), gtable(v.0.3.5), rstatix(v.0.7.2), digest(v.0.6.35), farver(v.2.1.2), htmlwidgets(v.1.6.4), htmltools(v.0.5.8.1), lifecycle(v.1.0.4), httr(v.1.4.7), mitml(v.0.4-5), gridtext(v.0.1.5) and MASS(v.7.3-60.2)

Licensed by CC BY unless otherwise stated.