# Load packages
library(TrialEmulation)
library(dplyr)
library(tidyr)
library(ggplot2)
library(survival)
library(survminer)
library(lubridate)
library(survey)
source("generate observed data version 0.R")
working_dir <- getwd()The per-protocol (PP) analysis estimates the effects of initiating and continuously adhering to the initial treatment. Unlike the intention-to-treat (ITT) analysis focusing on the effects of assigning patients to one treatment group versus another, patients are artificially censored in the PP analysis when they deviate from protocol, that is, switching to the other treatment or discontinuing the initial treatment.
PP analysis estimates treatment effects under full adherence, which rules out the behavioral impact of not taking the treatment as prescribed. PP analysis can be particularly relevant when determining the maximum achievable benefit of a treatment under optimal adherence conditions. However, it estimates the treatment effects under a nearly hypothetical situation (100% adherence) that often does not occur in real world.
Recall our previous example that aims to compare the effectiveness of ARB versus ACEI anti-hypertensive medications on reducing the risk of cardiovascular disease (CVD).
We emulated the target trial specified in Table 1 and prepared the dataset in the single-trial emulation session. Thus, we directly load the pre-processed data.
# Extract the estimated propensity score from the ITT analysis
base.weights <- readRDS("obsdata6.rds")
base.trt.w <- base.weights %>% select(id, trt_weight)
# Load the emulated observational data in long format
obsdata5 <- readRDS("obsdata5.rds")
# Add the propensity score to the long data
obsdata6 <- obsdata5 %>% left_join(base.trt.w, by = "id")
attr(obsdata6$X1_0, "label") <- "Baseline value of X1"
attr(obsdata6$X2_0, "label") <- "Baseline value of X2"
attr(obsdata6$X3_0, "label") <- "Baseline value of X3"
attr(obsdata6$X4_0, "label") <- "Baseline value of X4"
attr(obsdata6$age_0, "label") <- "Baseline value of age"
attr(obsdata6$trt_weight, "label") <- "Untruncated inverse probability treatment weight"
get_label <- function(x) {
lbl <- attr(x, "label", exact = TRUE)
if (is.null(lbl)) "" else as.character(lbl)
}
dict <- data.frame(
Variable = names(obsdata6),
Meaning = vapply(obsdata6, get_label, character(1)),
check.names = FALSE
)
knitr::kable(dict, caption = "Data Dictionary", row.names = FALSE)| Variable | Meaning |
|---|---|
| id | Patient ID |
| time | Time index for longitudinal records |
| X1 | Non-ACEI or ARB antihypertensive medication use over time |
| X2 | Standardized systolic blood pressure over time |
| X3 | Biological sex (F/M) |
| X4 | Standardized diastolic blood pressure at baseline |
| age | Age over time (years) |
| A | Treatment indicator of ARB use over time (ACEI=0) |
| Y | Event indicator of cardiovascular disease |
| C | Indicator of early dropout |
| age_s | Standardized age over time (years) |
| follow_up | Time index for longitudinal records |
| assigned_treatment | Treatment indicator of ARB use over time (ACEI=0) |
| X1_0 | Baseline value of X1 |
| X2_0 | Baseline value of X2 |
| X3_0 | Baseline value of X3 |
| X4_0 | Baseline value of X4 |
| age_0 | Baseline value of age |
| trt_weight | Untruncated inverse probability treatment weight |
# Extract initial or assigned treatment
assigned.trt.dat <- obsdata6 %>%
filter(follow_up == 0) %>%
rename(assigned.trt = assigned_treatment) %>%
select(id, assigned.trt)# Add the initial treatment information to every row of the long data
obsdata7 <- obsdata6 %>% left_join(assigned.trt.dat, by = "id")
obsdata8 <- obsdata7 %>%
mutate(adherence = ifelse(A == assigned.trt, 1, 0),
art_censored = 1-adherence) # Drop the rows after individuals being artificially censored
obsdata9 <- obsdata8 %>%
group_by(id) %>%
mutate(cum_art_censored = cumsum(art_censored)) %>% # create an indicator of the censored rows
mutate(cum_cum_art_censored = cumsum(cum_art_censored)) %>% # create an indicator to distinguish the first censored month and the months afterward
filter(cum_cum_art_censored %in% c(0,1)) %>% # keep only data from the adhered months and the first deviation month
select(-cum_art_censored,-cum_cum_art_censored)To minimize extreme weights and unstable treatment effect estimates, we use stabilized adherence weights. Similar to the baseline treatment weights we explained in the Intention-to-treat Analysis tutorial, stablized weights are also inverse weighting estimates constructed with a numerator and a denominator.
# Fit four separate pooled logistic models using a flexible function of time (i.e., time_on_regime)
# ?? Is it necessary to remove baseline data
obsdata10 <- obsdata9 %>% filter(follow_up > 0) # probability of being adhered or on study at baseline is always 1
# Data sets used for making predictions in each treatment group
obsdata_acei <- obsdata10 %>% filter(assigned.trt == 0) %>% ungroup()
obsdata_arb <- obsdata10 %>% filter(assigned.trt == 1) %>% ungroup()
# Numerator of the adherence model for the ACEI group
adh.model.n0 <- glm(adherence ~ X3 + X4 + X1_0 + X2_0 + age_0 + follow_up + I(follow_up^2),
data = obsdata_acei,
family = binomial(link = "logit"))
# Probabilities of being adhered to treatment ACEI for ACEI initiators
adh.prob.n0 <- predict(adh.model.n0, type = "response", newdata = obsdata_acei)
# Denominator of the adherence model for the ACEI group
adh.model.d0 <- glm(adherence ~ X1 + X2 + X3 + X4 + age + X1_0 + X2_0 + age_0 + # include the baseline values of X1, X2, and age
follow_up + I(follow_up^2),
data = obsdata_acei,
family = binomial(link = "logit"))
# Probabilities of being adhered to treatment ACEI for ACEI initiators
adh.prob.d0 <- predict(adh.model.d0, type = "response", newdata = obsdata_acei)
# Numerator of the adherence model for the ARB group
adh.model.n1 <- glm(adherence ~ X3 + X4 + X1_0 + X2_0 + age_0 + follow_up + I(follow_up^2),
data = obsdata_arb,
family = binomial(link = "logit"))
# Probabilities of being adhered to treatment ARB for ARB initiators
adh.prob.n1 <- predict(adh.model.n1, type = "response", newdata = obsdata_arb)
# Denominator of the adherence model for the ARB group
adh.model.d1 <- glm(adherence ~ X1 + X2 + X3 + X4 + age + X1_0 + X2_0 + age_0 + follow_up + I(follow_up^2),
data = obsdata_arb,
family = binomial(link = "logit"))
# Probabilities of being adhered to treatment ARB for ARB initiators
adh.prob.d1 <- predict(adh.model.d1, type = "response", newdata = obsdata_arb)# Numerator of the loss to follow-up model for the ACEI group
cens.model.n0 <- glm(C ~ X3 + X4 + X1_0 + X2_0 + age_0 + follow_up + I(follow_up^2),
data = obsdata_acei,
family = binomial(link = "logit"))
# Probabilities of NOT being lost to follow-up for ACEI initiators
cens.prob.n0 <- predict(cens.model.n0, type = "response", newdata = obsdata_acei)
# Denominator of the loss to follow-up model for the ACEI group
cens.model.d0 <- glm(C ~ X1 + X2 + X3 + X4 + age + X1_0 + X2_0 + age_0 + follow_up + I(follow_up^2),
data = obsdata_acei,
family = binomial(link = "logit"))
# Probabilities of NOT being lost to follow-up for ACEI initiators
cens.prob.d0 <- predict(cens.model.d0, type = "response", newdata = obsdata_acei)
cens.model.n1 <- glm(C ~ X3 + X4 + X1_0 + X2_0 + age_0 + follow_up + I(follow_up^2),
data = obsdata_arb,
family = binomial(link = "logit"))
cens.prob.n1 <- predict(cens.model.n1, type = "response", newdata = obsdata_arb)
cens.model.d1 <- glm(C ~ X1 + X2 + X3 + X4 + age + X1_0 + X2_0 + age_0 + follow_up + I(follow_up^2),
data = obsdata_arb,
family = binomial(link = "logit"))
cens.prob.d1 <- predict(cens.model.d1, type = "response", newdata = obsdata_arb) # For the ACEI group
obsdata_acei1 <- obsdata_acei %>%
mutate(adh_weight_acei = adh.prob.n0 / adh.prob.d0, # adherence weights
cens_weight_acei = cens.prob.n0 / cens.prob.d0, # loss to follow-up weights
weight_acei = adh_weight_acei * cens_weight_acei) %>% # combined time-varying weights
group_by(id) %>%
mutate(weight = cumprod(weight_acei)) %>% # cumulative products of the weights from each month
select(id, follow_up, weight) %>%
ungroup()
# Add the weights to the long observational data
obsdata_acei2 <- obsdata9 %>%
filter(assigned.trt == 0) %>%
left_join(obsdata_acei1, by = c("id", "follow_up")) %>%
mutate(weight = ifelse(follow_up == 0, 1, weight)) # the probability of being adhered and in study at month 0 is 1
# For the ARB group
obsdata_arb1 <- obsdata_arb %>%
mutate(adh_weight_arb = adh.prob.n1 / adh.prob.d1, # adherence weights
cens_weight_arb = cens.prob.n1 / cens.prob.d1, # loss to follow-up weights
weight_arb = adh_weight_arb * cens_weight_arb) %>% # cumulative products of the weights from each month
group_by(id) %>%
mutate(weight = cumprod(weight_arb)) %>% # cumulative products of the weights from each month
select(id, follow_up, weight) %>%
ungroup()
# Add the weights to the long observational data
obsdata_arb2 <- obsdata9 %>%
filter(assigned.trt == 1) %>%
left_join(obsdata_arb1, by = c("id", "follow_up")) %>%
mutate(weight = ifelse(follow_up == 0, 1, weight)) # the probability of being adhered and in study at month 0 is 1obsdata11 <- bind_rows(obsdata_acei2, obsdata_arb2) %>%
left_join(obsdata9 %>% select(id, trt_weight)) %>%
mutate(weight_final = weight * trt_weight) %>% # combine time-varying weights with baseline
select(id, follow_up, X1, X2, X3, X4, age, X1_0, X2_0, X3_0, X4_0, age_0, assigned.trt, Y, C, follow_up,
weight, trt_weight, weight_final, adherence)
#> Joining with `by = join_by(id, trt_weight)`
# Visualize the distribution of final weight
obsdata11 %>%
ggplot(aes(x = weight_final)) +
geom_histogram()
#> `stat_bin()` using `bins = 30`. Pick better value `binwidth`.
summary(obsdata11$weight_final)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.075 1.103 2.466 63.207 10.833 3196.226# ?? Add annotation to each row and argument of below to explain the indication
# ?? I think this function can only be used for emulating a sequence of trials, how to turn off period?
obsdata13 <- obsdata9 %>% mutate(eligible = 1)
PP_data <- data_preparation(
data = obsdata13,
id = "id",
period = "time",
treatment = "A",
outcome = "Y",
eligible = "eligible",
estimand_type = "PP",
outcome_cov = ~ X1 + X2 + X3 + X4 + age_s,
model_var = "assigned_treatment",
switch_d_cov = ~ X1 + X2 + X3 + X4 + age_s + follow_up + I(follow_up^2),
switch_n_cov = ~ X3 + X4 + follow_up + I(follow_up^2),
use_censor_weights = TRUE,
cense = "C",
cense_d_cov = ~ X1 + X2 + X3 + X4 + age_s,
cense_n_cov = ~ X3 + X4,
pool_cense = "numerator",
data_dir = working_dir,
save_weight_models = TRUE,
chunk_size = 500,
separate_files = TRUE,
quiet = TRUE)
# Extract the fitted adherence models
adh.model.n0 <- PP_data$switch_models$switch_n0
adh.model.d0 <- PP_data$switch_models$switch_d0
adh.model.n1 <- PP_data$switch_models$switch_n1
adh.model.d1 <- PP_data$switch_models$switch_d1
# Extract the fitted loss to follow-up models
cens.model.n <- PP_data$censor_models$cens_pool_n # we asked to fit a single model for the numerator
cens.model.d0 <- PP_data$censor_models$cens_d0
cens.model.d1 <- PP_data$censor_models$cens_d1# Drop the rows where individuals are censored either due to nonadherence or lost to follow-up
outcome.data <- obsdata12 %>% filter(adherence==1 & C==1)
# Fit a pooled logistic model
outcome.data.w <- svydesign(ids = ~1, weights = ~weight_final_trun, data = outcome.data)
plr.model <- svyglm(Y ~ assigned.trt + X1_0 + X2_0 + X3_0 + X4_0 + age_0 + follow_up + I(follow_up^2),
design = outcome.data.w,
family = binomial(link = "logit"))
summary(plr.model)
#>
#> Call:
#> svyglm(formula = Y ~ assigned.trt + X1_0 + X2_0 + X3_0 + X4_0 +
#> age_0 + follow_up + I(follow_up^2), design = outcome.data.w,
#> family = binomial(link = "logit"))
#>
#> Survey design:
#> svydesign(ids = ~1, weights = ~weight_final_trun, data = outcome.data)
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -28.84286 8.66779 -3.328 0.0012 **
#> assigned.trt 0.24531 0.72945 0.336 0.7373
#> X1_0 -0.25194 0.38093 -0.661 0.5098
#> X2_0 -0.14940 0.21638 -0.690 0.4914
#> X3_0 -0.02303 0.34471 -0.067 0.9469
#> X4_0 0.02996 0.28180 0.106 0.9155
#> age_0 0.04834 0.16804 0.288 0.7742
#> follow_up 0.12984 0.12981 1.000 0.3194
#> I(follow_up^2) -0.01475 0.00787 -1.874 0.0636 .
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> (Dispersion parameter for binomial family taken to be 6.910661e-14)
#>
#> Number of Fisher Scoring iterations: 25
# Extract the treatment effect estimate and 95% confidence interval
est <- summary(plr.model)$coefficients
est <- data.frame(variable = rownames(est), est)
est <- est %>%
filter(variable == "assigned.trt") %>%
mutate(lower = Estimate - 1.96 * Std..Error,
upper = Estimate + 1.96 * Std..Error) %>%
select(variable, Estimate, lower, upper)
est
#> variable Estimate lower upper
#> assigned.trt assigned.trt 0.2453106 -1.184413 1.675034??What is the truth and how does the estimated results compare?
plr.model.unadj <- glm(Y ~ assigned.trt + X1_0 + X2_0 + X3_0 + X4_0 + age_0 + follow_up + I(follow_up^2),
data = outcome.data,
family = binomial(link = "logit"))
summary(plr.model.unadj)
#>
#> Call:
#> glm(formula = Y ~ assigned.trt + X1_0 + X2_0 + X3_0 + X4_0 +
#> age_0 + follow_up + I(follow_up^2), family = binomial(link = "logit"),
#> data = outcome.data)
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) -2.657e+01 2.371e+06 0 1
#> assigned.trt -3.884e-14 2.361e+05 0 1
#> X1_0 -3.703e-14 9.421e+04 0 1
#> X2_0 4.593e-16 4.109e+04 0 1
#> X3_0 2.341e-14 8.743e+04 0 1
#> X4_0 -3.590e-15 6.266e+04 0 1
#> age_0 1.445e-14 4.638e+04 0 1
#> follow_up -1.025e-14 3.632e+04 0 1
#> I(follow_up^2) 6.497e-16 2.284e+03 0 1
#>
#> (Dispersion parameter for binomial family taken to be 1)
#>
#> Null deviance: 0.0000e+00 on 117 degrees of freedom
#> Residual deviance: 6.8459e-10 on 109 degrees of freedom
#> AIC: 18
#>
#> Number of Fisher Scoring iterations: 25
# Extract the treatment effect estimate and 95% confidence interval
est.unadj <- summary(plr.model.unadj)$coefficients
est.unadj <- data.frame(variable = rownames(est.unadj), est.unadj)
est.unadj <- est.unadj %>%
filter(variable == "assigned.trt") %>%
mutate(lower=(Estimate - 1.96 * Std..Error),
upper=(Estimate + 1.96 * Std..Error)) %>%
select(variable, Estimate, lower, upper)
est.unadj
#> variable Estimate lower upper
#> assigned.trt assigned.trt -3.883943e-14 -462830.6 462830.6The research reported in this publication was supported in part by the National Center for Advancing Translational Sciences of the National Institutes of Health under Award Number UM1 TR 004409. The content is solely the responsibility of the authors and does not necessarily represent the official views of the National Institutes of Health.
Corresponding author: yizhe.xu@hsc.utah.edu↩︎