Gallery showing various tables possible with the {gtsummary} package. If you have created an interesting table using {gtsummary}, please submit it to the gallery via a pull request to the GitHub repository.
library(gtsummary); library(gt); library(survival)
library(dplyr); library(stringr); library(purrr); library(forcats)
Add a spanning header over the group columns for increased clarity, and modifying column headers.
trial %>%
select(trt, age, grade) %>%
tbl_summary(
by = trt,
missing = "no",
statistic = all_continuous() ~ "{median} ({p25}, {p75})"
) %>%
modify_header(stat_by = md("**{level}**<br>N = {n} ({style_percent(p)}%)")) %>%
add_n() %>%
bold_labels() %>%
modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**")
Characteristic | N | Chemotherapy Treatment | |
---|---|---|---|
Drug A N = 98 (49%)1 |
Drug B N = 102 (51%)1 |
||
Age | 189 | 46 (37, 59) | 48 (39, 56) |
Grade | 200 | ||
I | 35 (36%) | 33 (32%) | |
II | 32 (33%) | 36 (35%) | |
III | 31 (32%) | 33 (32%) | |
1
Median (IQR); n (%)
|
Show continuous summary statistics on multiple lines.
trial %>%
select(trt, age, marker) %>%
tbl_summary(
by = trt,
type = all_continuous() ~ "continuous2",
statistic = all_continuous() ~ c("{N_nonmiss}",
"{mean} ({sd})",
"{median} ({p25}, {p75})",
"{min}, {max}"),
missing = "no"
) %>%
italicize_levels()
Characteristic | Drug A, N = 98 | Drug B, N = 102 |
---|---|---|
Age | ||
N | 91 | 98 |
Mean (SD) | 47 (15) | 47 (14) |
Median (IQR) | 46 (37, 59) | 48 (39, 56) |
Range | 6, 78 | 9, 83 |
Marker Level (ng/mL) | ||
N | 92 | 98 |
Mean (SD) | 1.02 (0.89) | 0.82 (0.83) |
Median (IQR) | 0.84 (0.24, 1.57) | 0.52 (0.19, 1.20) |
Range | 0.00, 3.87 | 0.00, 3.64 |
Modify the function that formats the p-values, change variable labels, updating tumor response header, and add a correction for multiple testing.
trial %>%
select(response, age, grade) %>%
mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) %>%
tbl_summary(
by = response,
missing = "no",
label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
) %>%
add_p(pvalue_fun = ~style_pvalue(.x, digits = 2)) %>%
add_q()
Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | p-value2 | q-value3 |
---|---|---|---|---|
Patient Age | 46 (36, 55) | 49 (43, 59) | 0.091 | 0.18 |
Tumor Grade | 0.93 | 0.93 | ||
I | 46 (35%) | 21 (34%) | ||
II | 44 (33%) | 19 (31%) | ||
III | 42 (32%) | 21 (34%) | ||
1
Median (IQR); n (%)
2
Wilcoxon rank sum test; Pearson's Chi-squared test
3
False discovery rate correction for multiple testing
|
Include missing tumor response as column using fct_explicit_na()
.
trial %>%
select(response, age, grade) %>%
mutate(
response = factor(response, labels = c("No Tumor Response", "Tumor Responded")) %>%
fct_explicit_na(na_level = "Missing Response Status")
) %>%
tbl_summary(
by = response,
label = list(age ~ "Patient Age", grade ~ "Tumor Grade")
)
Characteristic | No Tumor Response, N = 1321 | Tumor Responded, N = 611 | Missing Response Status, N = 71 |
---|---|---|---|
Patient Age | 46 (36, 55) | 49 (43, 59) | 52 (44, 57) |
Unknown | 7 | 3 | 1 |
Tumor Grade | |||
I | 46 (35%) | 21 (34%) | 1 (14%) |
II | 44 (33%) | 19 (31%) | 5 (71%) |
III | 42 (32%) | 21 (34%) | 1 (14%) |
1
Median (IQR); n (%)
|
Paired t-test
# imagine that each patient recieved Drug A and Drug B (adding ID showing their paired measurements)
trial_paired <-
trial %>%
select(trt, marker) %>%
group_by(trt) %>%
mutate(id = row_number()) %>%
ungroup()
# you must first delete incomplete pairs from the data, then you can build the table
trial_paired %>%
# delete missing marker values
filter(!is.na(marker)) %>%
# keep IDs with both measurements
group_by(id) %>%
filter(n() == 2) %>%
ungroup() %>%
# summarize data
tbl_summary(by = trt, include = -id) %>%
add_p(test = marker ~ "paired.t.test", group = id)
Characteristic | Drug A, N = 881 | Drug B, N = 881 | p-value2 |
---|---|---|---|
Marker Level (ng/mL) | 0.84 (0.24, 1.66) | 0.52 (0.17, 1.22) | 0.2 |
1
Median (IQR)
2
Paired t-test
|
Include p-values comparing all groups to a single reference group.
# table summarizing data with no p-values
t0 <- trial %>%
select(grade, age, response) %>%
tbl_summary(by = grade, missing = "no") %>%
modify_header(stat_by = md("**{level}**"))
# table comparing grade I and II
t1 <- trial %>%
select(grade, age, response) %>%
filter(grade %in% c("I", "II")) %>%
tbl_summary(by = grade, missing = "no") %>%
add_p() %>%
modify_header(p.value ~ md("**I vs. II**")) %>%
# hide summary stat columns
modify_table_header(all_stat_cols(), hide = TRUE)
# table comparing grade I and II
t2 <- trial %>%
select(grade, age, response) %>%
filter(grade %in% c("I", "III")) %>%
tbl_summary(by = grade, missing = "no") %>%
add_p() %>%
modify_header(p.value ~ md("**I vs. III**")) %>%
# hide summary stat columns
modify_table_header(all_stat_cols(), hide = TRUE)
# merging the 3 tables together, and adding additional gt formatting
tbl_merge(list(t0, t1, t2)) %>%
modify_spanning_header(
list(
all_stat_cols() ~ "**Tumor Grade**",
starts_with("p.value") ~ "**p-values**"
)
)
Characteristic | Tumor Grade | p-values | |||
---|---|---|---|---|---|
I1 | II1 | III1 | I vs. II2 | I vs. III2 | |
Age | 47 (37, 56) | 48 (37, 57) | 47 (38, 58) | 0.7 | 0.5 |
Tumor Response | 21 (31%) | 19 (30%) | 21 (33%) | >0.9 | 0.9 |
1
Median (IQR); n (%)
2
Wilcoxon rank sum test; Fisher's exact test
|
Add additional statistics as additional columns.
# define function for lower and upper bounds of the mean CI
ll <- function(x) t.test(x)$conf.int[1]
ul <- function(x) t.test(x)$conf.int[2]
t1 <-
trial %>%
select(age, marker) %>%
tbl_summary(statistic = all_continuous() ~ "{mean} ({sd})", missing = "no") %>%
modify_header(stat_0 ~ "**Mean (SD)**")
t2 <-
trial %>%
select(age, marker) %>%
tbl_summary(statistic = all_continuous() ~ "{ll}, {ul}", missing = "no") %>%
modify_header(stat_0 ~ "**95% CI for Mean**")
tbl_merge(list(t1, t2)) %>%
modify_footnote(everything() ~ NA_character_) %>%
modify_spanning_header(everything() ~ NA_character_)
Characteristic | Mean (SD) | 95% CI for Mean |
---|---|---|
Age | 47 (14) | 45, 49 |
Marker Level (ng/mL) | 0.92 (0.86) | 0.79, 1.04 |
Include number of observations and the number of events in a univariate regression table.
trial %>%
select(response, age, grade) %>%
tbl_uvregression(
method = glm,
y = response,
method.args = list(family = binomial),
exponentiate = TRUE
) %>%
add_nevent()
Characteristic | N | Event N | OR1 | 95% CI1 | p-value |
---|---|---|---|---|---|
Age | 183 | 58 | 1.02 | 1.00, 1.04 | 0.10 |
Grade | 193 | 61 | |||
I | — | — | |||
II | 0.95 | 0.45, 2.00 | 0.9 | ||
III | 1.10 | 0.52, 2.29 | 0.8 | ||
1
OR = Odds Ratio, CI = Confidence Interval
|
Include two related models side-by-side with descriptive statistics.
gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) %>%
tbl_regression(exponentiate = TRUE)
gt_r2 <- coxph(Surv(ttdeath, death) ~ trt + grade, trial) %>%
tbl_regression(exponentiate = TRUE)
gt_t1 <- trial[c("trt", "grade")] %>%
tbl_summary(missing = "no") %>%
add_n() %>%
modify_header(stat_0 ~ "**n (%)**") %>%
modify_footnote(stat_0 ~ NA_character_)
tbl_merge(
list(gt_t1, gt_r1, gt_r2),
tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**")
)
Characteristic | N | n (%) | Tumor Response | Time to Death | ||||
---|---|---|---|---|---|---|---|---|
OR1 | 95% CI1 | p-value | HR1 | 95% CI1 | p-value | |||
Chemotherapy Treatment | 200 | |||||||
Drug A | 98 (49%) | — | — | — | — | |||
Drug B | 102 (51%) | 1.21 | 0.66, 2.24 | 0.5 | 1.25 | 0.86, 1.81 | 0.2 | |
Grade | 200 | |||||||
I | 68 (34%) | — | — | — | — | |||
II | 68 (34%) | 0.94 | 0.44, 1.98 | 0.9 | 1.28 | 0.80, 2.06 | 0.3 | |
III | 64 (32%) | 1.09 | 0.52, 2.27 | 0.8 | 1.69 | 1.07, 2.66 | 0.024 | |
1
OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio
|
Include the number of events at each level of a categorical predictor.
gt_model <-
trial %>%
select(ttdeath, death, stage, grade) %>%
tbl_uvregression(
method = coxph,
y = Surv(ttdeath, death),
exponentiate = TRUE,
hide_n = TRUE
)
gt_eventn <-
trial %>%
filter(death == 1) %>%
select(stage, grade) %>%
tbl_summary(
statistic = all_categorical() ~ "{n}",
label = list(stage ~ "T Stage", grade ~ "Grade")
) %>%
modify_header(stat_0 ~ "**Event N**") %>%
modify_footnote(everything() ~ NA_character_)
tbl_merge(list(gt_eventn, gt_model)) %>%
bold_labels() %>%
italicize_levels() %>%
modify_spanning_header(everything() ~ NA_character_)
Characteristic | Event N | HR1 | 95% CI1 | p-value |
---|---|---|---|---|
T Stage | ||||
T1 | 24 | — | — | |
T2 | 27 | 1.18 | 0.68, 2.04 | 0.6 |
T3 | 22 | 1.23 | 0.69, 2.20 | 0.5 |
T4 | 39 | 2.48 | 1.49, 4.14 | <0.001 |
Grade | ||||
I | 33 | — | — | |
II | 36 | 1.28 | 0.80, 2.05 | 0.3 |
III | 43 | 1.69 | 1.07, 2.66 | 0.024 |
1
HR = Hazard Ratio, CI = Confidence Interval
|
Regression model where the covariate remains the same, and the outcome changes.
tbl_reg <-
trial %>%
select(age, marker, trt) %>%
tbl_uvregression(
method = lm,
x = trt,
show_single_row = "trt",
hide_n = TRUE
) %>%
modify_header(list(
label ~"**Model Outcome**",
estimate ~ "**Treatment Coef.**"
))
tbl_reg %>%
modify_footnote(estimate ~ "Values larger than 0 indicate larger values in the Drug group.")
Model Outcome | Treatment Coef.1 | 95% CI2 | p-value |
---|---|---|---|
Age | 0.44 | -3.7, 4.6 | 0.8 |
Marker Level (ng/mL) | -0.20 | -0.44, 0.05 | 0.12 |
1
Values larger than 0 indicate larger values in the Drug group.
2
CI = Confidence Interval
|
Add descriptive statistics by treatment group to the table above to produce a table often reported two group comparisons.
gt_sum <-
trial %>%
select(age, marker, trt) %>%
mutate(trt = fct_rev(trt)) %>%
tbl_summary(by = trt,
statistic = all_continuous() ~ "{mean} ({sd})",
missing = "no") %>%
add_n() %>%
modify_header(stat_by = md("**{level}**"))
tbl_merge(list(gt_sum, tbl_reg)) %>%
modify_header(estimate_2 ~ "**Difference**") %>%
modify_spanning_header(everything() ~ NA_character_)
Characteristic | N | Drug B1 | Drug A1 | Difference | 95% CI2 | p-value |
---|---|---|---|---|---|---|
Age | 189 | 47 (14) | 47 (15) | 0.44 | -3.7, 4.6 | 0.8 |
Marker Level (ng/mL) | 190 | 0.82 (0.83) | 1.02 (0.89) | -0.20 | -0.44, 0.05 | 0.12 |
1
Mean (SD)
2
CI = Confidence Interval
|
Implement a custom tidier to report Wald confidence intervals. The Wald confidence intervals are calculated using confint.default()
.
my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) {
dplyr::bind_cols(
broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE),
# calculate the confidence intervals, and save them in a tibble
stats::confint.default(x) %>%
tibble::as_tibble() %>%
rlang::set_names(c("conf.low", "conf.high")) )
}
lm(age ~ grade + marker, trial) %>%
tbl_regression(tidy_fun = my_tidy)
Characteristic | Beta | 95% CI1 | p-value |
---|---|---|---|
Grade | |||
I | — | — | |
II | 0.64 | -4.6, 5.9 | 0.8 |
III | 2.4 | -2.8, 7.6 | 0.4 |
Marker Level (ng/mL) | -0.04 | -2.6, 2.5 | >0.9 |
1
CI = Confidence Interval
|