Personal R tricks and code snippets.
| R | Python |
|---|---|
| count() | value_counts() |
| filter | .loc[] |
| skim | .describe() |
echo "R_BROWSER=zen-browser" >> ~/.Renviron
library(readxl) df <- read_excel("supplement-v2-for_BNX.xlsx", sheet = "Case List", skip = 5, n_max = 26)
source("script.R")
savehistory("script.R")
library(lintr) lint(filename="analyze.R")
colnames(df) <- make.names(str_trim(colnames(df)))
df <- df %>%
mutate(
new_column = str_glue("{column1} ({column2}–{column3}) [{column4}±{column5}]")
)
render
=== Order category values ==
library(forcats)
df %>%
ggplot( aes( x = fct_rev(fct_reorder(angiolog, groin.to.open, median)), y = groin.to.open)) +
geom_boxplot() +
ggtitle("Groin to open podle angiologa") +
ylab("groin to open [min]") +
xlab("angiolog")
library(ggpubr)
ggboxplot( df, x = "group", y = "age", color = "group", add = "jitter") +
stat_compare_means()
To change the test: stat_compare_means( method = 't.test' ). Method can be t.test, wilcox.test, anova, kruskal.test.
To get pairwise comparisons:
comparisons <- list( c("group1", "group2"), c("group2", "group3"), c("group1", "group3" ))
ggboxplot( df, x = "group", y = "age", color = "group", add = "jitter" ) +
stat_compare_means( comparisons = comparisons, label.y = c(58, 60, 62) ) +
stat_compare_means( method = 'kruskal.test', label.y = 66 )
First stat_compare_means does pairwise comparison, second stat_compare_means does global test.
df %>%
ggplot( aes( x = as.factor(rok) ) ) +
geom_bar() +
stat_count(geom = "text", size = 4, aes(label = ..count..), position=position_stack(vjust=1.15)) +
facet_grid(sluzba~.) +
ggtitle("Počet rekanalizací (IVT+MT) v jednotlivých letech dle pracovní doby") +
ylab("počet") +
xlab("rok")
Error in dyn.load(file, DLLpath = DLLpath, ...) : unable to load shared object '/home/pavel/R/x86_64-pc-linux-gnu-library/4.4/stringi/libs/stringi.so'
to solve:
install.packages("stringi")
getOption('timeout')
options(timeout=120)
unable to load shared object '/home/pavel/R/x86_64-pc-linux-gnu-library/4.5/xml2/libs/xml2.so'
sudo pacman -S libxml2-legacy
df <- df %>%
mutate(
tier = ifelse(
`STIM TYPE` == "SHAM",
"SHAM",
substr( `Study ID`, start = 1, stop = 1)
)
) %>%
drop_na(`Study ID`)
Descriptive statistics by group:
library(skimr) df %>% group_by(group) %>% skim()
df %>%
summarize(
"DTG_mean" = mean(door.to.groin, na.rm = TRUE),
"DTG_sd" = sd(door.to.groin, na.rm = TRUE),
"DTG_median" = median(door.to.groin, na.rm = TRUE),
"DTG_Q1" = quantile(door.to.groin, p = 0.25, na.rm = TRUE),
"DTG_Q3" = quantile(door.to.groin, p = 0.75, na.rm = TRUE),
"GTO_mean" = mean(groin.to.open, na.rm = TRUE),
"GTO_sd" = sd(groin.to.open, na.rm = TRUE),
"GTO_median" = median(groin.to.open, na.rm = TRUE),
"GTO_Q1" = quantile(groin.to.open, p = 0.25, na.rm = TRUE),
"GTO_Q3" = quantile(groin.to.open, p = 0.75, na.rm = TRUE),
"DTN_mean" = mean(door.to.needle, na.rm = TRUE),
"DTN_sd" = sd(door.to.needle, na.rm = TRUE),
"DTN_median" = median(door.to.needle, na.rm = TRUE),
"DTN_Q1" = quantile(door.to.needle, p = 0.25, na.rm = TRUE),
"DTN_Q3" = quantile(door.to.needle, p = 0.75, na.rm = TRUE),
) %>%
mutate(
door_to_groin = str_glue("{DTG_median} ({DTG_Q1}–{DTG_Q3}) [{DTG_mean}±{DTG_sd}]"),
groin_to_open = str_glue("{GTO_median} ({GTO_Q1}–{GTO_Q3}) [{GTO_mean}±{GTO_sd}]"),
door_to_needle = str_glue("{DTN_median} ({DTN_Q1}–{DTN_Q3}) [{DTN_mean}±{DTN_sd}]"),
) %>%
select(door_to_groin, groin_to_open, door_to_needle)
{{R kruskal.test(dNIHSS90 ~ tier, data = df) %>% print()
#! one-sided p-values!!! library(dunn.test) dunn.test(df\(dNIHSS90, df\)tier) dunn.test(df\(dNIHSS90, df\)tier, kw=TRUE, method="bonferroni")
#two sided p-values!!! library(FSA) dunnTest( dNIHSS90 ~ tier, df) dunnTest( dNIHSS90 ~ tier, df, method="bonferroni") }}
test <- df %>% group_by(group) %>% select( group, sex ) %>% table() %>% fisher.test() library(broom) tidy(test)$p.value
test <- aov(age ~ group, df) library(broom) p <- tidy(test)$p.value
`test <- kruskal.test( age ~ group, df )
library(broom) p <- tidy(test)$p.value`
fisher_p_values <- tibble(
test = c( "humoral_month3", "humoral_month12", "cellular_month3", "cellular_month12"),
p_value = c(
response %>%
filter( month == 3 ) %>%
select( group, humoral_response ) %>%
table() %>%
fisher.test() %>%
tidy() %>%
select(p.value) %>%
pull(),
response %>%
filter( month == 12 ) %>%
select( group, humoral_response ) %>%
table() %>%
fisher.test() %>%
tidy() %>%
select(p.value)
%>% pull(),
response %>%
filter( month == 3 ) %>%
select( group, cellular_response ) %>%
table() %>%
fisher.test() %>%
tidy() %>%
select(p.value) %>%
pull(),
response %>%
filter( month == 12 ) %>%
select( group, cellular_response ) %>%
table() %>%
fisher.test() %>%
tidy() %>%
select(p.value) %>%
pull()
)) %>% mutate(
Bonferroni = p.adjust( p_value, method = "bonferroni"),
Benjamini_Hochberg = p.adjust( p_value, method = "BH" ),
)
library(tidyverse)
library(readxl)
library(lubridate)
library(ggplot2)
library(lme4)
library(brms)
library(car)
library(skimr)
library(broom)
library(ggpubr)
print(version)
packageVersion("tidyverse")
packageVersion("readxl")
packageVersion("lubridate")
packageVersion("ggplot2")
packageVersion("lme4")
packageVersion("brms")
packageVersion("car")
packageVersion("skimr")
packageVersion("broom")
packageVersion("ggpubr")
df_long <- df %>%
pivot_longer(
cols = starts_with( "humoral_" ),
names_to = "month",
names_prefix = "humoral_M",
values_to = "humoral_response"
) %>%
mutate( month = as.numeric(month) )
df_joined <- df1 %>%
inner_join(
df2,
by = join_by( subject_id == subject_id, month == month, group == group, sex == sex, age == age )
) %>% select(
subject_id,
group,
sex,
age,
month,
cellular_response,
humoral_response,
)
library(lme4) model <- lmer( outcome ~ group + month + age + sex + (1 | subject_id), data = df ) Anova(model)
Subject_id as a random effect:
library(lme4) model <- glmer( binary_outcome ~ group + month + age + sex + (1 | subject_id), data = df, family = binomial ) Anova(model)
library(brms) model <- brm( outcome ~ group + sex + age + month + (1|subject_id), data = response ) plot(model) posterior_interval(model)
library(brms) model <- brm( binary_outcome ~ group + sex + age + month + (1|subject_id), family = bernoulli, data = response ) plot(model) posterior_interval(model)