# Parameter values explored by the grid search.
# Number of subjects per simulated experiment.
subj_n <- seq(10, 100, by = 10)
# Number of trials per subject (passed as `trial_n` to the simulation).
trial_n <- seq(4, 12, by = 2)
# Number of simulated data sets / fitted models per grid point.
NumberOfModels <- 25
# One row per (subj_n, trial_n) combination.
param_combinations <- expand.grid(subj_n = subj_n, trial_n = trial_n)
Grid Searches and Sensitivity Analyses
1 Using Grid Searches to Explore the Multiverse
It should be clear from the previous sections that the data simulation process involves a multiverse of experimenter choices. One way to explore (and calm our fears about) the respective importance of these individual choices and their interactions would be to conduct a sensitivity analysis with a grid search over all available combinations of the parameter values of interest. To do this, we can wrap our simulation-and-modelling function in a higher-level function that takes as input a series of parameter combinations that we are interested in exploring further. Here is a suggestion for a function of this type. We can start by creating a grid (a data frame with one row per combination) of the parameter values that we are interested in.
#' Simulate one data set, fit the mixed model and log the tidy results.
#'
#' @param filename_full Path of the CSV file the tidy model table is written
#'   to. The file is created on the first call and appended to afterwards.
#' @param trial_n Total number of trials; split evenly between `n_ADS` and
#'   `n_IDS` when calling `SimulateEffectSizeData()`.
#' @param subj_n Number of subjects, passed on as `n_subj`.
#' @return The table returned by `broom.mixed::tidy()` for the fitted model.
run_sims_grid_point <- function(filename_full, trial_n, subj_n) {
  # Half of the trials go to each of the two conditions.
  ADS_n <- trial_n / 2
  IDS_n <- trial_n / 2
  n_subj <- subj_n

  dataSimulated <- SimulateEffectSizeData(
    n_subj = n_subj,
    n_ADS = ADS_n,
    n_IDS = IDS_n
  )

  model <- lmer(
    EF ~ 1 + SpeechStyle + (1 | item_id) + (1 + SpeechStyle | subj_id),
    data = dataSimulated
  )

  sim_results <- broom.mixed::tidy(model)

  # Append the results to the file; on the first call the file does not exist
  # yet, so `append` is FALSE and write_csv() creates it.
  append <- file.exists(filename_full)
  write_csv(sim_results, filename_full, append = append)

  # Return the tidy table.
  sim_results
}
# Make a folder to store the output of the simulation function (only if it
# does not exist yet), then make it the working directory.
if (!file.exists(here("sims_grid_search"))) {
  dir.create(here("sims_grid_search"))
}
setwd(here("sims_grid_search"))
# Run `NumberOfModels` simulations for every row of the parameter grid.
# Each grid point gets its own CSV file, named after its parameter values
# (test_grid_search_<subj_n>_<trial_n>.csv), which later code parses.
for (i in seq_len(nrow(param_combinations))) {
  sim_params <- param_combinations[i, ]
  filename_full <- paste0(
    here("sims_grid_search/test_grid_search_"),
    sim_params$subj_n, "_", sim_params$trial_n, ".csv"
  )

  start_time <- Sys.time() # Start time
  # seq_len() keeps the map empty when NumberOfModels is 0 (1:0 would not).
  sims <- purrr::map_df(
    seq_len(NumberOfModels),
    ~ run_sims_grid_point(
      filename_full = filename_full,
      subj_n = sim_params$subj_n,
      trial_n = sim_params$trial_n
    )
  )
  end_time <- Sys.time() # End time

  # format() keeps the time unit (secs/mins/...), which a bare difftime
  # passed to cat() would drop.
  cat("Simulation", i, "Time elapsed:", format(end_time - start_time), "\n")
}
# Collect the per-grid-point result files written by the simulation loop.
setwd(here("sims_grid_search"))
# `pattern` is a regular expression, not a glob: match a literal ".csv"
# extension at the end of the file name.
file_names <- list.files(pattern = "\\.csv$")

# Read all CSV files into a list of data frames, tagging every row with the
# file it came from (the file name encodes the grid-point parameters).
df_list <- purrr::map(file_names, ~ {
  df <- read.csv(.x)
  df$filename <- .x
  df
})

# Stack the per-file data frames into one.
df <- purrr::reduce(df_list, dplyr::bind_rows)
# Summarise the fixed effect of SpeechStyle per result file (= grid point).
# `power` is the share of simulations with p < 0.05.
# NOTE(review): assumes the tidy output contains a `p.value` column - confirm.
df_per_sim <- df %>%
  filter(effect == "fixed") %>%
  filter(term == "SpeechStyle") %>%
  group_by(filename) %>%
  summarise(
    median_estimate = median(estimate),
    median_se = median(std.error),
    power = mean(p.value < 0.05)
  )

# Recover the grid-point parameters from the file name
# "test_grid_search_<subj_n>_<trial_n>.csv": splitting on "_" puts the
# subject count in position 4 and "<trial_n>.csv" in position 5.
PowerGridData <- df_per_sim %>%
  mutate(
    n_subj = as.numeric(
      vapply(strsplit(filename, "_"), `[`, character(1), 4)
    ),
    n_trial = str_replace(
      vapply(strsplit(filename, "_"), `[`, character(1), 5),
      pattern = "\\.csv$", ""
    )
  ) %>%
  # Order the levels numerically rather than alphabetically.
  mutate(n_trial = factor(n_trial, levels = c("4", "6", "8", "10", "12")))
# Power as a function of sample size, one line per number of trials; the
# dotted horizontal line marks power = 0.8.
ggplot(PowerGridData, aes(x = n_subj, y = power, color = n_trial)) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = 0.8, linetype = 3) +
  xlim(c(0, 110)) +
  labs(
    x = "Sample Size",
    y = "Statistical Power",
    title = "Interaction among Sample Size & Repeated Measures"
  ) +
  scale_color_brewer(palette = "Dark2") +
  plot_theme
2 Exercises to Check Understanding
2.1 Exercise VI
How would you adapt the above grid search code to investigate the effect of varying the number of subjects and different effect sizes?
Show the code
# Make a folder for the exercise output (only if it does not exist yet),
# then make it the working directory.
if (!file.exists(here("sims_grid_search_exercise_6"))) {
  dir.create(here("sims_grid_search_exercise_6"))
}
setwd(here("sims_grid_search_exercise_6"))
# Parameter values explored in the exercise.
# Number of subjects per simulated experiment.
subj_n <- seq(2, 50, by = 3)
# Effect sizes (passed as `mean_slope` to the simulation).
effectsize <- seq(0.3, 0.9, by = 0.3)
# Number of simulated data sets / fitted models per grid point.
NumberOfModels <- 400
# One row per (subj_n, effectsize) combination.
param_combinations <- expand.grid(subj_n = subj_n, effectsize = effectsize)
#' Simulate one data set for a given effect size, fit the mixed model and
#' log the tidy results.
#'
#' @param filename_full Path of the CSV file the tidy model table is written
#'   to. The file is created on the first call and appended to afterwards.
#' @param ef Effect size, passed on as `mean_slope` to
#'   `SimulateEffectSizeData()`.
#' @param subj_n Number of subjects, passed on as `n_subj`.
#' @return The table returned by `broom.mixed::tidy()` for the fitted model.
run_sims_grid_point <- function(filename_full, ef, subj_n) {
  # Use the `ef` argument as given. Overwriting it with the global
  # `effectsize` vector would make every grid point simulate the same
  # (vector-valued) slope regardless of the requested effect size.
  n_subj <- subj_n

  dataSimulated <- SimulateEffectSizeData(n_subj = n_subj, mean_slope = ef)

  model <- lmer(
    EF ~ 1 + SpeechStyle + (1 | item_id) + (1 + SpeechStyle | subj_id),
    data = dataSimulated
  )

  sim_results <- broom.mixed::tidy(model)

  # Append the results to the file; on the first call the file does not exist
  # yet, so `append` is FALSE and write_csv() creates it.
  append <- file.exists(filename_full)
  write_csv(sim_results, filename_full, append = append)

  # Return the tidy table.
  sim_results
}
# Run `NumberOfModels` simulations for every row of the parameter grid.
# Each grid point gets its own CSV file, named after its parameter values
# (test_grid_search_<subj_n>_<effectsize>.csv), which later code parses.
for (i in seq_len(nrow(param_combinations))) {
  sim_params <- param_combinations[i, ]
  # Use the full column name: `sim_params$ef` only resolves through `$`
  # partial matching on "effectsize".
  filename_full <- paste0(
    here("sims_grid_search_exercise_6/test_grid_search_"),
    sim_params$subj_n, "_", sim_params$effectsize, ".csv"
  )

  start_time <- Sys.time() # Start time
  # seq_len() keeps the map empty when NumberOfModels is 0 (1:0 would not).
  sims <- purrr::map_df(
    seq_len(NumberOfModels),
    ~ run_sims_grid_point(
      filename_full = filename_full,
      subj_n = sim_params$subj_n,
      ef = sim_params$effectsize
    )
  )
  end_time <- Sys.time() # End time

  # format() keeps the time unit (secs/mins/...), which a bare difftime
  # passed to cat() would drop.
  cat("Simulation", i, "Time elapsed:", format(end_time - start_time), "\n")
}
Show the code
# Collect the per-grid-point result files written by the exercise loop.
setwd(here("sims_grid_search_exercise_6"))
# `pattern` is a regular expression, not a glob: match a literal ".csv"
# extension at the end of the file name.
file_names <- list.files(pattern = "\\.csv$")

# Read all CSV files into a list of data frames, tagging every row with the
# file it came from (the file name encodes the grid-point parameters).
df_list <- purrr::map(file_names, ~ {
  df <- read.csv(.x)
  df$filename <- .x
  df
})

# Stack the per-file data frames into one.
df <- purrr::reduce(df_list, dplyr::bind_rows)
# Summarise the fixed effect of SpeechStyle per result file (= grid point).
# `power` is the share of simulations with p < 0.05.
# NOTE(review): assumes the tidy output contains a `p.value` column - confirm.
df_per_sim <- df %>%
  filter(effect == "fixed") %>%
  filter(term == "SpeechStyle") %>%
  group_by(filename) %>%
  summarise(
    median_estimate = median(estimate),
    median_se = median(std.error),
    power = mean(p.value < 0.05)
  )

# Recover the grid-point parameters from the file name
# "test_grid_search_<subj_n>_<effectsize>.csv": splitting on "_" puts the
# subject count in position 4 and "<effectsize>.csv" in position 5.
PowerGridData <- df_per_sim %>%
  mutate(
    n_subj = as.numeric(
      vapply(strsplit(filename, "_"), `[`, character(1), 4)
    ),
    # Anchor and escape the extension so only the trailing ".csv" is removed
    # (effect sizes such as "0.3" contain dots themselves).
    ef = as.factor(str_replace(
      vapply(strsplit(filename, "_"), `[`, character(1), 5),
      pattern = "\\.csv$", ""
    ))
  )
# Power as a function of sample size, one line per effect size; the dotted
# horizontal line marks power = 0.8.
ggplot(PowerGridData, aes(x = n_subj, y = power, color = ef)) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = 0.8, linetype = 3) +
  labs(
    x = "Sample Size",
    y = "Statistical Power",
    title = "Interaction among Number of Subjects & Effect Size"
  ) +
  scale_color_brewer(palette = "Dark2") +
  plot_theme