ANOVA + Mixed Effects

Animals

Getting set up

Load packages for data handling and plotting

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.3
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

clear global environment

rm(list = ls())

read in the data

df <- read.csv(url("https://raw.githubusercontent.com/ethanweed/ethanweed.github.io/master/r-tutorials/data/Animals.csv"), sep = ";")
#df <- read.csv("/Users/ethan/Desktop/Animals.csv", sep = ";")
df$X <- NULL

add a column to the dataframe with participant ID numbers

df$ID <- seq(1:length(df$ReadWord1))

make a new dataframe with the same data in the “long” format.

df_long <- gather(df, key = "Condition", value = "Time", -"ID")
df_long$Condition <- as.factor(df_long$Condition)
head(df_long)
##   ID Condition Time
## 1  1 ReadWord1 5.11
## 2  2 ReadWord1 5.83
## 3  3 ReadWord1 4.86
## 4  4 ReadWord1 7.31
## 5  5 ReadWord1 4.65
## 6  6 ReadWord1 4.98

plot the data

p1 <- ggplot(df_long, aes(x = Condition, y = Time, color = Condition)) +
  geom_boxplot() +
  geom_jitter() +
  coord_flip() +
  theme_classic() +
  labs(title = "Animals") +
  theme(legend.position="none")
plot(p1)

test for statistical significance

load libraries

library(psycho)
## message: psycho's `analyze()` is deprecated in favour of the report package. Check it out at https://github.com/easystats/report
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:psycho':
## 
##     golden
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step

rearrange data

# set up column with task categories
df_long$Task <- ifelse(
  df_long$Condition == 'ReadWord1' 
  | df_long$Condition == 'ReadWord2', 
  "Reading", "Naming")

# set up column with interference categories
df_long$Interference <- ifelse(
  df_long$Condition == 'ReadWord2' 
  | df_long$Condition == 'NameAnimal2', 
  'Interference', 
  'No_Interference')

model the data as a Repeated Measures ANOVA

#mod1 <- aov(Time ~  Task * Interference + Error(ID),  data = df_long)
mod1 <- aov(Time ~ Task + Interference + Task*Interference + Error(ID), data = df_long)

summarize the results of model 1

summary(mod1)
## 
## Error: ID
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  1  5.169   5.169               
## 
## Error: Within
##                   Df Sum Sq Mean Sq F value   Pr(>F)    
## Task               1 138.38  138.38   74.81 1.04e-12 ***
## Interference       1  35.35   35.35   19.11 4.14e-05 ***
## Task:Interference  1  34.56   34.56   18.68 4.93e-05 ***
## Residuals         71 131.32    1.85                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

report the results of model 1

res <- analyze(mod1)
## (Repeated measures ANOVAs are bad, you should use mixed-models...)
print(res)
##    - The effect of Task is significant (F(1, 71) = 74.81, p < .001) and can be considered as large (Partial Omega-squared = 0.50).
##    - The effect of Interference is significant (F(1, 71) = 19.11, p < .001) and can be considered as large (Partial Omega-squared = 0.20).
##    - The interaction between Task and Interference is significant (F(1, 71) = 18.69, p < .001) and can be considered as large (Partial Omega-squared = 0.19).

ok, since ANOVA’s are bad, we’ll do a mixed effects model then

mod2 <- lmer(Time ~ Task + Interference + Task*Interference + (1|ID), data=df_long)

summarize the results of mod 2

anova(mod2)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq Mean Sq NumDF DenDF F value    Pr(>F)    
## Task              138.375 138.375     1    54  86.411 8.496e-13 ***
## Interference       35.347  35.347     1    54  22.073 1.847e-05 ***
## Task:Interference  34.560  34.560     1    54  21.582 2.216e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

report the results of mod 2

res3 <- analyze(mod2)
print(res3)
## The overall model predicting Time (formula = Time ~ Task + Interference + Task * Interference + (1 | ID)) has an total explanatory power (conditional R2) of 65.73%, in which the fixed effects explain 59.43% of the variance (marginal R2). The model's intercept is at 9.42 (SE = 0.32, 95% CI [8.80, 10.03]). Within this model:
##    - The effect of TaskReading is significant (beta = -4.05, SE = 0.41, 95% CI [-4.84, -3.25], t(54) = -9.86, p < .001) and can be considered as large (std. beta = -1.89, std. SE = 0.19).
##    - The effect of InterferenceNo_Interference is significant (beta = -2.71, SE = 0.41, 95% CI [-3.51, -1.92], t(54) = -6.61, p < .001) and can be considered as large (std. beta = -1.27, std. SE = 0.19).
##    - The effect of TaskReading:InterferenceNo_Interference is significant (beta = 2.70, SE = 0.58, 95% CI [1.57, 3.82], t(54) = 4.65, p < .001) and can be considered as large (std. beta = 1.26, std. SE = 0.27).

or use pander

library(pander)
pander(anova(mod2))
Type III Analysis of Variance Table with Satterthwaite’s method
  Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
Task 138.4 138.4 1 54 86.41 8.496e-13
Interference 35.35 35.35 1 54 22.07 1.847e-05
Task:Interference 34.56 34.56 1 54 21.58 2.216e-05

Display Estimates for Conditions

# Display Estimates for Conditions
library(psycho)
library(pander)
mod_means <- get_means(mod2)
pander(mod_means)
Task Interference Mean SE df CI_lower CI_higher
Naming Interference 9.416 0.3159 67.14 8.785 10.05
Reading Interference 5.368 0.3159 67.14 4.738 5.999
Naming No_Interference 6.703 0.3159 67.14 6.073 7.334
Reading No_Interference 5.353 0.3159 67.14 4.723 5.984

Set up traditional ANOVA-style plot

p2 <- ggplot(mod_means, aes(x=Task, y=Mean, color=Interference, group=Interference)) +
  geom_line(position = position_dodge(.3)) +
  geom_pointrange(aes(ymin=CI_lower, ymax=CI_higher), 
                  position = position_dodge(.3)) +
  ylab("Response Time") +
  xlab("Task") +
  theme_classic()

traditional ANOVA-style plot

plot(p2)

two different ways to plot the same data

p3 <- ggpubr::ggarrange(p1, p2, ncol = 2)
plot(p3)