MATH 80667A - Week 2

library(hecedsm)
library(dplyr) # data manipulation
library(knitr) # data formatting
library(ggplot2) # grammar of graphics
library(emmeans) # marginal means and contrasts

# Load arithmetic data
data(arithmetic, package = "hecedsm")
# categorical variable = factor
str(arithmetic) # Look up data
'data.frame':   45 obs. of  2 variables:
 $ group: Factor w/ 5 levels "control 1","control 2",..: 1 1 1 1 1 1 1 1 1 2 ...
 $ score: num  17 14 24 20 24 23 16 15 24 21 ...
# Compute summary statistics
summary_stat <-
arithmetic |>
  group_by(group) |>
  summarize(mean = mean(score),
            sd = sd(score))
# Create HTML table
knitr::kable(summary_stat,
             digits = 2)
Table 1: Summary statistics (mean and standard deviation) of arithmetic scores per experimental group.
group mean sd
control 1 19.7 4.21
control 2 18.3 3.57
praise 27.4 2.46
reprove 23.4 3.09
ignore 16.1 3.62
# Boxplot with jittered data
ggplot(data = arithmetic,
       aes(x = group,
           y = score)) +
  geom_boxplot() + # box and whiskers plot
  # scatterplot, with horizontal jittering
  geom_jitter(width = 0.3,
              height = 0) +
  theme_bw()
Figure 1: Box and whisker plot with jittered data of arithmetic exam score per experimental group.
# Analysis of variance model - global test of equality of means
model <- lm(score ~ group, data = arithmetic)
# Compute mean for each group with pooled standard error
margmeans <- emmeans(model, specs = "group")
# Compute pairwise differences
contrast(margmeans,
         method = "pairwise",
         adjust = 'none',
         infer = TRUE) |>
  as_tibble() |> # transform to data frame
  filter(contrast == "praise - reprove") |>
  # extract the sole pairwise difference
  knitr::kable(digits = 3)
Table 2: Pairwise differences for praise vs reprove for the arithmetic data.
contrast estimate SE df lower.CL upper.CL t.ratio p.value
praise - reprove 4 1.62 40 0.723 7.28 2.47 0.018
# Manual calculation of p-value
2*pt(q = 2.467, df = 45 - 5, lower.tail = FALSE)
[1] 0.018
# Manual calculation of confidence intervals
df <- model$df.residual # degrees of freedom
sigma <- summary(model)$sigma # standard deviation
se_diff <- sqrt(sigma^2/9 + sigma^2/9) # Standard error of difference in mean
# Difference between praise and reprove means
delta <- diff(predict(model, newdata = data.frame(group = c("reprove", "praise"))))
(ci <- delta + qt(c(0.025,0.975), df = df) * se_diff)
[1] 0.723 7.277