MATH 80667A - Week 2

library(hecedsm)
library(dplyr) # data manipulation
library(knitr) # data formatting
library(ggplot2) # grammar of graphics
library(emmeans) # marginal means and contrasts

# Load arithmetic data
data(arithmetic, package = "hecedsm")
# categorical variable = factor
str(arithmetic) # Look up data

'data.frame':   45 obs. of  2 variables:
 $ group: Factor w/ 5 levels "control 1","control 2",..: 1 1 1 1 1 1 1 1 1 2 ...
 $ score: num  17 14 24 20 24 23 16 15 24 21 ...

# Compute summary statistics
summary_stat <-
arithmetic |>
  group_by(group) |>
  summarize(mean = mean(score),
            sd = sd(score))
# Create HTML table
knitr::kable(summary_stat,
             digits = 2)

Table 1: Summary statistics (mean and standard deviation) of arithmetic scores per experimental group.

group	mean	sd
control 1	19.7	4.21
control 2	18.3	3.57
praise	27.4	2.46
reprove	23.4	3.09
ignore	16.1	3.62

# Boxplot with jittered data
ggplot(data = arithmetic,
       aes(x = group,
           y = score)) +
  geom_boxplot() + # box and whiskers plot
  # scatterplot, with horizontal jittering
  geom_jitter(width = 0.3,
              height = 0) +
  theme_bw()

Figure 1: Box and whisker plot with jittered data of arithmetic exam score per experimental group.

# Analysis of variance model - global test of equality of means
model <- lm(score ~ group, data = arithmetic)
# Compute mean for each group with pooled standard error
margmeans <- emmeans(model, specs = "group")
# Compute pairwise differences
contrast(margmeans,
         method = "pairwise",
         adjust = 'none',
         infer = TRUE) |>
  as_tibble() |> # transform to data frame
  filter(contrast == "praise - reprove") |>
  # extract the sole pairwise difference
  knitr::kable(digits = 3)

Table 2: Pairwise differences for praise vs reprove for the arithmetic data.

contrast	estimate	SE	df	lower.CL	upper.CL	t.ratio	p.value
praise - reprove	4	1.62	40	0.723	7.28	2.47	0.018

# Manual calculation of p-value
2*pt(q = 2.467, df = 45 - 5, lower.tail = FALSE)

[1] 0.018

# Manual calculation of confidence intervals
df <- model$df.residual # degrees of freedom
sigma <- summary(model)$sigma # standard deviation
se_diff <- sqrt(sigma^2/9 + sigma^2/9) # Standard error of difference in mean
# Difference between praise and reprove means
delta <- diff(predict(model, newdata = data.frame(group = c("reprove", "praise"))))
(ci <- delta + qt(c(0.025,0.975), df = df) * se_diff)

[1] 0.723 7.277