MATH 80667A - Week 13

Count data

Example 1 - Duke and Amir (2023), experiment 2

library(coin)
library(dplyr)
library(gnm)
data(DA23_E2, package = "hecedsm")
tabs <- with(DA23_E2, table(purchased, format))
# Chi-square test for independence
(chisq <- chisq.test(tabs))

    Pearson's Chi-squared test with Yates' continuity correction

data:  tabs
X-squared = 21, df = 1, p-value = 5e-06
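A simple effect size for this 2 by 2 table is the phi coefficient, the square root of the chi-square statistic divided by the sample size; the short sketch below recomputes the statistic without continuity correction (the name X2 is only illustrative).
# Phi coefficient: sqrt(X^2 / n), using the uncorrected statistic
X2 <- chisq.test(tabs, correct = FALSE)$statistic
sqrt(X2 / sum(tabs))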

Example 2 - Elliot et al. (2021) multilab

data(MULTI21_D1, package = "hecedsm")
# Create a contingency table
(contingency <- xtabs( #pool data
  count ~ age + frequency,
  data = MULTI21_D1))
      frequency
age    never sometimes usually
  5yo     53        80      87
  6yo     28        84     141
  7yo     19        73     177
  10yo    14        40     181
# No correction to get same result as Poisson regression model
(chisqtest <- chisq.test(contingency, correct = FALSE))

    Pearson's Chi-squared test

data:  contingency
X-squared = 87, df = 6, p-value <2e-16
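The statistic compares the observed counts to the counts expected under independence (row total times column total, divided by the grand total); chisq.test stores these in the expected element, which can be inspected directly.
# Expected counts under independence
chisqtest$expected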
# Two-way table with (I,J) categories
# Here, I=4 (age group) and J=3 (frequency),
# so the test of independence has (I-1)*(J-1) = 3*2 = 6 degrees of freedom
MULTI21_D1_long <- MULTI21_D1 |> # pool counts by age and frequency
  dplyr::group_by(age, frequency) |>
  dplyr::summarize(total = sum(count)) # aggregate counts
MULTI21_D1_long |>
  knitr::kable()
Table 1: Count data for Elliot et al. (2021) in long format.
age   frequency  total
5yo   never         53
5yo   sometimes     80
5yo   usually       87
6yo   never         28
6yo   sometimes     84
6yo   usually      141
7yo   never         19
7yo   sometimes     73
7yo   usually      177
10yo  never         14
10yo  sometimes     40
10yo  usually      181
# Fit Poisson regression
mod_main <- glm(total ~ age + frequency, # null model, no interaction
    family = poisson, data = MULTI21_D1_long)
mod_satur <- glm(total ~ age * frequency, # saturated model
    family = poisson, data = MULTI21_D1_long)
# The saturated model returns the observed counts
isTRUE(all.equal(
    target = predict(mod_satur, type = "response"),
    current = MULTI21_D1_long$total,
    check.attributes = FALSE))
[1] TRUE
# Likelihood ratio and score tests: two general families of testing procedures
anova(mod_main, mod_satur, test = "LRT")  # likelihood ratio test, aka deviance stat
Analysis of Deviance Table

Model 1: total ~ age + frequency
Model 2: total ~ age * frequency
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
1         6       85.2                         
2         0        0.0  6     85.2    3e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod_main, mod_satur, test = "Rao")  # Rao score test, aka Pearson chi-square test
Analysis of Deviance Table

Model 1: total ~ age + frequency
Model 2: total ~ age * frequency
  Resid. Df Resid. Dev Df Deviance  Rao Pr(>Chi)    
1         6       85.2                              
2         0        0.0  6     85.2 87.5   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# We can also compute them manually without fitting the saturated model
# The Pearson chi-square stat is the sum of squared Pearson residuals
PearsonX2 <- sum(resid(mod_main, type = "pearson")^2)
pchisq(q = PearsonX2, df = mod_main$df.residual, lower.tail = FALSE)
[1] 1.02e-16
# The likelihood ratio test is obtained from the deviance
pchisq(q = deviance(mod_main),
       df = mod_main$df.residual, lower.tail = FALSE)
[1] 2.95e-16
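As a sanity check, the Pearson statistic computed from the main-effects Poisson model should match the chi-square statistic obtained earlier from the contingency table; a one-line verification follows.
# Same value as chisqtest$statistic, up to numerical tolerance
isTRUE(all.equal(unname(chisqtest$statistic), PearsonX2))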

Example 3 - Bertrand and Mullainathan (2004)

data(BM04_T2, package = "hecedsm")
# Symmetric model with 6 parameters (3 diag + 3 upper triangular)
mod_null <- glm(count ~ gnm::Symm(black, white),
                data = BM04_T2,
                family = poisson)
# Likelihood ratio test against the saturated model, based on the deviance
pchisq(deviance(mod_null), lower.tail = FALSE,
       df = mod_null$df.residual) # 9 cells - 6 parameters = 3
[1] 3.25e-06
PearsonX2 <- sum(residuals(mod_null, type = "pearson")^2)
pchisq(PearsonX2, df = mod_null$df.residual, lower.tail = FALSE)
[1] 5.07e-06
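To see where the departure from symmetry lies, one could place the observed counts next to the fitted counts of the symmetric model; this minimal sketch simply binds the fitted values to the data.
# Observed versus fitted counts under the symmetry model
cbind(BM04_T2, fitted = round(fitted(mod_null), 1))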

Nonparametric tests

Example 1 - Brucks and Levav (2022)

data(BL22_E, package = "hecedsm")
# Two-sample comparison using Mann-Whitney or Wilcoxon rank-sum test
mww <- coin::wilcox_test(
  partner_time ~ cond,
  data = BL22_E,
  conf.int = TRUE)
mww

    Asymptotic Wilcoxon-Mann-Whitney Test

data:  partner_time by cond (f2f, video)
Z = -6, p-value = 1e-10
alternative hypothesis: true mu is not equal to 0
95 percent confidence interval:
 -50.7 -25.9
sample estimates:
difference in location 
                 -37.8 
# The point estimate is the Hodges-Lehmann estimator of the location shift
# The estimate and confidence interval bounds are times in seconds
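The Hodges-Lehmann estimator corresponds to the median of all pairwise between-group differences; the sketch below recovers it directly, assuming the two levels of cond are f2f and video as in the output above (x and y are illustrative names).
# Median of all pairwise differences (f2f minus video)
x <- with(BL22_E, partner_time[cond == "f2f"])
y <- with(BL22_E, partner_time[cond == "video"])
median(outer(x, y, FUN = "-"))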

# Compare with the Welch two-sample t-test
(welch_ttest <- t.test(partner_time ~ cond,
  data = BL22_E,
  conf.int = TRUE))

    Welch Two Sample t-test

data:  partner_time by cond
t = -6, df = 268, p-value = 1e-08
alternative hypothesis: true difference in means between group f2f and group video is not equal to 0
95 percent confidence interval:
 -52.9 -26.5
sample estimates:
  mean in group f2f mean in group video 
               51.7                91.4 

Example 2 - Brodeur et al. (2021)

data(BRLS21_T3, package = "hecedsm")
# Friedman test (more popular, but less powerful than Quade)
friedman <- coin::friedman_test(
  nviolation ~ task | id,
  data = BRLS21_T3)
friedman

    Asymptotic Friedman Test

data:  nviolation by
     task (phone, watch, speaker, texting) 
     stratified by id
chi-squared = 19, df = 3, p-value = 3e-04
# Quade test
quade <- coin::quade_test(
  nviolation ~ task | id,
  data = BRLS21_T3)
quade

    Asymptotic Quade Test

data:  nviolation by
     task (phone, watch, speaker, texting) 
     stratified by id
chi-squared = 22, df = 3, p-value = 8e-05
# Effect size for rank-based tests
eff_size <- effectsize::kendalls_w(
  x = "nviolation",
  groups = "task",
  blocks = "id",
  data = BRLS21_T3)
eff_size |> 
  knitr::kable(digits = 2,
               col.names = c("Kendall's W", "level", "lower CL", "upper CL"))
Table 2: Effect size for rank-based test.
Kendall’s W  level  lower CL  upper CL
        0.2   0.95       0.1         1
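Kendall's W is linked to the Friedman statistic through W = chi-square / {n(k - 1)}, with n blocks (participants) and k tasks; up to tie corrections, the sketch below should agree with the value reported in Table 2.
# Recover Kendall's W from the Friedman chi-square statistic
n <- length(unique(BRLS21_T3$id))
k <- nlevels(factor(BRLS21_T3$task))
coin::statistic(friedman) / (n * (k - 1))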
# Compute ranks separately for each person
BRLS21_T3_rank <- BRLS21_T3 |>
  group_by(id) |>
  mutate(rank = rank(nviolation)) |>
  ungroup()
# Which task has the highest average rank?
BRLS21_T3_rank |>
  group_by(task) |>
  summarize(mrank = mean(rank)) |>
  knitr::kable(col.names = c("task","mean rank"))
Table 3: Average rank of the number of road safety violations.
task     mean rank
phone         2.19
watch         2.19
speaker       2.26
texting       3.35
# So texting leads to more violations
# Transform to wide format - one line per id
smartwatch <- tidyr::pivot_wider(
  data = BRLS21_T3,
  names_from = task,
  values_from = nviolation)

# Wilcoxon signed-rank test for all pairwise differences
tests <- list(
  wilcoxsign_test(phone ~ watch,
                  data = smartwatch),
  wilcoxsign_test(speaker ~ watch,
                  data = smartwatch),
  wilcoxsign_test(phone ~ speaker,
                  data = smartwatch),
  wilcoxsign_test(phone ~ texting,
                  data = smartwatch),
  wilcoxsign_test(texting ~ watch,
                  data = smartwatch),
  wilcoxsign_test(texting ~ speaker,
                  data = smartwatch))
# Extract p-values of tests
(pvals <- sapply(tests, pvalue))
[1] 0.723347 0.767498 0.890557 0.000172 0.000191 0.001344
# Adjust for multiple testing using Holm-Bonferroni
p.adjust(pvals, method = "holm")
[1] 1.00000 1.00000 1.00000 0.00103 0.00103 0.00537
# Only differences involving texting are significant (texting is the most distracting task)
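For readability, the comparisons can be paired with their Holm-adjusted p-values in a small table; the labels simply mirror the order of the test list above.
# Pair each comparison with its Holm-adjusted p-value
comparisons <- c("phone - watch", "speaker - watch", "phone - speaker",
                 "phone - texting", "texting - watch", "texting - speaker")
data.frame(comparison = comparisons,
           p_holm = p.adjust(pvals, method = "holm"))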