MATH 80667A - Week 13

Count data

Example 1 - Duke and Amir (2023), experiment 2

library(coin)
library(dplyr)
library(gnm)
data(DA23_E2, package = "hecedsm")
tabs <- with(DA23_E2, table(purchased, format))
# Chi-square test for independence
(chisq <- chisq.test(tabs))

    Pearson's Chi-squared test with Yates' continuity correction

data:  tabs
X-squared = 21, df = 1, p-value = 5e-06
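A simple effect size for this 2 by 2 table is the phi coefficient, the square root of the chi-square statistic divided by the sample size; the short sketch below recomputes the statistic without continuity correction (the name X2 is only illustrative).
# Phi coefficient: sqrt(X^2 / n), using the uncorrected statistic
X2 <- chisq.test(tabs, correct = FALSE)$statistic
sqrt(X2 / sum(tabs))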

Example 2 - Elliot et al. (2021) multilab

data(MULTI21_D1, package = "hecedsm")
# Create a contingency table
(contingency <- xtabs( #pool data
  count ~ age + frequency,
  data = MULTI21_D1))
      frequency
age    never sometimes usually
  5yo     53        80      87
  6yo     28        84     141
  7yo     19        73     177
  10yo    14        40     181
# No correction to get same result as Poisson regression model
(chisqtest <- chisq.test(contingency, correct = FALSE))

    Pearson's Chi-squared test

data:  contingency
X-squared = 87, df = 6, p-value <2e-16
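The statistic compares the observed counts to the counts expected under independence (row total times column total, divided by the grand total); chisq.test stores these in the expected element, which can be inspected directly.
# Expected counts under independence
chisqtest$expected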
# Two-way table with (I,J) categories
# Here, I=4 (age group) and J=3 (frequency),
# so the test of independence has (I-1)*(J-1) = 3*2 = 6 degrees of freedom
MULTI21_D1_long <- MULTI21_D1 |> # pool counts by age and frequency
  dplyr::group_by(age, frequency) |>
  dplyr::summarize(total = sum(count)) # aggregate counts
MULTI21_D1_long |>
  knitr::kable()
Table 1: Count data for Elliot et al. (2021) in long format.
age   frequency  total
5yo   never         53
5yo   sometimes     80
5yo   usually       87
6yo   never         28
6yo   sometimes     84
6yo   usually      141
7yo   never         19
7yo   sometimes     73
7yo   usually      177
10yo  never         14
10yo  sometimes     40
10yo  usually      181
# Fit Poisson regression
mod_main <- glm(total ~ age + frequency, # null model, no interaction
    family = poisson, data = MULTI21_D1_long)
mod_satur <- glm(total ~ age * frequency, # saturated model
    family = poisson, data = MULTI21_D1_long)
# The saturated model returns the observed counts
isTRUE(all.equal(
    target = predict(mod_satur, type = "response"),
    current = MULTI21_D1_long$total,
    check.attributes = FALSE))
[1] TRUE
# Likelihood ratio and score tests: two general families of testing procedures
anova(mod_main, mod_satur, test = "LRT")  # likelihood ratio test, aka deviance stat
Analysis of Deviance Table

Model 1: total ~ age + frequency
Model 2: total ~ age * frequency
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)    
1         6       85.2                         
2         0        0.0  6     85.2    3e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod_main, mod_satur, test = "Rao")  # Rao score test, aka Pearson chi-square test
Analysis of Deviance Table

Model 1: total ~ age + frequency
Model 2: total ~ age * frequency
  Resid. Df Resid. Dev Df Deviance  Rao Pr(>Chi)    
1         6       85.2                              
2         0        0.0  6     85.2 87.5   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# We can also compute them manually without fitting the saturated model
# The Pearson chi-square stat is the sum of squared Pearson residuals
PearsonX2 <- sum(resid(mod_main, type = "pearson")^2)
pchisq(q = PearsonX2, df = mod_main$df.residual, lower.tail = FALSE)
[1] 1.02e-16
# The likelihood ratio test is obtained from the deviance
pchisq(q = deviance(mod_main),
       df = mod_main$df.residual, lower.tail = FALSE)
[1] 2.95e-16
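As a sanity check, the Pearson statistic computed from the main-effects Poisson model should match the chi-square statistic obtained earlier from the contingency table; a one-line verification follows.
# Same value as chisqtest$statistic, up to numerical tolerance
isTRUE(all.equal(unname(chisqtest$statistic), PearsonX2))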

Example 3 - Bertrand and Mullainathan (2004)

data(BM04_T2, package = "hecedsm")
# Symmetric model with 6 parameters (3 diag + 3 upper triangular)
mod_null <- glm(count ~ gnm::Symm(black, white),
                data = BM04_T2,
                family = poisson)
# Likelihood ratio test against the saturated model, based on the deviance
pchisq(deviance(mod_null), lower.tail = FALSE,
       df = mod_null$df.residual) # 9 cells - 6 parameters = 3
[1] 3.25e-06
PearsonX2 <- sum(residuals(mod_null, type = "pearson")^2)
pchisq(PearsonX2, df = mod_null$df.residual, lower.tail = FALSE)
[1] 5.07e-06
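To see where the departure from symmetry lies, one could place the observed counts next to the fitted counts of the symmetric model; this minimal sketch simply binds the fitted values to the data.
# Observed versus fitted counts under the symmetry model
cbind(BM04_T2, fitted = round(fitted(mod_null), 1))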

Nonparametric tests

Example 1 - Brucks and Levav (2022)

data(BL22_E, package = "hecedsm")
# Two-sample comparison using Mann-Whitney or Wilcoxon rank-sum test
mww <- coin::wilcox_test(
  partner_time ~ cond,
  data = BL22_E,
  conf.int = TRUE)
mww

    Asymptotic Wilcoxon-Mann-Whitney Test

data:  partner_time by cond (f2f, video)
Z = -6, p-value = 1e-10
alternative hypothesis: true mu is not equal to 0
95 percent confidence interval:
 -50.7 -25.9
sample estimates:
difference in location 
                 -37.8 
# The point estimate is the Hodges-Lehmann estimator of the location shift
# The estimate and confidence interval bounds are times in seconds
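The Hodges-Lehmann estimator corresponds to the median of all pairwise between-group differences; the sketch below recovers it directly, assuming the two levels of cond are f2f and video as in the output above (x and y are illustrative names).
# Median of all pairwise differences (f2f minus video)
x <- with(BL22_E, partner_time[cond == "f2f"])
y <- with(BL22_E, partner_time[cond == "video"])
median(outer(x, y, FUN = "-"))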

# Compare with the Welch two-sample t-test
(welch_ttest <- t.test(partner_time ~ cond,
  data = BL22_E,
  conf.int = TRUE))

    Welch Two Sample t-test

data:  partner_time by cond
t = -6, df = 268, p-value = 1e-08
alternative hypothesis: true difference in means between group f2f and group video is not equal to 0
95 percent confidence interval:
 -52.9 -26.5
sample estimates:
  mean in group f2f mean in group video 
               51.7                91.4 

Example 2 - Brodeur et al. (2021)

data(BRLS21_T3, package = "hecedsm")
# Friedman test (more popular, but less powerful than Quade)
friedman <- coin::friedman_test(
  nviolation ~ task | id,
  data = BRLS21_T3)
friedman

    Asymptotic Friedman Test

data:  nviolation by
     task (phone, watch, speaker, texting) 
     stratified by id
chi-squared = 19, df = 3, p-value = 3e-04
# Quade test
quade <- coin::quade_test(
  nviolation ~ task | id,
  data = BRLS21_T3)
quade

    Asymptotic Quade Test

data:  nviolation by
     task (phone, watch, speaker, texting) 
     stratified by id
chi-squared = 22, df = 3, p-value = 8e-05
# Effect size for rank-based tests
eff_size <- effectsize::kendalls_w(
  x = "nviolation",
  groups = "task",
  blocks = "id",
  data = BRLS21_T3)
eff_size |> 
  knitr::kable(digits = 2,
               col.names = c("Kendall's W", "level", "lower CL", "upper CL"))
Table 2: Effect size for rank-based test.
Kendall’s W  level  lower CL  upper CL
        0.2   0.95       0.1         1
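Kendall's W is linked to the Friedman statistic through W = chi-square / {n(k - 1)}, with n blocks (participants) and k tasks; up to tie corrections, the sketch below should agree with the value reported in Table 2.
# Recover Kendall's W from the Friedman chi-square statistic
n <- length(unique(BRLS21_T3$id))
k <- nlevels(factor(BRLS21_T3$task))
coin::statistic(friedman) / (n * (k - 1))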
# Compute ranks separately for each person
BRLS21_T3_rank <- BRLS21_T3 |>
  group_by(id) |>
  mutate(rank = rank(nviolation)) |>
  ungroup()
# Which task has the highest average rank?
BRLS21_T3_rank |>
  group_by(task) |>
  summarize(mrank = mean(rank)) |>
  knitr::kable(col.names = c("task","mean rank"))
Table 3: Average rank of the number of road safety violations.
task     mean rank
phone         2.19
watch         2.19
speaker       2.26
texting       3.35
# So texting leads to more violations
# Transform to wide format - one line per id
smartwatch <- tidyr::pivot_wider(
  data = BRLS21_T3,
  names_from = task,
  values_from = nviolation)

# Wilcoxon signed-rank test for all pairwise differences
tests <- list(
  wilcoxsign_test(phone ~ watch,
                  data = smartwatch),
  wilcoxsign_test(speaker ~ watch,
                  data = smartwatch),
  wilcoxsign_test(phone ~ speaker,
                  data = smartwatch),
  wilcoxsign_test(phone ~ texting,
                  data = smartwatch),
  wilcoxsign_test(texting ~ watch,
                  data = smartwatch),
  wilcoxsign_test(texting ~ speaker,
                  data = smartwatch))
# Extract p-values of tests
(pvals <- sapply(tests, pvalue))
[1] 0.723347 0.767498 0.890557 0.000172 0.000191 0.001344
# Adjust for multiple testing using Holm-Bonferroni
p.adjust(pvals, method = "holm")
[1] 1.00000 1.00000 1.00000 0.00103 0.00103 0.00537
# Only differences involving texting are significant (texting is the most distracting task)
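For readability, the comparisons can be paired with their Holm-adjusted p-values in a small table; the labels simply mirror the order of the test list above.
# Pair each comparison with its Holm-adjusted p-value
comparisons <- c("phone - watch", "speaker - watch", "phone - speaker",
                 "phone - texting", "texting - watch", "texting - speaker")
data.frame(comparison = comparisons,
           p_holm = p.adjust(pvals, method = "holm"))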