STAT 3690 Lecture 19
Hello, dear friend, you can consult us at any time if you have any questions, add WeChat: daixieit
STAT 3690 Lecture 19
2022
Multivariate influence measures
● Hat/projection matrix H = [hij ]n×n = X(XT X)_ 1 XT
- <hij < _ 1
● Yˆ = HY
- the ith row of Yˆ: Yˆi . = j=1 hij Yj . = hiiYi + j i hij Yj .
● Leverage: the influence of observation Yi . on Yˆi .
- Observation Yi . is said to have a high leverage if hii is large compared to the other elements on the diagonal of H.
● (Externally) Studentized residuals
Ti2 = i(T)ΣL(_)S(1), (i)i .
- i(T): the ith row of = (I _ H)Y
- T(i) . : remaining part of with row i removed
- ΣLS , (i) = (n _ q _ 2)_ 1 T(i) . (i) . : LS estimator of Σ where we have removed row i from the residual
- Observation Yi may be considered as a potential outlier if
Ti2 > n _ p _ q _ 1 F1 _α,p,n _q _2
* F1 _α,p,n _q _2 : the 1 _ α quantile of F (p, n _ q _ 2)
● (Multivariate) Cook’s distance
Di = i(T)ΣL(_)S(1)i .
- The Cut-off is far from unique even for univariate linear regression (p = 1)
- Pay attention to a small set of observations that has substantially higher values than the remaining observations
install.packages(c ("car" , "EnvStats"))
options(digits = 4)
tear <- c (
6.5 , 6.2 , 5.8 , 6.5 , 6.5 , 6.9 , 7.2 , 6.9 , 6.1 , 6.3 ,
6.7 , 6.6 , 7.2 , 7.1 , 6.8 , 7.1 , 7.0 , 7.2 , 7.5 , 7.6
)
gloss <- c (
9.5 , 9.9 , 9.6 , 9.6 , 9.2 , 9.1 , 10.0 , 9.9 , 9.5 , 9.4 ,
9.1 , 9.3 , 8.3 , 8.4 , 8.5 , 9.2 , 8.8 , 9.7 , 10.1 , 9.2
)
opacity <- c (
4.4 , 6.4 , 3.0 , 4.1 , 0.8 , 5.7 , 2.0 , 3.9 , 1.9 , 5.7 ,
2.8 , 4.1 , 3.8 , 1.6 , 3.4 , 8.4 , 5.2 , 6.9 , 2.7 , 1.9
)
n = length(opacity)
rate <- factor(gl(2 ,10 ,length=n), labels=c ( "Low" , "High"))
additive <- factor(gl(2 ,5 ,length=n), labels=c ( "Low" , "High"))
(plastic = data.frame(tear=tear, gloss=gloss, opacity=opacity,
rate=rate, additive=additive))
fit0 <- lm(cbind(tear, gloss, opacity) ~ rate*additive, data = plastic)
resids <- residuals (fit0)
← Eever宁ge
X <- model.matrix (fit0)
H <- X %*% solve(crossprod(X)) %*% t (X)
Hii = diag (H)
hist (Hii, 50)
← cgtern宁lly atuaent4zea res4au宁ls
n <- nrow (X)
p = ncol(resids)
T_square = numeric(n)
for (i in 1:n){
SigmaHatLS_i <- crossprod(resids[-i,])/(n-1-ncol (X))
T_square[i] = t(resids[i,]) %*% solve(SigmaHatLS_i) %*% resids[i,]
}
hist(T_square, 50)
which(T_square > p*(n-1-ncol (X))/(n-p-ncol (X))*qchisq(.95, p, n-1-ncol (X)))
← Book a4st宁nce
SigmaHatLS <- crossprod(resids)/(n - ncol (X))
cook_values <- Hii/((1 - Hii)ˆ2*ncol (X)) * diag(resids %*% solve(SigmaHatLS) %*% t(resids)) hist(cook_values, 50)
which(cook_values>0.4)
Normality of residuals
● Apply techniques in Lecture 7 to checking the normality of residuals
● Apply Box-Cox transformation to colums of Y
install.packages(c ("car" , "EnvStats"))
options(digits = 4)
tear <- c (
6.5 , 6.2 , 5.8 , 6.5 , 6.5 , 6.9 , 7.2 , 6.9 , 6.1 , 6.3 ,
6.7 , 6.6 , 7.2 , 7.1 , 6.8 , 7.1 , 7.0 , 7.2 , 7.5 , 7.6
)
gloss <- c (
9.5 , 9.9 , 9.6 , 9.6 , 9.2 , 9.1 , 10.0 , 9.9 , 9.5 , 9.4 ,
9.1 , 9.3 , 8.3 , 8.4 , 8.5 , 9.2 , 8.8 , 9.7 , 10.1 , 9.2
)
opacity <- c (
4.4 , 6.4 , 3.0 , 4.1 , 0.8 , 5.7 , 2.0 , 3.9 , 1.9 , 5.7 ,
2.8 , 4.1 , 3.8 , 1.6 , 3.4 , 8.4 , 5.2 , 6.9 , 2.7 , 1.9
)
n = length(opacity)
rate <- factor(gl(2 ,10 ,length=n), labels=c ( "Low" , "High"))
additive <- factor(gl(2 ,5 ,length=n), labels=c ( "Low" , "High"))
(plastic = data.frame(tear=tear, gloss=gloss, opacity=opacity,
rate=rate, additive=additive))
fit0 <- lm(cbind(tear, gloss, opacity) ~ rate*additive, data = plastic)
← Lorm宁l N#N plots of res4au宁ls
res = residuals (fit0)
name = colnames(res)
op <- par (mfrow = c (2 ,2),
oma = c (5 ,4 ,0 ,0),
mar = c ( 1 , 1 ,2 ,2))
for (i in 1:ncol(res)){
car::qqPlot (res[,i], main = name[i], id = F)
}
title(xlab = "Normal quantiles" ,
ylab = "Sample quantiles" ,
outer = TRUE , line = 3)
par (op)
← -og#Bog tr宁nsform宁t4on
fit1 = lm(tear ~ rate*additive, data = plastic)
fit2 = lm(gloss ~ rate*additive, data = plastic)
fit3 = lm(opacity ~ rate*additive, data = plastic)
(lambda1 = EnvStats::boxcox (fit1 , optimize=T, lambda=c (-10 ,10))$lambda) plastic$tear.new = (plastic$tear ˆlambda1-1)/lambda1
(lambda2 = EnvStats::boxcox (fit2 , optimize=T, lambda=c (- 10 , 10))$lambda) plastic$gloss.new = (plastic$gloss ˆlambda2-1)/lambda2
(lambda3 = EnvStats::boxcox (fit3 , optimize=T, lambda=c (-10 ,10))$lambda) plastic$opacity.new = (plastic$opacity ˆlambda3-1)/lambda3
fit0.new <- lm(cbind(tear.new , gloss.new , opacity.new) ~ rate*additive, data = plastic)
2022-03-18