An alternative to Pearson R^2

Anscombe's quartet is a nice online example of four datasets that have nearly identical variable means AND Pearson correlation (and hence R^2), although they look quite different when plotted!

Here I show an R implementation of an alternative measure, the correlation ratio eta, which may help if you are dealing with such data.

anscombe.R

# http://en.wikipedia.org/wiki/Anscombe%27s_quartet

# Anscombe's quartet as an 11 x 8 numeric matrix, one column per variable
# (x1, y1, ..., x4, y4). The first three datasets share the same x values.
ansc <- local({
    x_shared <- c(10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5)
    cbind(
        x1 = x_shared,
        y1 = c(8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68),
        x2 = x_shared,
        y2 = c(9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10, 9.13, 7.26, 4.74),
        x3 = x_shared,
        y3 = c(7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42, 5.73),
        x4 = c(8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8),
        y4 = c(6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50, 5.56, 7.91, 6.89)
    )
})

# Scatter plots of the four x/y pairs on a 2 x 2 grid; the previous graphics
# settings are restored afterwards.
op <- par(mfrow = c(2, 2), las = 2, ps = 10)
for (x_col in seq(1, 7, by = 2)) {
    y_col <- x_col + 1
    # Axis labels are spelled out so they match what plot() derives from
    # literal arguments such as ansc[,1].
    plot(
        ansc[, x_col], ansc[, y_col],
        xlab = sprintf("ansc[, %d]", x_col),
        ylab = sprintf("ansc[, %d]", y_col)
    )
}
par(op)

# Pearson correlation coefficient of each x/y pair (square it to get R^2).
cor(ansc[, "x1"], ansc[, "y1"])
cor(ansc[, "x2"], ansc[, "y2"])
cor(ansc[, "x3"], ansc[, "y3"])
cor(ansc[, "x4"], ansc[, "y4"])

# eta2
# http://www.opensubscriber.com/message/r-help@stat.math.ethz.ch/7023887.html
# http://www2.chass.ncsu.edu/garson/pa765/eta.htm

# Squared correlation ratio (eta^2) of col1 with respect to col2.
#
# col2 is binned at its quartiles and a one-way ANOVA of col1 on those bins
# is fitted. eta^2 is the between-bin sum of squares divided by the total
# sum of squares, i.e. the share of the variance of col1 explained by the
# binned col2.
#
# Arguments:
#   col1  numeric vector, the response.
#   col2  numeric vector of the same length, binned into quartile groups.
#
# Returns a single number in [0, 1]. Stops with an error when col2 does not
# spread over at least two distinct quartile bins.
eta2 <- function(col1, col2) {
    # Tied quantiles (e.g. many repeated values) would make cut() fail with
    # "'breaks' are not unique", so duplicated break points are dropped.
    breaks <- unique(quantile(col2, seq(0, 1, by = 0.25), na.rm = TRUE))
    if (length(breaks) < 3L) {
        stop("col2 must cover at least two distinct quantile bins",
             call. = FALSE)
    }
    groups <- cut(col2, breaks, include.lowest = TRUE)

    # Sum Sq holds the between-group term first, then the residuals.
    ss <- anova(aov(col1 ~ groups))$"Sum Sq"

    # This ratio already is eta^2; squaring it again would give eta^4.
    ss[1] / sum(ss)
}

# eta2 for each x/y pair: x is the response, y is the variable that gets
# binned at its quartiles.
eta2(ansc[, "x1"], ansc[, "y1"])
eta2(ansc[, "x2"], ansc[, "y2"])
eta2(ansc[, "x3"], ansc[, "y3"])
eta2(ansc[, "x4"], ansc[, "y4"])