## Tuesday, March 24, 2009

## Sunday, March 22, 2009

## Friday, March 13, 2009

### Visualization of correlation matrix

- Color Image

# Colour-image view of the correlation matrix of the coefficient estimates
# from a linear model of mpg on every other column of mtcars.
data(mtcars)
fit <- lm(mpg ~ ., data = mtcars)

# summary.lm() stores the correlation of the estimates when asked for it.
fit_summary <- summary(fit, correlation = TRUE)
cor <- fit_summary$correlation

# Reverse the row order, then transpose, before handing the matrix to image().
cor2 <- t(cor[rev(seq_len(11)), ])

# Eleven-colour palette with "white" as the middle entry.
colors <- c(
  "#A50F15", "#DE2D26", "#FB6A4A", "#FCAE91", "#FEE5D9",
  "white",
  "#EFF3FF", "#BDD7E7", "#6BAED6", "#3182BD", "#08519C"
)

image(seq_len(11), seq_len(11), cor2, axes = FALSE, ann = FALSE, col = colors)

# Print each cell's value (times 100, rounded) at the cell centre; row() and
# col() enumerate the x and y cell indices in the matrix's storage order.
text(as.vector(row(cor2)), as.vector(col(cor2)), round(100 * cor2))

- Ellipses

# Ellipse view of the same correlation matrix, drawn with ellipse::plotcorr().
# Relies on `cor` (the correlation matrix) and `colors` (the 11-colour
# palette) created by the colour-image snippet earlier in this post.
library(ellipse)

# Pick a palette entry for every cell from the rank of that cell within its
# column. The original referenced an undefined object `corr`; the matrix is
# called `cor`, which is also what plotcorr() receives below.
col <- colors[as.vector(apply(cor, 2, rank))]

plotcorr(cor, col = col, mar = rep(0, 4))

## Draw a square matrix of correlation coefficients as a grid of circles.
##
## Arguments:
##   cor    square numeric matrix of correlation coefficients.
##   axes   passed to plot(); whether to draw axes (default FALSE).
##   xlab, ylab, asp
##          passed to plot(); the defaults reproduce the values that were
##          previously hard-coded in the plot() call.
##   title  main title written above the grid.
##   ...    accepted for compatibility with existing callers; not used.
##
## Side effects: sets par(mar = c(0, 0, 2, 0), bg = "white") and leaves it
## set, then draws on the current graphics device.
## Returns: the value of title(), i.e. invisible NULL.
circle.cor <- function(cor, axes = FALSE, xlab = "",
                       ylab = "", asp = 1,
                       title = "Taiyun's cor-matrix circles",
                       ...) {
  n <- nrow(cor)
  idx <- seq_len(n)

  par(mar = c(0, 0, 2, 0), bg = "white")
  plot(c(0, n + 0.8), c(0, n + 0.8), axes = axes, xlab = xlab,
       ylab = ylab, asp = asp, type = "n")

  ## add grid: n + 1 horizontal and n + 1 vertical lines around the n x n cells
  grid_at <- 0.5 + 0:n
  lower <- rep(0.5, n + 1)
  upper <- rep(n + 0.5, n + 1)
  segments(lower, grid_at, upper, grid_at, col = "gray")
  segments(grid_at, lower, grid_at, upper, col = "gray")

  ## define circles' background color.
  ## black for positive correlation coefficient and white for negative
  bg <- cor
  bg[cor > 0] <- "black"
  bg[cor <= 0] <- "white"

  ## plot n*n circles using vector language, suggested by Yihui Xie
  ## Row i of `cor` is drawn at height n + 1 - i, so row 1 is at the top.
  ## Radius is sqrt(|r|) / 2, which makes the circle's area proportional to |r|.
  symbols(rep(idx, each = n), rep(rev(idx), n), add = TRUE, inches = FALSE,
          circles = as.vector(sqrt(abs(cor)) / 2), bg = as.vector(bg))

  ## row numbers down the left edge, column numbers along the top edge
  text(rep(0, n), idx, rev(idx), col = "red")
  text(idx, rep(n + 1, n), idx, col = "red")

  title(title)
}

## an example
data(mtcars)
fit <- lm(mpg ~ ., mtcars)
cor <- summary(fit, correlation = TRUE)$correlation
circle.cor(cor)

Circles with a black background denote positive correlation coefficients, and the area of each circle denotes the absolute value of the coefficient. See more in my Picasa here.

The above three graphs are based on the same data. Dear friends, which gives you more information at first glance?

Labels:
color image,
correlation matrix,
ellipse,
R,
Taiyun circles,
visualization

## Wednesday, March 11, 2009

### Andrews' Curve And Parallel Coordinate Graph

The unison graph and the parallel coordinate graph share a similar idea for visualising differences in multidimensional data, though the former is much more complicated. Using the iris data, we can compare their performance.

#----------------------------------------------------------------------
#code of unison graph
#
# Every observation (row of x) is turned into one curve over t in [-pi, pi]:
#   f_i(t) = x[i,1]/sqrt(2) + x[i,2]*sin(t) + x[i,3]*cos(t) + x[i,4]*sin(2t) + ...
# Even-numbered columns contribute sine terms, odd-numbered ones cosine terms.
# The curves are drawn in one colour per iris species.

x <- as.matrix(iris[1:4])

t <- seq(-pi, pi, pi / 30)
m <- nrow(x)
n <- ncol(x)

# One row per observation, one column per grid point of t. The two extents
# are passed separately; matrix(0, c(m, length(t))) would hand both to `nrow`.
f <- matrix(0, m, length(t))

for (i in seq_len(m)) {
  f[i, ] <- x[i, 1] / sqrt(2)
  for (j in seq_len(n)[-1]) {
    if (j %% 2 == 0) {
      f[i, ] <- f[i, ] + x[i, j] * sin(j / 2 * t)
    } else {
      f[i, ] <- f[i, ] + x[i, j] * cos(j %/% 2 * t)
    }
  }
}

# Empty frame sized to hold every curve, then one line per observation.
plot(c(-pi, pi), c(min(f), max(f)), type = "n",
     main = "The Unison graph of Iris", xlab = "t", ylab = "f(t)")

for (i in seq_len(m)) {
  lines(t, f[i, ], col = c("red", "green3", "blue")[unclass(iris$Species[i])])
}

legend(x = -3, y = 15, c('setosa', 'versicolor', 'virginica'),
       lty = 1, col = c("red", "green3", "blue"))

- Parallel coordinate graph

- Andrews' Curve

#----------------------------------------------------------------------
#code of unison graph
#
# Every observation (row of x) is turned into one curve over t in [-pi, pi]:
#   f_i(t) = x[i,1]/sqrt(2) + x[i,2]*sin(t) + x[i,3]*cos(t) + x[i,4]*sin(2t) + ...
# Even-numbered columns contribute sine terms, odd-numbered ones cosine terms.
# The curves are drawn in one colour per iris species.

x <- as.matrix(iris[1:4])

t <- seq(-pi, pi, pi / 30)
m <- nrow(x)
n <- ncol(x)

# One row per observation, one column per grid point of t. The two extents
# are passed separately; matrix(0, c(m, length(t))) would hand both to `nrow`.
f <- matrix(0, m, length(t))

for (i in seq_len(m)) {
  f[i, ] <- x[i, 1] / sqrt(2)
  for (j in seq_len(n)[-1]) {
    if (j %% 2 == 0) {
      f[i, ] <- f[i, ] + x[i, j] * sin(j / 2 * t)
    } else {
      f[i, ] <- f[i, ] + x[i, j] * cos(j %/% 2 * t)
    }
  }
}

# Empty frame sized to hold every curve, then one line per observation.
plot(c(-pi, pi), c(min(f), max(f)), type = "n",
     main = "The Unison graph of Iris", xlab = "t", ylab = "f(t)")

for (i in seq_len(m)) {
  lines(t, f[i, ], col = c("red", "green3", "blue")[unclass(iris$Species[i])])
}

legend(x = -3, y = 15, c('setosa', 'versicolor', 'virginica'),
       lty = 1, col = c("red", "green3", "blue"))

### Scatterplots

There are many types of scatterplots in R. Here are some examples based on the famous iris data.

- pairs() and coplot() in package graphics.

- gpairs() in package YaleToolkit.

- scatterplot.matrix() or spm() in package car.

- splom() in package lattice.

Labels:
graphics,
iris,
R,
scatterplot,
visualization

Subscribe to:
Posts (Atom)