As usual, please send any questions you have about using SAS or R.
*Not valid in the southern hemisphere.
library(sas7bdat)
# Read the HELP data set straight from a SAS-format (.sas7bdat) file.
# The address has to be one unbroken string literal: a line break inside the
# quotes becomes a newline character in the URL and the download fails.
helpfromSAS = read.sas7bdat(
  "http://www.math.smith.edu/sasr/datasets/help.sas7bdat")
> is.data.frame(helpfromSAS)
[1] TRUE
> summary(helpfromSAS$MCS)
Min. 1st Qu. Median Mean 3rd Qu. Max.
6.763 21.680 28.600 31.680 40.940 62.180
> with(helpfromSAS, summary(SUBSTANCE))
alcohol cocaine heroin
177 152 124

# Download the HELP data set as CSV; mosaic is attached for xchisq.test().
ds = read.csv(file = "http://www.math.smith.edu/r/data/help.csv")
library(mosaic)
# Replace the numeric codes with readable labels: a value of 1 gets the first
# label, any other non-missing value gets the second.
ds[["gender"]] = ifelse(ds[["female"]] == 1, "female", "male")
ds[["homeless"]] = ifelse(ds[["homeless"]] == 1, "homeless", "housed")
# Two-way contingency table: gender in rows, housing status in columns.
tab = xtabs(~ gender + homeless, data = ds)
> tab
homeless
gender homeless housed
female 40 67
male 169 177
> xchisq.test(tab)
Pearson's Chi-squared test with Yates' continuity correction
data: tab
X-squared = 3.8708, df = 1, p-value = 0.04913
40.00 67.00
( 49.37) ( 57.63)
[1.78] [1.52]
<-1.33> < 1.23>
169.00 177.00
(159.63) (186.37)
[0.55] [0.47]
< 0.74> <-0.69>
key:
observed
(expected)
[contribution to X-squared]
<residual>
# Lay out one row of two panels and draw both views of the `tab` table:
# the mosaic plot first, then the association plot with its own title.
par(mfrow = c(1, 2))
mosaicplot(x = tab, main = "mosaic plot", color = TRUE)
assocplot(x = tab)
title(main = "association plot")
/* Setup step: run the mosaic module source in IML and store the modules. */
title 'Install mosaic modules';

/* Libref naming the library where the compiled modules are stored. */
libname mosaic 'c:\ken\sasmacros\mosaics';
/* Fileref naming the directory that holds the mosaic source files. */
filename mosaic 'c:\ken\sasmacros\mosaics';

/* Read in the source, then store every module in mosaic.mosaic. */
proc iml;
  reset storage=mosaic.mosaic;
  %include mosaic(mosaics);
  store module=_all_;
  show storage;
quit;
/* Cross-tabulate homeless by female and write the cell counts to outhelp. */
proc freq data="c:\book\help.sas7bdat";
  tables homeless*female / out=outhelp;
run;

/* Bring the %mosaic wrapper macro into the session. */
%include "c:\ken\sasmacros\mosaics\mosaic.sas";

/* Draw the mosaic plot from the saved cell counts. */
%mosaic(
  data=outhelp,
  var=female homeless,
  sort=homeless descending female,
  space=1 1);

/* Scatterplot of x2 against x1 using small, highly transparent filled */
/* circles, so that overlapping points build up visible density.       */
proc sgplot data=mvnorms;
  scatter x=x1 y=x2 /
    transparency=0.85
    markerattrs=(symbol=CircleFilled size=.05in);
run;

/* Bivariate kernel density estimate of (x1, x2), shown as a contour plot. */
proc kde data=mvnorms;
  bivar x1 x2 / plots=contour;
run;

# Plain scatterplot of the two columns of xvals: tiny solid dots in a mostly
# transparent black, so that dense regions show up darker.
plot(x = xvals[, 1], y = xvals[, 2], col = "#00000022", pch = 19, cex = 0.1)

# Smoothed-density version of the same scatterplot.
smoothScatter(x = xvals[, 1], y = xvals[, 2])

There are many reasons for using hexagons, at least over squares. Hexagons have symmetry of nearest neighbors, which is lacking in square bins. A hexagon has the maximum number of sides a polygon can have for a regular tessellation of the plane, so in terms of packing, a hexagon is 13% more efficient for covering the plane than a square. This property translates into better sampling efficiency, at least for elliptical shapes. Lastly, hexagons are visually less biased for displaying densities than other regular tessellations.
library(MASS)
library(hexbin)
# Mean vector and covariance matrix of the bivariate normal to simulate.
mu = c(1, -1)
Sigma = rbind(c(3, 2),
              c(2, 5))
# Draw 10,000 observations; the two columns of xvals are plotted as X1 and X2.
xvals = mvrnorm(n = 10000, mu = mu, Sigma = Sigma)
Sigma[1, 2] / sqrt(prod(diag(Sigma))) # correlation
# Hexagonally binned plot of the simulated points.
plot(hexbin(x = xvals[, 1], y = xvals[, 2]), xlab = "X1", ylab = "X2")
/* Build a TYPE=COV data set holding the 2x2 covariance matrix of x1 and x2. */
/* NOTE(review): only COV rows are supplied, no MEAN row - confirm which     */
/* means PROC SIMNORMAL then assumes.                                        */
data Sigma (type=cov);
  infile cards;
  input _type_ $ _Name_ $ x1 x2;
  cards;
cov x1 3 2
cov x2 2 5
;
run;

/* Simulate 10,000 realizations of (x1, x2) from that covariance matrix. */
proc simnormal data=Sigma out=mvnorms numreal=10000;
  var x1 x2;
run;

/* Call the %twodhist macro (defined outside this section) on the result. */
%twodhist(
  data=mvnorms,
  x=x1,
  y=x2,
  nbinsx=30,
  nbinsy=30,
  nshades=9);
