Base R Version

Data

Let us begin by simulating our sample data of 3 factor variables and 4 numeric variables.

``````## Simulate some data

## Three factor variables, each cycling through its levels across 50 rows
FacVar1 <- factor(rep_len(c("level1", "level2"), 50))
FacVar2 <- factor(rep_len(c("levelA", "levelB", "levelC"), 50))
FacVar3 <- factor(rep_len(c("levelI", "levelII", "levelIII", "levelIV"), 50))

## Four numeric variables; the seed is reset to 123 before every random draw
set.seed(123)
NumVar1 <- round(rnorm(50, mean = 1000, sd = 50), 2)  ## Normal distribution
set.seed(123)
NumVar2 <- round(runif(50, min = 500, max = 1500), 2)  ## Uniform distribution
set.seed(123)
NumVar3 <- round(rexp(50, rate = 0.001))  ## Exponential distribution
NumVar4 <- 2001:2050  ## consecutive integers, later used as a time axis

## Combine everything into one data frame with 50 rows and 7 columns
simData <- data.frame(
  FacVar1, FacVar2, FacVar3,
  NumVar1, NumVar2, NumVar3, NumVar4
)
``````

One Variable: Numeric Variable

``````plot(simData$NumVar1, type = "o")  ## Index plot: values in row order, points joined by lines
``````

``````hist(simData$NumVar1)  ## histogram
``````

``````plot(density(simData$NumVar1))  ## Kernel density plot
``````

``````boxplot(simData$NumVar1)  ## box plot
``````

One Variable: Factor Variable

``````plot(simData$FacVar3)  ## bar plot of the counts per level
``````

``````
## pie chart - Not the best graph --- use with caution
counts <- table(simData$FacVar3)  ## get counts per level
## Label every slice with its own level name and count. Taking the names from
## the table keeps labels and slices aligned whatever the row order of simData.
labs <- paste(names(counts), counts)
pie(counts, labels = labs)  ## plot
``````

Two Variables: Two Numeric Variables

``````plot(simData$NumVar1, type = "o", ylim = c(0, max(simData$NumVar1, simData$NumVar2)))  ## index plot with one variable; ylim leaves room for the second
lines(simData$NumVar2, type = "o", lty = 2, col = "red")  ## add another variable
``````

``````
## Let's draw density plots :
## https://stat.ethz.ch/pipermail/r-help/2006-August/111865.html
dv1 <- density(simData$NumVar1)
dv2 <- density(simData$NumVar2)
## Set up an empty frame spanning both curves so that neither one is clipped
plot(range(dv1$x, dv2$x), range(dv1$y, dv2$y), type = "n",
     xlab = "NumVar1(red) and NumVar2 (blue)", ylab = "Density")
lines(dv1, col = "red")
lines(dv2, col = "blue")
``````

``````
## scatterplot of NumVar2 (y-axis) against NumVar1 (x-axis)
plot(simData$NumVar1, simData$NumVar2)
``````

Two Variables: Two Factor Variables

``````## Mosaic plot
## plotting a two-way table draws a mosaic plot of the cross-tabulated counts
plot(table(simData$FacVar2, simData$FacVar3))
``````

``````
## barplots
bartable <- table(simData$FacVar2, simData$FacVar3)  ## get the cross tab
## The rows of bartable are the FacVar2 levels, so its row names are exactly
## the legend entries; legend.text is spelled out instead of relying on
## partial matching of `legend`.
barplot(bartable, beside = TRUE, legend.text = rownames(bartable))  ## plot
``````

``````barplot(bartable, legend.text = rownames(bartable))  ## stacked
``````

``````barplot(prop.table(bartable, 2) * 100, legend.text = rownames(bartable))  ## stacked 100%
``````

Two Variables: One Factor and One Numeric

``````## Box plots for the numeric var over the levels of the factor var
## a factor on the x-axis makes plot() draw one box per level
plot(simData$FacVar1, simData$NumVar1)
``````

``````
## density plot of numeric var across multiple levels of the factor var
## Split the rows of simData by the level of FacVar1
level1 <- simData[simData$FacVar1 == "level1", ]
level2 <- simData[simData$FacVar1 == "level2", ]

dv3 <- density(level1$NumVar1)
dv4 <- density(level2$NumVar1)

## Empty frame wide and tall enough for both curves, then draw each one
plot(range(dv3$x, dv4$x), range(dv3$y, dv4$y), type = "n",
     xlab = "NumVar1 at Level1 (red) and NumVar1 at Level2 (blue)",
     ylab = "Density")
lines(dv3, col = "red")
lines(dv4, col = "blue")
``````

``````
## Mean of one numeric var over levels of one factor var
## (the result is read back below through its Group.1 and x columns)
meanagg <- aggregate(simData$NumVar1, list(simData$FacVar3), mean)

dotchart(meanagg$x, labels = meanagg$Group.1)  ## Dot Chart
``````

``````barplot(meanagg$x, names.arg = meanagg$Group.1)  ## Bar plot
``````

``````## Question: Is a bar plot even appropriate for displaying a mean, which is
## just a single point?
``````

Three Variables: Three Factor Variables

``````par(mfrow = c(1, 2))  ## two panels side by side, one per level of FacVar1

## Cross tab of FacVar2 by FacVar3 within the FacVar1 == "level1" subset
bar1table <- table(level1$FacVar2, level1$FacVar3)
barplot(bar1table, beside = TRUE, main = "FacVar1=level1")

## Same cross tab for the FacVar1 == "level2" subset; the legend is drawn once,
## on this panel, with the table's row names (the FacVar2 levels)
bar2table <- table(level2$FacVar2, level2$FacVar3)
barplot(bar2table, beside = TRUE, main = "FacVar1=level2", legend.text = rownames(bar2table))
``````

Three Variables: One Numeric and Two Factor Variables

``````par(mfrow = c(1, 1))
## One box per FacVar1 x FacVar2 combination (2 x 3 = 6 groups), each showing
## the distribution of NumVar1 within that combination
with(simData, boxplot(NumVar1 ~ interaction(FacVar1, FacVar2)))
``````

``````
## Mean of 1 Numeric over levels of two factor vars
## (Group.1 holds the FacVar1 level, Group.2 the FacVar2 level, x the mean)
meanaggg <- aggregate(simData$NumVar1, list(simData$FacVar1, simData$FacVar2), mean)
meanaggg <- meanaggg[order(meanaggg$Group.1), ]

## One colour per FacVar2 level, looked up by level name
level_colors <- c(levelA = "red", levelB = "blue", levelC = "darkgreen")
meanaggg$color <- unname(level_colors[as.character(meanaggg$Group.2)])

dotchart(meanaggg$x, labels = meanaggg$Group.2, groups = meanaggg$Group.1, color = meanaggg$color)  ## dotchart
``````

``````
## interaction plot - line plots of means: FacVar2 levels on the x-axis,
## one trace per FacVar1 level
interaction.plot(meanaggg$Group.2, meanaggg$Group.1, meanaggg$x, type = "b",
                 col = 1:2, pch = c(18, 24))
``````

``````
## and a bar plot of the same means, one panel per level of FacVar1
par(mfrow = c(1, 2))

## NOTE: from here on level1 / level2 hold rows of meanaggg (the group means),
## replacing the subsets of simData that carried these names earlier
level1 <- meanaggg[meanaggg$Group.1 == "level1", ]
level2 <- meanaggg[meanaggg$Group.1 == "level2", ]

barplot(level1$x, names.arg = level1$Group.2, main = "FacVar1=level1")
barplot(level2$x, names.arg = level2$Group.2, main = "FacVar1=level2")
``````

Three Variables: Two Numeric and One Factor Variables

``````## Scatter plot with color identifying the factor variable
par(mfrow = c(1, 1))
plot(simData$NumVar1, simData$NumVar2, col = simData$FacVar1)
## One legend swatch per factor level. The fill uses the level codes 1, 2, ...
## (the same numbers the factor passed as `col` above is stored as), so it
## does not depend on the order of the rows in simData.
legend("topright", legend = levels(simData$FacVar1),
       fill = seq_along(levels(simData$FacVar1)))
``````

Three Variables: Three Numeric Variables

``````## NumVar4 is 2001 through 2050... possibly, a time variable - use that as
## the x-axis
## ylim starts at 0 and reaches the overall maximum so both series fit
plot(simData$NumVar4, simData$NumVar1, type = "o",
     ylim = c(0, max(simData$NumVar1, simData$NumVar2)))  ## join dots with lines

lines(simData$NumVar4, simData$NumVar2, type = "o", lty = 2, col = "red")  ## add another line
``````

``````
## Bubble plot - scatter plot of NumVar1 and NumVar2 with individual
## observations sized by NumVar3
## http://flowingdata.com/2010/11/23/how-to-make-bubble-charts/
## The radius is sqrt(NumVar3 / pi), so it is the circle's area, not its
## radius, that is proportional to NumVar3.
symbols(simData$NumVar1, simData$NumVar2, circles = sqrt(simData$NumVar3 / pi),
        inches = 0.25, fg = "white", bg = "red", main = "Sized by NumVar3")
``````

``````pairs(simData[, 4:7], col = simData$FacVar1)  ## scatterplot matrix of the numeric columns, coloured by FacVar1