#####
##### NMST440: Advanced Aspects of the R Environment
#####
##### ---------------------------------------------------------------------
#####
##### Graphics, cont'd
##### - use of palettes in all sorts of plots based on categorical data
#####
##### ---------------------------------------------------------------------
#####
##### Arnošt Komárek
##### https://www2.karlin.mff.cuni.cz/~komarek
##### komarek@karlin.mff.cuni.cz
#####
##### ======================================================================
### Remove all (non-hidden) objects from the current environment so that
### leftovers from a previous session cannot interfere with the tutorial.
rm(list = ls())
### Root directory of the tutorial material. The path is specific to the
### author's machine -- change it to your own copy before running the script.
### ROOT is also used below to build the paths of the data files.
ROOT <- "/home/komarek/teach/mff_2021/nmst440_AdvRko/Tutorial08/"
### Make ROOT the working directory (setwd() fails if the directory does not exist).
setwd(ROOT)
##### ======================================================================
##### If categorical variables are involved, different colors
##### are often used on plots to distinguish different categories.
##### Especially as the analyst gets older, it becomes more and more
##### difficult to come up with the needed number of colors which, moreover,
##### look at least somewhat aesthetic and increase
##### the informativeness of the plot rather than decrease it.
#####
##### An alternative to thinking about particular colors is to use
##### a predefined palette of colors from which the needed number
##### of colors is picked.
#####
##### ======================================================================
##### The first illustration below uses data
##### from the 2021 parliamentary elections in the Czech Republic
### Data: Poslanecká sněmovna Parlamentu České republiky elections 2021
###
### ***********************************************************************
### Votes by regions (kraje)
### -------------------------
### Read the election results: a semicolon-separated file with a header row,
### located in the Data subdirectory of ROOT.
Votes <- read.table(paste0(ROOT, "/Data/volby_PSPCR_2021.csv"), header = TRUE, sep = ";")
print(Votes)
### Use the party names as row names so that rows can be addressed by party.
rownames(Votes) <- Votes[, "Party"]
Votes <- Votes[-c(23, 24), ]   ## drop rows 23-24 (per the original note: valid votes and numbers of precincts)
#
### Sort the parties by their overall number of votes, largest first.
Votes <- Votes[order(Votes[, "Sum"], decreasing = TRUE), ]
print(Votes)
Votes <- Votes[, -1]           ## drop the first column (per the original note: the party)
#
### Small parties that are merged into one common category.
Other <- c("Zelení", "Švýcar. demokr.", "Volný blok", "PB", "ANS", "APB", "PRAMENY",
           "Levice", "SENIOŘI", "Nevolte Urza.cz", "Monarchiste.cz", "MZH", "OtČe", "Moravané")
### Fail early if some listed party is not among the row names (e.g., because
### of a typo or an encoding problem): indexing a data frame by an unknown row
### name silently returns a row of NAs, which would turn the totals below into NAs.
stopifnot(all(Other %in% rownames(Votes)))
### Column-wise totals over the small parties; colSums() is the direct tool
### for column sums of an all-numeric data frame.
(VotesOther <- colSums(Votes[Other, ]))
#
### Keep only the remaining parties and append the totals as the last row.
Votes <- Votes[!(rownames(Votes) %in% Other), ]
Votes <- rbind(Votes, VotesOther)
rownames(Votes)[nrow(Votes)] <- "Jiná"   ## label the appended (last) row
print(Votes)
Votes <- Votes[c(1, 3, 2, 4:nrow(Votes)), ] ## reorder rows to get government parties first, then opposition and non-parliament parties
print(Votes)
### Table for plotting: the first remaining column is dropped (presumably the
### overall total "Sum" -- verify against the data) and the result is transposed.
votes <- as.table(t(Votes[, -1]))
print(votes)
### columns = parties
### rows = regions, letter is the same as on the car registration plate
### Seats in Poslanecká sněmovna PČR (final state after recalculation using some rules given by the law)
### Trojspolek and PirSTAN divided into single parties
### ---------------------------------------------------------------------------------------------------------------
seats <- setNames(c(34, 23, 14, 33, 4, 72, 20),
                  c("ODS", "KDU-ČSL", "TOP09", "STAN", "Piráti", "ANO", "SPD"))
print(seats)
### Store the prepared objects next to the raw data.
save(list = c("Votes", "votes", "seats"), file = paste0(ROOT, "/Data/volby_PSPCR_2021.RData"))
### Motivation: graphical representation of (conditional) relative frequencies
### +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
### Shares of seats held by the individual parties
prop.table(x = seats)
print(x = votes)
### Conditional relative frequencies: each row (region) sums to one
pvotes <- prop.table(x = votes, margin = 1)
print(round(pvotes, digits = 3)) ## proportions by region
### The same information shown graphically
### ------------------------------------
pie(x = seats, clockwise = TRUE) ## for journalists
barplot(height = seats) ## better for statistics...
plot(x = votes, main = "Votes by region")
### World of colors
### +++++++++++++++++++
### Function to draw a palette of given colors
### -----------------------------------------------------
###
### This is just a technical function used below to show
### selected colors. There is no need to explore this function
### in much detail.
###
### pal2: draw a palette as a column of horizontal color stripes.
###
### Arguments:
###   col     vector of colors; col[1] is drawn as the top stripe,
###           col[length(col)] as the bottom one
###   border  color of the stripe borders (passed to rect())
###   labels  optional labels, one per color; each label is horizontally
###           centred and placed just above the lower edge of its stripe
###   font    font of the labels (passed to text())
###   ...     further arguments passed to plot(), which sets up the empty
###           unit square the stripes are drawn into
###
### Value: NULL (invisibly); the function is called for its side effect
###        of drawing on the active graphics device.
### Error: "inconsistent col and labels" if 'labels' is supplied and its
###        length differs from the length of 'col'.
pal2 <- function(col, border = "light gray", labels, font = 2, ...){
  n <- length(col)

  ## Validate before anything is drawn so that a bad call does not
  ## leave a half-finished plot (or a newly opened device) behind
  if (!missing(labels) && length(labels) != n) stop("inconsistent col and labels")

  ## Empty unit square without axes and axis labels
  plot(0, 0, type = "n", xlim = c(0, 1), ylim = c(0, 1), axes = FALSE, xlab = "", ylab = "", ...)

  ## The i-th stripe spans [(n - i)/n, (n - i + 1)/n] on the y axis,
  ## i.e., the first color ends up on top
  ybottom <- ((n - 1):0) / n
  ytop <- (n:1) / n
  rect(0, ybottom, 1, ytop, col = col, border = border)

  ## pos = 3 writes the text above the given point
  if (!missing(labels)) text(0.5, ybottom, labels = labels, pos = 3, font = font)

  invisible(NULL)
}
### Selected set of colors
### ------------------------
### Analyst was thinking really hard and decided to use the following set of colors
### to represent Czech political parties
### Hand-picked colors, one for each column of 'votes'
PALparties1 <- setNames(c("dodgerblue1", "grey20", "mediumvioletred", "tomato", "yellow", "orangered", "red2", "olivedrab", "grey80"),
                        colnames(votes))
print(PALparties1)
pal2(col = PALparties1, labels = names(PALparties1))
#
### Hand-picked colors, one for each element of 'seats'
PALparties1b <- setNames(c("dodgerblue1", "yellow", "red3", "chocolate4", "grey20", "mediumvioletred", "tomato"),
                         names(seats))
print(PALparties1b)
pal2(col = PALparties1b, labels = names(PALparties1b))
### The chosen colors used in a piechart
pie(x = seats, clockwise = TRUE, col = PALparties1b, radius = 1)
### A bit more information: labels of the form "party (number of seats)"
pie(x = seats, clockwise = TRUE, col = PALparties1b, radius = 1, labels = paste0(names(seats), " (", seats, ")"))
### A "fancy" piechart drawn by pie3D from package plotrix (must be installed)
plotrix::pie3D(x = seats, col = PALparties1b, radius = 1, labels = paste0(names(seats), " (", seats, ")"))
### Finally, a plot that is more useful for
### statistical thinking
plot(x = votes, col = PALparties1, main = "Votes by region")
### The same plot with the columns (and their colors) in reversed order
plot(x = votes[, 9:1], col = rev(PALparties1), main = "Votes by region") ## better?
### -> Would you expect election preferences to be independent of the region?
### Do we always have to think about colors?
### Is it necessary to be restricted to the grey scale?
### Solution: prespecified algorithm to get a set of colors --> palette
### Uniform grey palette
### ++++++++++++++++++++++
### grey() maps levels in [0, 1] (0 = black, 1 = white) to hexadecimal RGB codes.
### The argument of seq() is spelled out as 'length.out' instead of relying on
### partial matching of 'length'.
grey(seq(0, 1, length.out = 5)) ### 5 colors (their hexadecimal RGB codes) from a grey palette
(grid <- seq(0, 1, length.out = 9))
grey(grid)
### Show the nine grey levels, each labelled by its (rounded) level
pal2(grey(grid), labels = paste("grey", round(grid, 2)))
PALparties0 <- grey(grid) ### the hexadecimal RGB codes can be used
### as the 'col' argument of a plotting function
plot(votes, col = PALparties0, main = "Votes by region")
### Colored palettes from the standard grDevices package
### +++++++++++++++++++++++++++++++++++++++++++++++++++++++
###
### Similarly, palettes rainbow, heat.colors etc. work
### Nine colors from each of the grDevices palettes: first shown as stripes
### labelled by the column names of 'votes', then used in the plot of 'votes'
rainbow(n = 9)
pal2(col = rainbow(n = 9), labels = colnames(votes))
plot(x = votes, col = rainbow(n = 9), main = "Votes by region")
### heat.colors
pal2(col = heat.colors(n = 9), labels = colnames(votes))
plot(x = votes, col = heat.colors(n = 9), main = "Votes by region")
### terrain.colors
pal2(col = terrain.colors(n = 9), labels = colnames(votes))
plot(x = votes, col = terrain.colors(n = 9), main = "Votes by region")
### topo.colors
pal2(col = topo.colors(n = 9), labels = colnames(votes))
plot(x = votes, col = topo.colors(n = 9), main = "Votes by region")
### cm.colors
pal2(col = cm.colors(n = 9), labels = colnames(votes))
plot(x = votes, col = cm.colors(n = 9), main = "Votes by region")
### "Nicer" palettes are described in two scientific papers by Zeileis, Hornik & [Murrell, Meyer].
###
### Zeileis A., Hornik K., Murrell P (2009). Escaping RGBland: Selecting Colors for Statistical
### Graphics. Computational Statistics & Data Analysis, 53, 3259-3270. doi:10.1016/j.csda.2008.11.033.
### Zeileis A., Meyer D., Hornik K. (2007). Residual-Based Shadings for Visualizing (Conditional)
### Independence. Journal of Computational and Graphical Statistics, 16(3), 507–525. doi: 10.1198/106186007X237856.
###
### The papers also explain the rationale behind the palettes and which palette
### should be used for which class of statistical plots.
###
### Their palettes are implemented in the R package 'colorspace' (must be installed from CRAN)
###
### ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
### Attach the colorspace package, which provides the *_hcl palettes used below
library(colorspace)
rainbow_hcl(n = 9)
pal2(col = rainbow_hcl(n = 9), labels = colnames(votes))
pal2(col = heat_hcl(n = 9), labels = colnames(votes))
pal2(col = terrain_hcl(n = 9), labels = colnames(votes))
### Suitable for an ordinal variable
pal2(col = sequential_hcl(n = 9), labels = seq_len(9))
### Suitable for an ordinal variable with a neutral value
### (the palette is shown in reversed order)
pal2(col = rev(diverge_hcl(n = 9)), labels = 4:-4)
### Hue, chroma, luminance, ... can be set explicitly
pal2(col = heat_hcl(n = 9))
pal2(col = heat_hcl(n = 9, c. = c(80, 30), l = c(30, 90), power = c(1/5, 2)))
pal2(col = heat_hcl(n = 9, c. = c(80, 30), l = c(10, 90), power = c(1/5, 2)))
pal2(col = heat_hcl(n = 9, c. = c(80, 30), l = c(60, 90), power = c(1/5, 2)))
### Application to Poslanecká sněmovna Parlamentu ČR elections
### ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
### Some colors for votes
### Three candidate palettes for the columns of 'votes':
### hand-picked colors, rainbow in reversed order, and an HCL rainbow
PALparties1 <- c("dodgerblue1", "grey20", "mediumvioletred", "tomato", "yellow", "orangered", "red2", "olivedrab", "grey80")
PALparties2 <- rainbow(9)[9:1]
PALparties3 <- rainbow_hcl(9, c = 60, l = 75)
names(PALparties1) <- names(PALparties2) <- names(PALparties3) <- colnames(votes)
### Show the three palettes side by side. par() returns the previous settings;
### they are restored afterwards rather than forcing a 1 x 1 layout, so that
### a layout chosen before this section is not lost.
oldpar <- par(mfrow = c(1, 3))
pal2(PALparties1, labels = names(PALparties1))
pal2(PALparties2, labels = names(PALparties2))
pal2(PALparties3, labels = names(PALparties3))
par(oldpar)
### Some colors for seats
### (the same three kinds of palettes, now with one color per element of 'seats')
PALparties1b <- c("dodgerblue1", "yellow", "red3", "chocolate4", "grey20", "mediumvioletred", "tomato")
PALparties2b <- rainbow(7)[7:1]
PALparties3b <- rainbow_hcl(7, c = 60, l = 75)
names(PALparties1b) <- names(PALparties2b) <- names(PALparties3b) <- names(seats)
### Side-by-side display, again restoring the previous graphical settings
oldpar <- par(mfrow = c(1, 3))
pal2(PALparties1b, labels = names(PALparties1b))
pal2(PALparties2b, labels = names(PALparties2b))
pal2(PALparties3b, labels = names(PALparties3b))
par(oldpar)
### Piechart of seats
### Piecharts of seats, one for each of the three palettes
pie(x = seats, clockwise = TRUE, col = PALparties1b, radius = 1)
pie(x = seats, clockwise = TRUE, col = PALparties2b, radius = 1)
pie(x = seats, clockwise = TRUE, col = PALparties3b, radius = 1)
### Labels of the form "party (number of seats)", also in the 3D variant from plotrix
pie(x = seats, clockwise = TRUE, col = PALparties1b, radius = 1, labels = paste0(names(seats), " (", seats, ")"))
plotrix::pie3D(x = seats, col = PALparties1b, radius = 1, labels = paste0(names(seats), " (", seats, ")"))
### Proportions of votes within each region (rows sum to one)
round(prop.table(x = votes, margin = 1), digits = 3)
### Plot of 'votes': default colors first, then the three palettes
plot(x = votes, main = "Votes by region")
plot(x = votes, col = PALparties1, main = "Votes by region")
plot(x = votes, col = PALparties2, main = "Votes by region")
### Columns and colors in reversed order
plot(x = votes[, 9:1], col = rev(PALparties2), main = "Votes by region")
plot(x = votes, col = PALparties3, main = "Votes by region")