資料視覺化3

林嶔 (Lin, Chin)

Lesson 10

第一節:結合地理資訊的視覺化(1)

– Google地圖是我們最常用的地圖,他包含了街道圖/衛星圖等不同圖資,在R裡面我們可以透過套件輕鬆的與Google地圖作結合。

– 請至這裡下載登革熱流行的範例資料。

dat = read.csv("data10_1.csv", header = TRUE, fileEncoding = 'CP950')
head(dat)
##       發病日 個案研判日     通報日 性別 年齡層 居住縣市 居住鄉鎮 居住村里
## 1 1998/01/02            1998/01/07   男  40-44   屏東縣   屏東市         
## 2 1998/01/03            1998/01/14   男  30-34   屏東縣   東港鎮         
## 3 1998/01/13            1998/02/18   男  55-59   宜蘭縣   宜蘭市         
## 4 1998/01/15            1998/01/23   男  35-39   高雄市   苓雅區         
## 5 1998/01/20            1998/02/04   男  55-59   宜蘭縣   五結鄉         
## 6 1998/01/22            1998/02/19   男  20-24   桃園市   蘆竹區         
##      最小統計區 最小統計區中心點X 最小統計區中心點Y   一級統計區 二級統計區
## 1 A1320-0136-00          120.5059          22.46425 A1320-04-008   A1320-04
## 2 A1303-0150-00          120.4536          22.46639 A1303-09-007   A1303-09
## 3 A0201-0449-00          121.7514          24.74922 A0201-23-006   A0201-23
## 4 A6408-0153-00          120.3381          22.63032 A6408-10-010   A6408-10
## 5 A0209-0232-00          121.7983          24.68457 A0209-10-005   A0209-10
## 6 A0305-0543-00          121.2965          25.04431 A0305-36-002   A0305-36
##   感染縣市 感染鄉鎮 感染村里 是否境外移入 感染國家 確定病例數
## 1                                      否                   1
## 2                                      是                   1
## 3                                      是                   1
## 4                                      否                   1
## 5                                      否                   1
## 6                                      是                   1
library(RgoogleMaps)

第一節:結合地理資訊的視覺化(2)

– 注意,你需要準備API Key:

lat = c(22.88751, 23.41373)
lon = c(120.023, 120.6562)
center = c(mean(lat), mean(lon))
zoom = min(MaxZoom(range(lat), range(lon)))

MyMap = GetMap(center = center, zoom = zoom, API_console_key = 'AIzaSyA4DVFtF70aXE7RgrXViy2z5Ku2pMkVxFI')
PlotOnStaticMap(MyMap)

MyMap2 = GetMap(center = center, zoom = zoom, maptype = "satellite", API_console_key = 'AIzaSyA4DVFtF70aXE7RgrXViy2z5Ku2pMkVxFI')
PlotOnStaticMap(MyMap2)

第一節:結合地理資訊的視覺化(3)

dat[,1] = as.Date(dat[,1])
subdat = dat[dat[,1] <= as.Date("2015-09-30") & dat[,1] >= as.Date("2015-09-01") & dat[,6] == "台南市",]
nrow(subdat)
## [1] 12815

– 我們可以透過函數「PlotOnStaticMap()」把點放到MyMap上面

PlotOnStaticMap(MyMap, lat = subdat$最小統計區中心點Y, lon = subdat$最小統計區中心點X, pch = 19, col = "red", cex = 1)

第一節:結合地理資訊的視覺化(4)

x1 <- subdat$最小統計區中心點Y
x2 <- subdat$最小統計區中心點X
df <- data.frame(x1,x2)

## Use densCols() output to get density at each point
x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white")))
df$dens <- col2rgb(x)[1,] + 1L

## Map densities to colors
cols <-  colorRampPalette(c("#000099", "#00FEFF", "#45FE4F", 
                            "#FCFF00", "#FF9400", "#FF3100"))(256)
df$col <- cols[df$dens]

PlotOnStaticMap(MyMap, lat = df$x1, lon = df$x2, pch = 19, col = df$col, cex = 1.5)

第一節:結合地理資訊的視覺化(5)

mapdat = read.table("TWmap/TWmap/鄉村資料.txt", fileEncoding = "UTF-8")
head(mapdat)
##   objectid_1 village town county village_id  town_id county_id     tv_all
## 0          1  中山里 東區 嘉義市        061 10020010     10020 東區中山里
## 1          2  後驛里 西區 嘉義市        064 10020020     10020 西區後驛里
## 2          3  東川里 東區 嘉義市        038 10020010     10020 東區東川里
## 3          4  番社里 西區 嘉義市        061 10020020     10020 西區番社里
## 4          5  中央里 東區 嘉義市        062 10020010     10020 東區中央里
## 5          6  東興里 東區 嘉義市        060 10020010     10020 東區東興里
##       villcode ori_villag     area    max_x   max_y    min_x   min_y        x
## 0 A2001-061-00       NULL 230686.8 195185.2 2598108 194071.2 2597692 194698.8
## 1 A2002-064-00       NULL 363016.4 193222.2 2598067 192319.2 2597159 192755.7
## 2 A2001-038-00       NULL 630623.2 196096.4 2598009 194992.9 2596974 195507.5
## 3 A2002-061-00       NULL 170099.2 193546.0 2597889 192872.0 2597386 193249.6
## 4 A2001-062-00       NULL 194986.9 194170.0 2597852 193726.8 2597264 193954.0
## 5 A2001-060-00       NULL 215453.5 195207.0 2597837 194114.4 2597456 194621.2
##         y         v_id sort id countyname townname objectid orig_fid shape_leng
## 0 2597889 10020010-061   20  0       NULL     NULL        0        0          0
## 1 2597628 10020020-064   20  1       NULL     NULL        0        0          0
## 2 2597570 10020010-038   20  2       NULL     NULL        0        0          0
## 3 2597626 10020020-061   20  3       NULL     NULL        0        0          0
## 4 2597550 10020010-062   20  4       NULL     NULL        0        0          0
## 5 2597662 10020010-060   20  5       NULL     NULL        0        0          0
##   shape_le_1 shape_area
## 0   2734.726   230686.8
## 1   2617.877   363016.4
## 2   3891.870   630623.2
## 3   2021.745   170099.2
## 4   1812.928   194986.9
## 5   2658.314   215453.5
Tainan.mapdat = mapdat[mapdat[,4] == "臺南市",]
nrow(Tainan.mapdat)
## [1] 752
MysubMap = GetMap(center = center, zoom = zoom, maptype = "satellite", API_console_key = 'AIzaSyA4DVFtF70aXE7RgrXViy2z5Ku2pMkVxFI')
PlotOnStaticMap(MysubMap)

for (i in 1:nrow(Tainan.mapdat)) {
  linedat = read.csv(paste0("TWmap/TWmap/編號", Tainan.mapdat[i,1], ".csv"), header = TRUE, fileEncoding = 'CP950')
  PlotOnStaticMap(MysubMap, lat = linedat[,2], lon = linedat[,1], FUN = lines, add = TRUE, col = "red")
}

MysubMap = GetMap(center = center, zoom = zoom, maptype = "satellite", API_console_key = 'AIzaSyA4DVFtF70aXE7RgrXViy2z5Ku2pMkVxFI')
PlotOnStaticMap(MysubMap)

for (i in 1:nrow(Tainan.mapdat)) {
  linedat = read.csv(paste0("TWmap/TWmap/編號", Tainan.mapdat[i,1], ".csv"), header = TRUE, fileEncoding = 'CP950')
  PlotOnStaticMap(MysubMap, lat = linedat[,2], lon = linedat[,1], FUN = lines, add = TRUE, col = "red")
}

PlotOnStaticMap(MysubMap, lat = df$x1, lon = df$x2, pch = 19, col = df$col, add = TRUE)

第一節:結合地理資訊的視覺化(6)

lat = c(22.88751, 23.41373)
lon = c(120.023, 120.6562)
plot.new()
plot.window(xlim = lon, ylim = lat)

for (i in 1:nrow(Tainan.mapdat)) {
  linedat = read.csv(paste0("TWmap/TWmap/編號", Tainan.mapdat[i,1], ".csv"), header = TRUE, fileEncoding = 'CP950')
  polygon(linedat[,1], linedat[,2], col = "white")
}

lat = c(22.88751, 23.41373)
lon = c(120.023, 120.6562)
plot.new()
plot.window(xlim = lon, ylim = lat)

for (i in 1:nrow(Tainan.mapdat)) {
  linedat = read.csv(paste0("TWmap/TWmap/編號", Tainan.mapdat[i,1], ".csv"), header = TRUE, fileEncoding = 'CP950')
  n.sample = sum(subdat[,16] == as.character(Tainan.mapdat[i,2]))
  if (n.sample == 0) {COL = "#FFFFFF"} 
  else if (n.sample <= 3) {COL = "#000099"} 
  else if (n.sample <= 10) {COL = "#00FEFF"} 
  else if (n.sample <= 30) {COL = "#45FE4F"} 
  else if (n.sample <= 50) {COL = "#FCFF00"} 
  else if (n.sample <= 100) {COL = "#FF9400"} 
  else {COL = "#FF3100"} 
  polygon(linedat[,1], linedat[,2], col = COL)
}

第一節:結合地理資訊的視覺化(7)

lat = c(22.88751, 23.41373)
lon = c(120.023, 120.6562)
plot.new()
plot.window(xlim = lon, ylim = lat)

for (i in 1:nrow(Tainan.mapdat)) {
  linedat = read.csv(paste0("TWmap/TWmap/編號", Tainan.mapdat[i,1], ".csv"), header = TRUE, fileEncoding = 'CP950')
  n.sample = sum(subdat[,16] == as.character(Tainan.mapdat[i,2]))
  if (n.sample == 0) {COL = "#FFFFFF80"} 
  else if (n.sample <= 3) {COL = "#00009980"} 
  else if (n.sample <= 10) {COL = "#00FEFF80"} 
  else if (n.sample <= 30) {COL = "#45FE4F80"} 
  else if (n.sample <= 50) {COL = "#FCFF0080"} 
  else if (n.sample <= 100) {COL = "#FF940080"} 
  else {COL = "#FF310080"} 
  polygon(linedat[,1], linedat[,2], col = COL)
}

MysubMap = GetMap(center = center, zoom = zoom, maptype = "satellite", API_console_key = 'AIzaSyA4DVFtF70aXE7RgrXViy2z5Ku2pMkVxFI')
PlotOnStaticMap(MysubMap)

for (i in 1:nrow(Tainan.mapdat)) {
  linedat = read.csv(paste0("TWmap/TWmap/編號", Tainan.mapdat[i,1], ".csv"), header = TRUE, fileEncoding = 'CP950')
  n.sample = sum(subdat[,16] == as.character(Tainan.mapdat[i,2]))
  if (n.sample == 0) {COL = "#FFFFFF80"} 
  else if (n.sample <= 3) {COL = "#00009980"} 
  else if (n.sample <= 10) {COL = "#00FEFF80"} 
  else if (n.sample <= 30) {COL = "#45FE4F80"} 
  else if (n.sample <= 50) {COL = "#FCFF0080"} 
  else if (n.sample <= 100) {COL = "#FF940080"} 
  else {COL = "#FF310080"} 
  PlotOnStaticMap(MysubMap, lat = linedat[,2], lon = linedat[,1], FUN = polygon, add = TRUE, col = COL)
}

練習1:逐日變化圖

– 假定你希望呈現整個9月份『逐日』的『累積個案散布圖』,你有辦法熟練得操作你的畫圖技術了嗎?

F10_1

– 如果你想要多學學結合地理資訊系統的視覺化技術,可以參考Spatial data in R: Using R as a GIS

練習1答案

MysubMap = GetMap(center = center, zoom = zoom, maptype = "satellite", API_console_key = 'AIzaSyA4DVFtF70aXE7RgrXViy2z5Ku2pMkVxFI')

pdf('Tainan.pdf', width = 7, height = 7)

for (k in 1:30) {
  subdat = dat[dat[,1] <= as.Date(paste0("2015-09-", k)) & dat[,1] >= as.Date("2015-09-01") & dat[,6] == "台南市",]
  PlotOnStaticMap(MysubMap)
  for (i in 1:nrow(Tainan.mapdat)) {
    linedat = read.csv(paste0("TWmap/TWmap/編號", Tainan.mapdat[i,1], ".csv"), header = TRUE, fileEncoding = 'CP950')
    n.sample = sum(subdat[,16] == as.character(Tainan.mapdat[i,2]))
    if (n.sample == 0) {COL = "#FFFFFF80"} 
    else if (n.sample <= 3) {COL = "#00009980"} 
    else if (n.sample <= 10) {COL = "#00FEFF80"} 
    else if (n.sample <= 30) {COL = "#45FE4F80"} 
    else if (n.sample <= 50) {COL = "#FCFF0080"} 
    else if (n.sample <= 100) {COL = "#FF940080"} 
    else {COL = "#FF310080"} 
    PlotOnStaticMap(MysubMap, lat = linedat[,2], lon = linedat[,1], FUN = polygon, add = TRUE, col = COL)
  }
  text(-200, -250, as.Date(paste0("2015-09-", k)), cex = 2, col = "red")
}

dev.off()

第二節:Google圖表(1)

– 請至這裡下載範例資料

dat = read.csv("Example data.csv", header = TRUE, fileEncoding = 'CP950')
head(dat)
##       eGFR Disease Survival.time Death Diabetes Cancer      SBP      DBP
## 1 34.65379       1     0.4771037     0        0      1 121.2353 121.3079
## 2 37.21183       1     3.0704424     0        1      1 122.2000 122.6283
## 3 32.60074       1     0.2607117     1        0      0 118.9136 121.7621
## 4 29.68481       1            NA    NA        0      0 118.2212 112.7043
## 5 28.35726       0     0.1681673     1        0      0 116.7469 115.7705
## 6 33.95012       1     1.2238556     0        0      0 119.9936 116.3872
##   Education Income
## 1         2      0
## 2         2      0
## 3         0      0
## 4         1      0
## 5         0      0
## 6         1      0
library(googleVis)

第二節:Google圖表(2)

– 圓餅圖

TAB = table(dat$Income)
TAB = data.frame(TAB)
colnames(TAB) = c("Income", "Freq")
TAB[,1] = c("Low income", "Middle-class", "Wealthy")
Pie = gvisPieChart(TAB)
plot(Pie)

– 長條圖

m1.0 = mean(dat[dat[,"Income"] == 0,]$SBP, na.rm = TRUE)
m1.1 = mean(dat[dat[,"Income"] == 1,]$SBP, na.rm = TRUE)
m1.2 = mean(dat[dat[,"Income"] == 2,]$SBP, na.rm = TRUE)
m2.0 = mean(dat[dat[,"Income"] == 0,]$eGFR, na.rm = TRUE)
m2.1 = mean(dat[dat[,"Income"] == 1,]$eGFR, na.rm = TRUE)
m2.2 = mean(dat[dat[,"Income"] == 2,]$eGFR, na.rm = TRUE)

DF = data.frame(Income = c("Low income", "Middle-class", "Wealthy"),
                SBP = c(m1.0, m1.1, m1.2),
                eGFR = c(m2.0, m2.1, m2.2))

Column = gvisColumnChart(DF)
plot(Column)

第二節:Google圖表(3)

newdat = dat[,c("SBP", "DBP")]
SC1 = gvisScatterChart(newdat)
plot(SC1)
newdat = dat[,c("eGFR", "SBP")]
SC2 = gvisScatterChart(newdat,
                        options=list(
                        title = "eGFR vs SBP",
                        legend = "none",
                        colors="['#ff0000']",
                        pointSize = 2,
                        explorer="{actions: ['dragToZoom',
                                  'rightClickToReset'],
                                  maxZoomIn:0.05}"
                        ))
plot(SC2)
cat(SC2$html$chart, file = "SC2.html")

第二節:Google圖表(4)

– 現在,我們將能夠透過函數「gvisScatterChart()」畫出具有各點資訊的KM curve!

library(survival)
library(googleVis)

#KM curve
name.time = "Survival.time"
name.status = "Death"
name.x = "Education"
km.data = survfit(Surv(round(dat[,name.time]), dat[,name.status]) ~ factor(dat[, name.x]))

#整理資料
data.list = list()
data.null = data.frame(time = rep(0:max(km.data$time), 2), rate = 100)
Final.data = NULL
pos = cumsum(c(0, as.numeric(km.data$strata)))
for (k in 1:length(km.data$strata)) {
  data.list[[k]] = data.frame(time = km.data$time[(pos[k]+1):pos[k+1]], rate = round(km.data$surv[(pos[k]+1):pos[k+1]]*100, 3))
  data.list[[k]] = rbind(data.null, data.list[[k]])
  data.list[[k]] = data.list[[k]][order(data.list[[k]][,1]),]
  duplicated.pos = which(data.list[[k]][,1]%in%as.numeric(names(table(data.list[[k]][,1]))[table(data.list[[k]][,1])==3]))
  duplicated.pos = duplicated.pos[seq(3, length(duplicated.pos), by = 3)-2]
  data.list[[k]] = data.list[[k]][-duplicated.pos,]
  for (i in 2:nrow(data.list[[k]])) {
    if (data.list[[k]][i,1]==data.list[[k]][i-1,1]&data.list[[k]][i,2]>=data.list[[k]][i-1,2]) {data.list[[k]][i,2] =  data.list[[k]][i-1,2]}
    if (data.list[[k]][i,1]!=data.list[[k]][i-1,1]&is.na(data.list[[k]][i-1,2])) {data.list[[k]][i,2] = data.list[[k]][i-2,2]}
    if (data.list[[k]][i,1]!=data.list[[k]][i-1,1]&!is.na(data.list[[k]][i-1,2])) {data.list[[k]][i,2] = data.list[[k]][i-1,2]}
  }
  data.list[[k]][data.list[[k]][,1]>km.data$time[pos[k+1]],2] = NA
  rownames(data.list[[k]]) = 1:nrow(data.list[[k]])
  if (k==1) {Final.data = data.list[[k]]} else {Final.data = cbind(Final.data, data.list[[k]])}
}
Final.data = Final.data[,-c(2:k*2-1)]
colnames(Final.data)[-1] = paste0("rate_",name.x,":",levels(factor(dat[, name.x])))

#畫圖
KM.CURVE = gvisScatterChart(Final.data, 
                            options=list(
                            explorer="{actions: ['dragToZoom', 
                            'rightClickToReset'],
                            maxZoomIn:0.05}",
                            lineWidth=2, pointSize=0,
                            vAxis="{title:'Survival (%)'}",
                            vAxes="[{viewWindowMode:'explicit',
                            viewWindow:{min:0, max:100}}]",
                            hAxis="{title:'Time (days)'}", 
                            width=1000, height=500))

#展示圖形
plot(KM.CURVE)

練習2:換一種方式呈現

練習2答案

dat = read.csv("data10_1.csv", header = TRUE, fileEncoding = 'CP950')
dat[,1] = as.Date(dat[,1])

Alldat = data.frame(Date = as.Date(as.Date("2010-01-01")+0:2190),
                    Ncase = NA)

for (i in 1:nrow(Alldat)) {
  Alldat[i,2] = sum(dat[,1] == Alldat[i,1])
}

Denguedata = Alldat[-c(1:(730+366)),]

Dengue = gvisCalendar(Denguedata,
                   datevar="Date",
                   numvar="Ncase",
                   options=list(
                     title="Daily dengue case in Taiwan",
                     width=700, height=320,
                     calendar="{yearLabel: { fontName: 'Times-Roman',
                              fontSize: 32, color: '#1A8763', bold: true},
                              cellSize: 10,
                              cellColor: { stroke: 'red', strokeOpacity: 0.2 },
                              focusedCellColor: {stroke:'red'}}")
)

plot(Dengue)

小結

  1. 同學將有能力結合地理資訊進行空間資料的視覺化

  2. 更熟練的組合各式基本繪圖函數

  3. 利用Google圖表製作互動式圖表,並保留更多資料訊息