資料視覺化1

林嶔 (Lin, Chin)

Lesson 8

第一節:基礎繪圖函數簡介-1(1)

– 請至這裡下載本週的範例資料

# Load the example data set (the first row of the CSV holds the column names)
# and preview its first rows.
dat <- read.csv(file = "Example data.csv", header = TRUE)
head(dat)
##       eGFR Disease Survival.time Death Diabetes Cancer      SBP      DBP
## 1 34.65379       1     0.4771037     0        0      1 121.2353 121.3079
## 2 37.21183       1     3.0704424     0        1      1 122.2000 122.6283
## 3 32.60074       1     0.2607117     1        0      0 118.9136 121.7621
## 4 29.68481       1            NA    NA        0      0 118.2212 112.7043
## 5 28.35726       0     0.1681673     1        0      0 116.7469 115.7705
## 6 33.95012       1     1.2238556     0        0      0 119.9936 116.3872
##   Education Income
## 1         2      0
## 2         2      0
## 3         0      0
## 4         1      0
## 5         0      0
## 6         1      0

第一節:基礎繪圖函數簡介-1(2)

  1. 直方圖:需要使用函數「hist()」
# Histogram of the eGFR column, using the default bins, title and axis labels.
hist(x = dat[, "eGFR"])

  2. 盒鬚圖:需要使用函數「boxplot()」
# Box-and-whisker plot of the eGFR column.
boxplot(dat[, "eGFR"])

  3. 圓餅圖:需要使用函數「pie()」以及函數「table()」
# Pie chart: table() counts the rows per Education value, pie() draws one
# slice per count.
education_counts <- table(dat[, "Education"])
pie(education_counts)

  4. 長條圖:需要使用函數「barplot()」以及函數「table()」
# Bar chart: table() counts the rows per Education value, barplot() draws one
# bar per count.
education_counts <- table(dat[, "Education"])
barplot(education_counts)

第一節:基礎繪圖函數簡介-1(3)

– 在R裡面的顏色可以在Colors in R裡查看

– 另外,這裡教一個新函數「par()」,它可以用來設定繪圖環境的參數。其中最常見的應用是把4張圖放在同一張畫布內:

# Draw four plots on one canvas: par(mfrow = c(2, 2)) splits the device into a
# 2 x 2 grid that is filled row by row.
# par() returns the previous settings when it changes them, so keep them and
# restore them at the end; otherwise every later plot on this device would
# still be squeezed into one cell of the grid.
old_par <- par(mfrow = c(2, 2))
hist(dat[, "eGFR"], col = "red")
boxplot(dat[, "eGFR"], col = "blue")
pie(table(dat[, "Education"]), col = c("blue", "red", "green"))
barplot(table(dat[, "Education"]), col = c("gray90", "gray50", "gray10"))
par(old_par)  # back to the layout that was active before this figure

# Same four-panel figure, written to an 8 x 8 inch PDF file with a serif font
# instead of being shown on the screen device.
pdf(file = "plot1.pdf", width = 8, height = 8, family = "serif")

# The 2 x 2 layout is set on the PDF device that was just opened.
par(mfrow = c(2, 2))
hist(dat[, "eGFR"], col = "red")
boxplot(dat[, "eGFR"], col = "blue")
pie(table(dat[, "Education"]), col = c("blue", "red", "green"))
barplot(table(dat[, "Education"]), col = c("gray90", "gray50", "gray10"))

# Close the PDF device so that the file is completely written.
dev.off()

練習1:簡單繪圖

練習1答案

# Insect counts for each spray type (built-in InsectSprays data set), drawn
# as one light-gray box per spray.
boxplot(
  count ~ spray,
  data = InsectSprays,
  col = "lightgray"
)

# eGFR split by Disease status: one box per Disease value, coloured blue and
# red, with explicit axis labels and title and slightly thicker lines.
boxplot(
  dat[, "eGFR"] ~ dat[, "Disease"],
  col = c("blue", "red"),
  ylab = "eGFR",
  xlab = "Disease",
  main = "eGFR value by Disease status",
  lwd = 1.5
)

第二節:基礎繪圖函數簡介-2(1)

# Scatter plot of DBP (vertical axis) against SBP (horizontal axis) with the
# default open-circle symbol.
plot(
  dat[, "SBP"], dat[, "DBP"],
  ylab = "DBP",
  xlab = "SBP",
  main = "Scatter plot of SBP and DBP"
)

# Same scatter plot, but pch = 19 switches the symbol to a solid circle.
plot(
  dat[, "SBP"], dat[, "DBP"],
  ylab = "DBP",
  xlab = "SBP",
  main = "Scatter plot of SBP and DBP",
  pch = 19
)

第二節:基礎繪圖函數簡介-2(2)

– 函數「lines()」的效果是按照順序把幾個點連起來,舉例來說…

– 註:函數「plot.new()」及函數「plot.window()」是拿來開一張新畫布用的!

# Three points that lines() joins in the order in which they are given.
x <- c(1, 4, 7)
y <- c(2, 9, 6)

# Open an empty canvas whose two axes both run from 0 to 10, then draw the
# connecting segments on it.
plot.new()
plot.window(xlim = c(0, 10), ylim = c(0, 10))
lines(x, y)

# z runs from 0 to 10 in steps of 0.01. The points (sin(z), cos(z)) lie on the
# unit circle, so joining consecutive points with lines() draws a circle.
z <- (0:1000) / 100
x <- sin(z)  # sine
y <- cos(z)  # cosine

# Empty canvas covering [-1, 1] on both axes, just large enough for the circle.
plot.new()
plot.window(xlim = c(-1, 1), ylim = c(-1, 1))
lines(x, y)

第二節:基礎繪圖函數簡介-2(3)

– 預測線的方程式,需要先用第7課所學到的函數「glm()」幫忙建立,你看得懂下面的程式碼嗎?

# Fit the model and compute the coordinates of the prediction line
X <- dat[, "SBP"]
Y <- dat[, "DBP"]

# glm() with its default family: regress DBP (Y) on SBP (X).
model <- glm(Y ~ X)

# COEF[1] is the intercept and COEF[2] the slope for X.
COEF <- model$coefficients

# A straight line only needs its two end points: SBP = 0 and SBP = 200.
x <- c(0, 200)
y <- COEF[1] + COEF[2] * x

# Scatter plot of the raw data (solid circles) ...
plot(
  dat[, "SBP"], dat[, "DBP"],
  ylab = "DBP",
  xlab = "SBP",
  main = "Scatter plot of SBP and DBP",
  pch = 19
)

# ... with the prediction line on top: lines() joins the points in x and y,
# drawn in red at double width.
lines(x, y, col = "red", lwd = 2)

第二節:基礎繪圖函數簡介-2(4)

  1. 第一種方法是將截距的位置依序從0平移到200,做法是在建立預測式時將SBP依序減去0到200之間的各個數值。
# Method 1: obtain the fitted value and a 95% confidence band at every SBP
# value in `x` by shifting the predictor. After regressing DBP on
# (SBP - x[i]), the intercept is the predicted DBP at SBP = x[i], and the
# standard error of the intercept is the standard error of that prediction.
x <- 0:1000 / 5          # grid of SBP values: 0, 0.2, ..., 200
Y <- dat[, "DBP"]
sbp <- dat[, "SBP"]      # loop-invariant, so read the column only once
z_crit <- qnorm(0.975)   # normal quantile for a two-sided 95% interval

# Result vectors with one slot per grid point; every slot is filled below.
y <- numeric(length(x))
ci.low <- numeric(length(x))
ci.up <- numeric(length(x))

# seq_along() also behaves correctly for a zero-length grid, where
# 1:length(x) would yield c(1, 0).
indexes <- seq_along(x)

for (i in indexes) {
  X <- sbp - x[i]
  model <- glm(Y ~ X)
  # Coefficient matrix: row 1 is the intercept, column 1 the estimate and
  # column 2 its standard error.
  COEF <- summary(model)$coefficients
  y[i] <- COEF[1, 1]
  ci.low[i] <- COEF[1, 1] - z_crit * COEF[1, 2]
  ci.up[i] <- COEF[1, 1] + z_crit * COEF[1, 2]
}

# Raw data, fitted line (solid red) and confidence limits (dashed red).
plot(
  dat[, "SBP"], dat[, "DBP"],
  ylab = "DBP",
  xlab = "SBP",
  main = "Scatter plot of SBP and DBP",
  pch = 19
)
lines(x, y, col = "red", lwd = 2)
lines(x, ci.low, col = "red", lty = 2, lwd = 2)
lines(x, ci.up, col = "red", lty = 2, lwd = 2)