林嶔 (Lin, Chin)
Lesson 2 統計分析實作1(描述性統計及描述統計圖表)
– 請至這裡下載範例資料
# Load the ECG training table. The file is read with the CP950 encoding,
# the first row supplies the column names, empty fields become NA, and
# text columns stay as character vectors rather than factors.
dat <- read.csv(
  file = "ECG_train.csv",
  header = TRUE,
  na.strings = "",
  stringsAsFactors = FALSE,
  fileEncoding = "CP950"
)
## [1] 61.14584
## [1] 18.45581
## [1] 340.6169
## [1] 61.96178
## 0% 25% 50% 75% 100%
## 20.03530 48.18833 61.96178 75.82827 102.63361
## 50%
## 61.96178
## 95%
## 89.31906
## [1] 20.0353
## [1] 102.6336
##
## female male
## 2172 2828
##
## 0 1
## female 459 247
## male 703 703
##
## female male
## 0.4344 0.5656
##
## 0 1
## female 0.2173295 0.1169508
## male 0.3328598 0.3328598
##
## 0 1
## female 0.6501416 0.3498584
## male 0.5000000 0.5000000
##
## 0 1
## female 0.3950086 0.2600000
## male 0.6049914 0.7400000
– 在R裡面,我們能夠畫出任何統計圖!
– 在R裡面的顏色可以在Colors in R裡查看
– 另外,這裡教一個新函數「par()」,它可以指定繪圖環境。其中最常見的應用為把4張圖放在同一張畫布內:
# Draw four descriptive plots on one canvas arranged as a 2 x 2 grid.
# par() returns the settings it replaces; they are kept in old_par and
# restored at the end so that later plots are not forced into the grid.
old_par <- par(mfrow = c(2, 2))
hist(dat[, "AGE"], col = "red")                        # histogram of AGE
boxplot(dat[, "AGE"], col = "blue")                    # box plot of AGE
pie(table(dat[, "AMI"]), col = c("blue", "red", "green"))  # AMI category shares
barplot(table(dat[, "AMI"]), col = c("gray90", "gray50", "gray10"))  # AMI counts
par(old_par)
## count spray
## 1 10 A
## 2 7 A
## 3 20 A
## 4 14 A
## 5 14 A
## 6 12 A
# Box plot of AGE split by LVD status, one colour per group.
boxplot(
  dat[["AGE"]] ~ dat[["LVD"]],
  main = "Age value by LVD status",
  xlab = "LVD",
  ylab = "Age",
  col = c("blue", "red"),
  lwd = 1.5
)

# Scatter plot of heart rate against age with the default point symbol.
plot(
  x = dat[["AGE"]],
  y = dat[["Rate"]],
  main = "Scatter plot of AGE and Heart rate",
  xlab = "AGE",
  ylab = "Heart rate"
)

# Same scatter plot, drawn with solid circles (pch = 19).
plot(
  x = dat[["AGE"]],
  y = dat[["Rate"]],
  main = "Scatter plot of AGE and Heart rate",
  xlab = "AGE",
  ylab = "Heart rate",
  pch = 19
)
– 函數「lines()」的效果是按照順序把幾個點連起來,舉例來說…
– 註:函數「plot.new()」及函數「plot.window()」是拿來開一張新畫布用的!
# Open an empty canvas whose x and y axes both run from 0 to 10.
plot.new()
plot.window(c(0, 10), c(0, 10))

# Three points; lines() joins them in the order given.
x <- c(1, 4, 7)
y <- c(2, 9, 6)
lines(x = x, y = y)
# Angles from 0 to 10 in steps of 0.01.
z <- (0:1000) / 100
x <- sin(z)  # sine of each angle
y <- cos(z)  # cosine of each angle

# Empty canvas spanning -1..1 on both axes, then join the (sin, cos)
# pairs in order with line segments.
plot.new()
plot.window(c(-1, 1), c(-1, 1))
lines(x = x, y = y)
– 要取得預測線的方程式,需要先學會線性回歸。在R語言裡面,線性回歸是這樣建立的:
##
## Call:
## lm(formula = Y ~ X)
##
## Coefficients:
## (Intercept) X
## 78.9313 0.1021
# Two x positions and the fitted values there, using the intercept
# (78.9313) and slope (0.1021) copied from the printed lm() output.
x <- c(10, 150)
y <- 0.1021 * x + 78.9313

# Scatter plot of heart rate against age, with the fitted line on top.
plot(
  x = dat[["AGE"]],
  y = dat[["Rate"]],
  main = "Scatter plot of AGE and Heart rate",
  xlab = "AGE",
  ylab = "Heart rate",
  pch = 19
)
lines(x = x, y = y, lwd = 2, col = "red")
# Scatter plot of heart rate against age.
plot(
  x = dat[["AGE"]],
  y = dat[["Rate"]],
  main = "Scatter plot of AGE and Heart rate",
  xlab = "AGE",
  ylab = "Heart rate",
  pch = 19
)
# abline() takes its intercept and slope from the fitted model object.
abline(reg = model, lwd = 2, col = "red")
## [1] "lm"
– 函數「ls()」可以協助我們看看物件中有哪些東西
– 函數「names()」也可以做到一樣的事情
## [1] "assign" "call" "coefficients" "df.residual"
## [5] "effects" "fitted.values" "model" "qr"
## [9] "rank" "residuals" "terms" "xlevels"
## (Intercept) X
## 78.9312550 0.1021352
# Two x positions and the fitted values there; COEF[1] is used as the
# intercept and COEF[2] as the slope.
x <- c(10, 150)
y <- COEF[1] + COEF[2] * x

# Scatter plot of heart rate against age, with the fitted line on top.
plot(
  x = dat[["AGE"]],
  y = dat[["Rate"]],
  main = "Scatter plot of AGE and Heart rate",
  xlab = "AGE",
  ylab = "Heart rate",
  pch = 19
)
lines(x = x, y = y, lwd = 2, col = "red")
# Four positions on the unit axes and the label to print at each one.
x <- c(1, 0, -1, 0)
y <- c(0, 1, 0, -1)
t <- c("A", "B", "C", "D")

# Empty canvas spanning -1..1 on both axes; text() writes each label at
# its (x, y) position.
plot.new()
plot.window(c(-1, 1), c(-1, 1))
text(x = x, y = y, labels = t)
# Four positions on the unit axes.
x <- c(1, 0, -1, 0)
y <- c(0, 1, 0, -1)

# Empty canvas spanning -1..1 on both axes; each point gets its own
# plotting symbol (pch 1 through 4).
plot.new()
plot.window(c(-1, 1), c(-1, 1))
points(x = x, y = y, pch = seq_len(4))
# Empty canvas spanning -1..1 on both axes.
plot.new()
plot.window(c(-1, 1), c(-1, 1))

# Legend placed by keyword: point symbols on a light grey background.
legend(
  x = "topleft",
  legend = c("Female", "Male"),
  pch = c(15, 19),
  col = c("red", "blue"),
  bg = "gray90"
)

# Legend placed by coordinates (0, 0): a solid and a dashed line style.
legend(
  x = 0,
  y = 0,
  legend = c("estimates", "95% CI"),
  lty = c(1, 2),
  lwd = 2,
  col = "black"
)
# Vertices of the shape, listed in drawing order.
x <- c(1, 0, -1, 0)
y <- c(0, 1, 0, -1)

# Empty canvas spanning -1..1 on both axes; polygon() joins the vertices
# into a closed shape and fills it with green.
plot.new()
plot.window(c(-1, 1), c(-1, 1))
polygon(x = x, y = y, col = "green")