R語言程式設計導論

林嶔 (Lin, Chin)

Lesson 16 經典網頁App分享

第一節:網路爬蟲程式(1)

– 想要找的話你可以複製貼上下面的程式碼:

library(rvest)

my_table = matrix("", nrow = 10, ncol = 4)
colnames(my_table) = c("Title", "url", "ID", "time")

URL = "https://www.ptt.cc/bbs/AllTogether/index.html"
current_id = 1

for (i in 1:10) {
  
  website = read_html(URL)
  needed_html = website %>% html_nodes("a")
  needed_txt = needed_html %>% html_text()
  intrested_pos = grep("[徵男]", needed_txt, fixed = TRUE)
  
  if (length(intrested_pos) > 0) {
    
    for (j in intrested_pos) {
      
      if (current_id <= 10) {
        my_table[current_id, 1] = needed_txt[j]
        my_table[current_id, 2] = needed_html[j] %>% html_attr("href")
      }
      
    current_id = current_id + 1
    
    }
    
  }
  
  if (current_id > 10) {
    break
  }
  
  next_page = website %>% html_nodes("a") %>% .[8] %>% html_attr("href")
  URL = paste0("https://www.ptt.cc", next_page, sep = "")
  
}

for (i in 1:nrow(my_table)) {
  
  sub_URL = paste("https://www.ptt.cc", my_table[i, 2], sep = "")
  sub_website = read_html(sub_URL)
  article_info = sub_website %>% html_nodes(".article-meta-value") %>% html_text()
  my_table[i, 3] = article_info[1]
  my_table[i, 4] = article_info[4]
  
}

my_table
##       Title                           
##  [1,] "[徵男] 微解禁後的你"           
##  [2,] "[徵男]人生神隊友 就是你~"      
##  [3,] "[徵男] 內文沒有照片"           
##  [4,] "[徵男] 胎記"                   
##  [5,] "[徵男]總是要試試看"            
##  [6,] "[徵男] 尋找人生夥伴"           
##  [7,] "[徵男] (代徵)-後疫情世代的你"
##  [8,] "[徵男] 我不想錯過你"           
##  [9,] "[徵男] (代徵)來幫閨蜜徵友"   
## [10,] "[徵男] 尋找人生好隊友"         
##       url                                        ID                       
##  [1,] "/bbs/AllTogether/M.1625783750.A.EAA.html" "aiba1229 (Astrid)"      
##  [2,] "/bbs/AllTogether/M.1625813251.A.0B8.html" "sosmart961 (世界未末日)"
##  [3,] "/bbs/AllTogether/M.1625739636.A.C8C.html" "assilem (亞斯藍布魯)"   
##  [4,] "/bbs/AllTogether/M.1625742577.A.46B.html" "hunter6126 (Mineee)"    
##  [5,] "/bbs/AllTogether/M.1625751706.A.965.html" "chenxine (老公是GD)"    
##  [6,] "/bbs/AllTogether/M.1625753462.A.574.html" "kaputt ()"              
##  [7,] "/bbs/AllTogether/M.1625650256.A.86E.html" "costco5 (清風)"         
##  [8,] "/bbs/AllTogether/M.1625704579.A.167.html" "syuan116 (咩咩)"        
##  [9,] "/bbs/AllTogether/M.1625722813.A.C8D.html" "racocopink (可可粉)"    
## [10,] "/bbs/AllTogether/M.1625577513.A.02D.html" "emilyz (全新生活)"      
##       time                      
##  [1,] "Fri Jul  9 06:35:48 2021"
##  [2,] "Fri Jul  9 14:47:29 2021"
##  [3,] "Thu Jul  8 18:20:34 2021"
##  [4,] "Thu Jul  8 19:09:35 2021"
##  [5,] "Thu Jul  8 21:41:44 2021"
##  [6,] "Thu Jul  8 22:11:00 2021"
##  [7,] "Wed Jul  7 17:30:54 2021"
##  [8,] "Thu Jul  8 08:36:17 2021"
##  [9,] "Thu Jul  8 13:40:11 2021"
## [10,] "Tue Jul  6 21:18:30 2021"

第一節:網路爬蟲程式(2)

library(shiny)
library(rvest)

shinyUI(navbarPage("徵男文自動尋找系統",
                   tabPanel("近期文章搜尋",
                            actionButton("submit", strong("按我開始找")),
                            br(),
                            DT::dataTableOutput("view")
                   )
))
library(shiny)
library(rvest)

shinyServer(function(input, output) {
  
  MY_TABLE = eventReactive(input$submit, {
    
    my_table = matrix("", nrow = 10, ncol = 4)
    colnames(my_table) = c("Title", "url", "ID", "time")
    
    URL = "https://www.ptt.cc/bbs/AllTogether/index.html"
    current_id = 1
    
    withProgress(message = "尋找文章中...", value = 0, {
    
      for (i in 1:10) {
        
        website = read_html(URL)
        needed_html = website %>% html_nodes("a")
        needed_txt = needed_html %>% html_text()
        intrested_pos = grep("[徵男]", needed_txt, fixed = TRUE)
        
        if (length(intrested_pos) > 0) {
          
          for (j in intrested_pos) {
            
            if (current_id <= 10) {
              my_table[current_id, 1] = needed_txt[j]
              my_table[current_id, 2] = needed_html[j] %>% html_attr("href")
            }
            
            current_id = current_id + 1
            
          }
          
        }
        
        if (current_id > 10) {
          break
        }
        
        next_page = website %>% html_nodes("a") %>% .[8] %>% html_attr("href")
        URL = paste0("https://www.ptt.cc", next_page, sep = "")
        
        incProgress(1/10)
        
      }
      
    })
    
    withProgress(message = "擷取文章資訊中...", value = 0, {
      
      for (i in 1:nrow(my_table)) {
        
        sub_URL = paste("https://www.ptt.cc", my_table[i, 2], sep = "")
        sub_website = read_html(sub_URL)
        article_info = sub_website %>% html_nodes(".article-meta-value") %>% html_text()
        my_table[i, 3] = article_info[1]
        my_table[i, 4] = article_info[4]
        
        incProgress(1/nrow(my_table))
        
      }
      
    })
    
    my_table
    
  })
  
  output$view = DT::renderDataTable({
    dat = MY_TABLE()
    if (is.null(dat)) {return()} else {
      dat = data.frame(dat, stringsAsFactors = FALSE)
      Result = DT::datatable(dat)
      return(Result)
    }
  })
  
})

第一節:網路爬蟲程式(3)

– 這裡我們會用到一些HTML的語法,還記得超連結的標籤是什麼嗎?

library(shiny)
library(rvest)

shinyUI(navbarPage("徵男文自動尋找系統",
                   tabPanel("近期文章搜尋",
                            actionButton("submit", strong("按我開始找")),
                            br(),
                            DT::dataTableOutput("view")
                   )
))
library(shiny)
library(rvest)

shinyServer(function(input, output) {
  
  MY_TABLE = eventReactive(input$submit, {
    
    my_table = matrix("", nrow = 10, ncol = 4)
    colnames(my_table) = c("Title", "url", "ID", "time")
    
    URL = "https://www.ptt.cc/bbs/AllTogether/index.html"
    current_id = 1
    
    withProgress(message = "尋找文章中...", value = 0, {
    
      for (i in 1:10) {
        
        website = read_html(URL)
        needed_html = website %>% html_nodes("a")
        needed_txt = needed_html %>% html_text()
        intrested_pos = grep("[徵男]", needed_txt, fixed = TRUE)
        
        if (length(intrested_pos) > 0) {
          
          for (j in intrested_pos) {
            
            if (current_id <= 10) {
              my_table[current_id, 1] = needed_txt[j]
              my_table[current_id, 2] = needed_html[j] %>% html_attr("href")
            }
            
            current_id = current_id + 1
            
          }
          
        }
        
        if (current_id > 10) {
          break
        }
        
        next_page = website %>% html_nodes("a") %>% .[8] %>% html_attr("href")
        URL = paste0("https://www.ptt.cc", next_page, sep = "")
        
        incProgress(1/10)
        
      }
      
    })
    
    withProgress(message = "擷取文章資訊中...", value = 0, {
      
      for (i in 1:nrow(my_table)) {
        
        sub_URL = paste("https://www.ptt.cc", my_table[i, 2], sep = "")
        sub_website = read_html(sub_URL)
        article_info = sub_website %>% html_nodes(".article-meta-value") %>% html_text()
        my_table[i, 3] = article_info[1]
        my_table[i, 4] = article_info[4]
        
        incProgress(1/nrow(my_table))
        
      }
      
    })
    
    my_table
    
  })
  
  output$view = DT::renderDataTable({
    dat = MY_TABLE()
    if (is.null(dat)) {return()} else {
      dat = data.frame(dat, stringsAsFactors = FALSE)
      dat[,2] = paste('<a href="https://www.ptt.cc', dat[,2], '">', dat[,2], '</a>', sep = "")
      Result = DT::datatable(dat, escape = FALSE)
      return(Result)
    }
  })
  
})

第二節:病患存活預測(1)

– 目前套件『survival』是最常用來做存活分析的套件,他同時支援各式存活分析的相關功能。

library(survival)

\[log(\frac{h_i(t)}{h_0(t)})= b_{1}x_{1,i} + b_{2}x_{2,i}\]

– 這樣對於第\(i\)個人而言,他的「hazard」就會是baseline的\(exp(b_{1}x_{1,i} + b_{2}x_{2,i})\)倍。

– 「hazard」是Cox比例風險模型所定義的特殊參數,可以跟第\(t\)個時間點的累積存活率\(S(t)\)做下列公式的轉換:

\[S(t) = exp(-h_i(t))\]

第二節:病患存活預測(2)

library(survival)

data(ovarian)    #呼叫ovarian dataset
dat = ovarian    #將ovarian轉存為dat
model <- coxph(Surv(futime, fustat) ~ age, data = dat) #利用age預測存活時間
summary(model)   #看結果
## Call:
## coxph(formula = Surv(futime, fustat) ~ age, data = dat)
## 
##   n= 26, number of events= 12 
## 
##        coef exp(coef) se(coef)     z Pr(>|z|)   
## age 0.16162   1.17541  0.04974 3.249  0.00116 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##     exp(coef) exp(-coef) lower .95 upper .95
## age     1.175     0.8508     1.066     1.296
## 
## Concordance= 0.784  (se = 0.083 )
## Rsquare= 0.423   (max possible= 0.932 )
## Likelihood ratio test= 14.29  on 1 df,   p=2e-04
## Wald test            = 10.56  on 1 df,   p=0.001
## Score (logrank) test = 12.26  on 1 df,   p=5e-04
h0.hazard <- basehaz(model)
head(h0.hazard)
##       hazard time
## 1 0.01205169   59
## 2 0.02647352  115
## 3 0.04647344  156
## 4 0.06883685  268
## 5 0.10829595  329
## 6 0.14794557  353

第二節:病患存活預測(3)

linear_pred <- predict(object = model, newdata = data.frame(age = 60))
hazardratio <- exp(linear_pred)
indv.hazard <- h0.hazard
indv.hazard[,'hazard'] <- indv.hazard[,'hazard'] * hazardratio
indv.hazard[,'surv'] <- exp(-indv.hazard[,'hazard']) * 100

第二節:病患存活預測(4)

km_curve_data <- data.frame(time = c(0, rep(indv.hazard[,'time'], each = 2), indv.hazard[nrow(indv.hazard),'time']),
                            surv = c(100, 100, rep(indv.hazard[,'surv'], each = 2)))

plot(surv ~ time, data = km_curve_data, type = 'l', ylim = c(0, 100))

第二節:病患存活預測(5)

library(googleVis)

Scatter <- gvisScatterChart(km_curve_data, 
                            options=list(
                              explorer="{actions: ['dragToZoom', 
                                        'rightClickToReset'],
                                        maxZoomIn:0.05}",
                              legend="none",
                              lineWidth=2, pointSize=0,
                              vAxis="{title:'Survival (%)'}",
                              vAxes="[{viewWindowMode:'explicit',
                                     viewWindow:{min:0, max:100}}]",
                              hAxis="{title:'Time (days)'}", 
                              colors="['#ff0000']",
                              width=800, height=700))
plot(Scatter)

第二節:病患存活預測(6)

– ui.R

library(shiny)
library(survival)
library(googleVis)

fluidPage(
  sliderInput("Age", "Please enter your age", min=40, max=80, value=50),
  htmlOutput("chart1")
)

– server.R

library(shiny)
library(survival)
library(googleVis)

######################################################
# 這些函數只需要跑1次即可

data(ovarian)   
dat = ovarian 
model <- coxph(Surv(futime, fustat) ~ age, data = dat) 
h0.hazard <- basehaz(model)

######################################################

shinyServer(function(input, output, session) {
  
  output$chart1<- renderGvis({
    
    linear_pred <- predict(object = model, newdata = data.frame(age = input$Age))
    hazardratio <- exp(linear_pred)
    
    indv.hazard <- h0.hazard
    indv.hazard[,'hazard'] <- indv.hazard[,'hazard'] * hazardratio
    indv.hazard[,'surv'] <- exp(-indv.hazard[,'hazard']) * 100
    
    km_curve_data <- data.frame(time = c(0, rep(indv.hazard[,'time'], each = 2), indv.hazard[nrow(indv.hazard),'time']),
                                surv = c(100, 100, rep(indv.hazard[,'surv'], each = 2)))
    
    Scatter <- gvisScatterChart(km_curve_data, 
                                options=list(
                                  explorer="{actions: ['dragToZoom', 
                                  'rightClickToReset'],
                                  maxZoomIn:0.05}",
                                  legend="none",
                                  lineWidth=2, pointSize=0,
                                  vAxis="{title:'Survival (%)'}",
                                  vAxes="[{viewWindowMode:'explicit',
                                  viewWindow:{min:0, max:100}}]",
                                  hAxis="{title:'Time (days)'}", 
                                  colors="['#ff0000']",
                                  width=800, height=500))
    Scatter
    
  })
  
})

練習1:修正預測

– 請將剛剛的WebApp改寫,讓使用者能輸出age+rx的數值並進行預測

– 注意,rx的數值僅可以是1或2,請用radioButtons()來讓使用者輸入參數

– 註:radioButtons()回傳的物件為『文字』,需使用as.numeric()來使該物件轉換為數字

練習1答案

library(shiny)
library(survival)
library(googleVis)

fluidPage(
  sliderInput("Age", "Please enter your age", min=40, max=80, value=50),
  radioButtons("rx", "Please select a treatment group", c("1","2")),
  htmlOutput("chart1")
)
library(shiny)
library(survival)
library(googleVis)

######################################################
# 這些函數只需要跑1次即可

data(ovarian)   
dat = ovarian 
model <- coxph(Surv(futime, fustat) ~ age + rx, data = dat) 
h0.hazard <- basehaz(model)

######################################################

shinyServer(function(input, output, session) {
  
  output$chart1<- renderGvis({
    
    linear_pred <- predict(object = model, newdata = data.frame(age = input$Age, rx = as.numeric(input$rx)))
    hazardratio <- exp(linear_pred)
    
    indv.hazard <- h0.hazard
    indv.hazard[,'hazard'] <- indv.hazard[,'hazard'] * hazardratio
    indv.hazard[,'surv'] <- exp(-indv.hazard[,'hazard']) * 100
    
    km_curve_data <- data.frame(time = c(0, rep(indv.hazard[,'time'], each = 2), indv.hazard[nrow(indv.hazard),'time']),
                                surv = c(100, 100, rep(indv.hazard[,'surv'], each = 2)))
    
    Scatter <- gvisScatterChart(km_curve_data, 
                                options=list(
                                  explorer="{actions: ['dragToZoom', 
                                  'rightClickToReset'],
                                  maxZoomIn:0.05}",
                                  legend="none",
                                  lineWidth=2, pointSize=0,
                                  vAxis="{title:'Survival (%)'}",
                                  vAxes="[{viewWindowMode:'explicit',
                                  viewWindow:{min:0, max:100}}]",
                                  hAxis="{title:'Time (days)'}", 
                                  colors="['#ff0000']",
                                  width=800, height=500))
    Scatter
    
  })
  
})

小結

– 關於使用shiny套件的學習資源,可以參考shiny的官方網站

– 如果你想多看看別人寫的shiny應用程式,你可以到shiny gallery去學習學習!

– 但注意,免費帳戶每月僅能讓App運作25小時,並且只能上傳5個App

– 除此之外,如果你的原始碼有重要的商業價值,建議還是自建server

分享你的App至shinyapps.io

install.packages("devtools")
library(devtools)
devtools::install_github('rstudio/shinyapps')
library(shinyapps)

建立R與你的帳戶的聯結

F16_1

F16_2

F16_3

分享

– 點選Publish後,會出現個小視窗,指定檔名後(這也是你未來的網址名稱)就可以上傳至shinyapps.io了

F16_4