Lesson 5 原始心電圖左心室功能障礙分類挑戰

下載資料

請你先到資料科學競賽網站選擇第五個任務，登入後下載4份資料，包含之前任務沒有的「supplement.zip」，裡面放著每個病人的原始心電圖資料。

– 與之前所有任務相同，這個任務的資料下載是需要經過申請的，請你找助教申請帳號。

讀取資料

最開始我們還是需要先將「train.csv」、「test.csv」以及「sample_submission.csv」讀進來：

library(data.table)

train_dat <- fread('train.csv', data.table = FALSE)
test_dat <- fread('test.csv', data.table = FALSE)
submit_dat <- fread('sample_submission.csv', data.table = FALSE)

如果你是使用伺服器版的Rstudio，那直接上傳上傳「supplement.zip」就能得到資料夾「ecg」了。或是你可以先解壓縮放到你的project的資料夾下

自建分析模型

心電圖的資料室沒有辦法套用過往的分析模型的，我們可以參考paper，使用他們的架構進行分析。

– 讓我們參考Mayo clinic所發表的：Screening for cardiac contractile dysfunction using an artificial intelligence–enabled electrocardiogram，他提供了一個基本的模型架構。

F01

– 注意它的操作方式，他把讓我們試著重現這個網路吧。要注意在圖上面說資料的輸入是\(1000 /times 12\)，但paper卻是說\(5000 /times 12\)，我們這裡假設paper內文是對的，並且在進去之前先使用「Average pooling」縮減5倍。

– 基於這樣的操作，他的輸入長度應該5120，是如果我們不想進行標準化，第一層要使用Batch normalization喔！

library(magrittr)
library(mxnet)

# Data preprocessing

data <- mx.symbol.Variable('data')

pool_data <- mx.symbol.Pooling(data = data, pool_type = "avg", kernel = c(5,1), stride = c(5,1), pad = c(60, 0), name = 'pool_data')
bn_data <- mx.symbol.BatchNorm(data = pool_data, axis = 2, eps = 1e-5, fix.gamma = TRUE, name = 'bn_data')

# Temporal analysis - 1

temporal_conv1 <- mx.symbol.Convolution(data = bn_data, kernel = c(5, 1), pad = c(2, 0), num_filter = 16, name = 'temporal_conv1')
temporal_bn1 <- mx.symbol.BatchNorm(data = temporal_conv1, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'temporal_bn1')
temporal_relu1 <- mx.symbol.Activation(data = temporal_bn1, act_type = "relu", name = 'temporal_relu1')
temporal_pool1 <- mx.symbol.Pooling(data = temporal_relu1, pool_type = "max", kernel = c(2, 1), stride = c(2, 1), name = 'temporal_pool1')

# Temporal analysis - 2

temporal_conv2 <- mx.symbol.Convolution(data = temporal_pool1, kernel = c(5, 1), pad = c(2, 0), num_filter = 16, name = 'temporal_conv2')
temporal_bn2 <- mx.symbol.BatchNorm(data = temporal_conv2, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'temporal_bn2')
temporal_relu2 <- mx.symbol.Activation(data = temporal_bn2, act_type = "relu", name = 'temporal_relu2')
temporal_pool2 <- mx.symbol.Pooling(data = temporal_relu2, pool_type = "max", kernel = c(2, 1), stride = c(2, 1), name = 'temporal_pool2')

# Temporal analysis - 3

temporal_conv3 <- mx.symbol.Convolution(data = temporal_pool2, kernel = c(5, 1), pad = c(2, 0), num_filter = 32, name = 'temporal_conv3')
temporal_bn3 <- mx.symbol.BatchNorm(data = temporal_conv3, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'temporal_bn3')
temporal_relu3 <- mx.symbol.Activation(data = temporal_bn3, act_type = "relu", name = 'temporal_relu3')
temporal_pool3 <- mx.symbol.Pooling(data = temporal_relu3, pool_type = "max", kernel = c(4, 1), stride = c(4, 1), name = 'temporal_pool3')

# Temporal analysis - 4

temporal_conv4 <- mx.symbol.Convolution(data = temporal_pool3, kernel = c(3, 1), pad = c(1, 0), num_filter = 32, name = 'temporal_conv4')
temporal_bn4 <- mx.symbol.BatchNorm(data = temporal_conv4, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'temporal_bn4')
temporal_relu4 <- mx.symbol.Activation(data = temporal_bn4, act_type = "relu", name = 'temporal_relu4')
temporal_pool4 <- mx.symbol.Pooling(data = temporal_relu4, pool_type = "max", kernel = c(2, 1), stride = c(2, 1), name = 'temporal_pool4')

# Temporal analysis - 5

temporal_conv5 <- mx.symbol.Convolution(data = temporal_pool4, kernel = c(3, 1), pad = c(1, 0), num_filter = 64, name = 'temporal_conv5')
temporal_bn5 <- mx.symbol.BatchNorm(data = temporal_conv5, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'temporal_bn5')
temporal_relu5 <- mx.symbol.Activation(data = temporal_bn5, act_type = "relu", name = 'temporal_relu5')
temporal_pool5 <- mx.symbol.Pooling(data = temporal_relu5, pool_type = "max", kernel = c(2, 1), stride = c(2, 1), name = 'temporal_pool5')

# Temporal analysis - 6

temporal_conv6 <- mx.symbol.Convolution(data = temporal_pool5, kernel = c(3, 1), pad = c(1, 0), num_filter = 64, name = 'temporal_conv6')
temporal_bn6 <- mx.symbol.BatchNorm(data = temporal_conv6, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'temporal_bn6')
temporal_relu6 <- mx.symbol.Activation(data = temporal_bn6, act_type = "relu", name = 'temporal_relu6')
temporal_pool6 <- mx.symbol.Pooling(data = temporal_relu6, pool_type = "max", kernel = c(4, 1), stride = c(4, 1), name = 'temporal_pool6')

# Spatial analysis

spatial_conv <- mx.symbol.Convolution(data = temporal_pool6, kernel = c(1, 12), pad = c(0, 0), num_filter = 64, name = 'spatial_conv')
spatial_bn <- mx.symbol.BatchNorm(data = spatial_conv, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'spatial_bn')
spatial_relu <- mx.symbol.Activation(data = spatial_bn, act_type = "relu", name = 'spatial_relu')

# Fully connected - 1

fc1 <- mx.symbol.FullyConnected(data = spatial_relu, num.hidden = 256, no.bias = TRUE, name = 'fc1')
bn1 <- mx.symbol.BatchNorm(data = fc1, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'bn1')
relu1 <- mx.symbol.Activation(data = bn1, act_type = "relu", name = 'relu1')
dp1 <- mx.symbol.Dropout(data = relu1, p = 0.5, axes = 1, name = 'dp1')

# Fully connected - 2

fc2 <- mx.symbol.FullyConnected(data = dp1, num.hidden = 256, no.bias = TRUE, name = 'fc2')
bn2 <- mx.symbol.BatchNorm(data = fc2, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'bn2')
relu2 <- mx.symbol.Activation(data = bn2, act_type = "relu", name = 'relu2')
dp2 <- mx.symbol.Dropout(data = relu2, p = 0.5, axes = 1, name = 'dp2')

# Prediction

linear_pred <- mx.symbol.FullyConnected(data = dp2, num.hidden = 1, no.bias = FALSE, name = 'linear_pred')
logistic_pred <- mx.symbol.sigmoid(data = linear_pred, name = 'logistic_pred')

# Loss

label <- mx.symbol.Variable(name = 'label')

eps <- 1e-8
ce_loss_pos <- mx.symbol.broadcast_mul(mx.symbol.log(logistic_pred + eps), label)
ce_loss_neg <- mx.symbol.broadcast_mul(mx.symbol.log(1 - logistic_pred + eps), 1 - label)
ce_loss_mean <- 0 - mx.symbol.mean(ce_loss_pos + ce_loss_neg)
ce_loss <- mx.symbol.MakeLoss(ce_loss_mean, name = 'ce_loss')

先讓我們寫一個Iterator，注意當我們遇到類別不平衡時，一定是需要oversampling的：

library(abind)
library(data.table)

data_dir <- 'ecg/'

my_iterator_core <- function (batch_size) {
  
  batch <- 0
  batch_per_epoch <- ceiling(nrow(train_dat) / batch_size)
  
  reset <- function() {batch <<- 0}
  
  iter.next <- function() {
    batch <<- batch + 1
    if (batch > batch_per_epoch) {return(FALSE)} else {return(TRUE)}
  }
  
  value <- function() {
    pos.idx <- sample(which(train_dat[,'LVD'] == 1), batch_size / 2)
    neg.idx <- sample(which(train_dat[,'LVD'] == 0), batch_size / 2)
    idx <- c(pos.idx, neg.idx)
    batch_list <- list()
    for (i in 1:batch_size) {
      ecg_matrix <- fread(paste0(data_dir, train_dat[idx[i],'UID'], '.csv'), data.table = FALSE)
      batch_list[[i]] <- ecg_matrix
    }
    batch_array <- abind(batch_list, along = 3)
    dim(batch_array) <- c(5000, 12, 1, batch_size)
    label_array <- array(train_dat[idx,'LVD'], dim = c(1, batch_size))
    return(list(data = mx.nd.array(batch_array), label = mx.nd.array(label_array)))
  }
  
  return(list(reset = reset, iter.next = iter.next, value = value, batch_size = batch_size, batch = batch))
  
}

my_iterator_func <- setRefClass("Custom_Iter",
                                fields = c("iter", "batch_size"),
                                contains = "Rcpp_MXArrayDataIter",
                                methods = list(
                                  initialize = function(iter, batch_size = 32) {
                                    .self$iter <- my_iterator_core(batch_size = batch_size)
                                    .self
                                  },
                                  value = function(){
                                    .self$iter$value()
                                  },
                                  iter.next = function(){
                                    .self$iter$iter.next()
                                  },
                                  reset = function(){
                                    .self$iter$reset()
                                  },
                                  finalize=function(){
                                  }
                                )
)

my_iter <- my_iterator_func(iter = NULL, batch_size = 16)

指定優化器：

my_optimizer <- mx.opt.create(name = "adam", learning.rate = 1e-3, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-08, wd = 1e-4)

可以開始訓練了，讓我們來進行實驗：

my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss", 
  function(real, pred) {
    return(as.array(pred))
  }
)

mx.set.seed(0)

my_model <- mx.model.FeedForward.create(symbol = ce_loss, X = my_iter, optimizer = my_optimizer,
                                        array.batch.size = 16, num.round = 30, ctx = mx.gpu(0),
                                        eval.metric = my.eval.metric.loss)

讓我們直接拿去預測的話，我們來試試看他的準確度，最終大約可以獲得0.8877的AUC，這比單純用「心電圖參數」好的多。

my_model$symbol <- logistic_pred

test_list <- list()

for (i in 1:nrow(test_dat)) {
  ecg_matrix <- fread(paste0(data_dir, test_dat[i,'UID'], '.csv'), data.table = FALSE)
  test_list[[i]] <- ecg_matrix
}

test_array <- abind(test_list, along = 3)
dim(test_array) <- c(5000, 12, 1, dim(test_array)[3])

pred_test <- predict(model = my_model, X = test_array, ctx = mx.gpu(0))
submit_dat[,'p_LVD'] <- pred_test[1,]
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)

重新換一個模型架構

讓我們參考Deep learning algorithm for management of diabetes mellitus via electrocardiogram-based glycated hemoglobin (ECG-HbA1c): a retrospective cohort study，改一個模型架構

– 我們捨去SE module的部分，並且將輸入一樣改成5120，與之前對比直接看看效果：

library(magrittr)
library(mxnet)

# Functions

pool_module <- function (indata, k = 16, stage = 1) {
  
  conv1 <- mx.symbol.Convolution(data = indata, kernel = c(1, 1), pad = c(0, 0), num_filter = k / 4, name = paste0('pool_module', stage, '.conv1'))
  bn1 <- mx.symbol.BatchNorm(data = conv1, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = paste0('pool_module', stage, '.bn1'))
  relu1 <- mx.symbol.Activation(data = bn1, act_type = "relu", name = paste0('pool_module', stage, '.relu1'))  
  
  conv2 <- mx.symbol.Convolution(data = relu1, kernel = c(3, 1), pad = c(1, 0), stride = c(2, 1), num_filter = k / 4, name = paste0('pool_module', stage, '.conv2'))
  bn2 <- mx.symbol.BatchNorm(data = conv2, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = paste0('pool_module', stage, '.bn2'))
  relu2 <- mx.symbol.Activation(data = bn2, act_type = "relu", name = paste0('pool_module', stage, '.relu2'))  
  
  conv3 <- mx.symbol.Convolution(data = relu2, kernel = c(1, 1), pad = c(0, 0), num_filter = k, name = paste0('pool_module', stage, '.conv3'))
  bn3 <- mx.symbol.BatchNorm(data = conv3, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = paste0('pool_module', stage, '.bn3'))
  relu3 <- mx.symbol.Activation(data = bn3, act_type = "relu", name = paste0('pool_module', stage, '.relu3'))
  
  pool <- mx.symbol.Pooling(data = indata, pool_type = "avg", kernel = c(2, 1), stride = c(2, 1), pad = c(0, 0),
                            name = paste0('pool_module', stage, '.pool'))
  
  concat <- mx.symbol.concat(data = list(pool, relu3), num.args = 2, dim = 1, name = paste0('pool_module', stage, '.concat'))
  
  return(concat)
  
}

res_module <- function (indata, k = 32, stage = 1, loop = 1) {
  
  conv1 <- mx.symbol.Convolution(data = indata, kernel = c(1, 1), pad = c(0, 0), num_filter = k / 4, name = paste0('res_module', stage, '_', loop, '.conv1'))
  bn1 <- mx.symbol.BatchNorm(data = conv1, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = paste0('res_module', stage, '_', loop, '.bn1'))
  relu1 <- mx.symbol.Activation(data = bn1, act_type = "relu", name = paste0('res_module', stage, '_', loop, '.relu1'))  
  
  conv2 <- mx.symbol.Convolution(data = relu1, kernel = c(3, 1), pad = c(1, 0), num_filter = k / 4, name = paste0('res_module', stage, '_', loop, '.conv2'))
  bn2 <- mx.symbol.BatchNorm(data = conv2, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = paste0('res_module', stage, '_', loop, '.bn2'))
  relu2 <- mx.symbol.Activation(data = bn2, act_type = "relu", name = paste0('res_module', stage, '_', loop, '.relu2'))  
  
  conv3 <- mx.symbol.Convolution(data = relu2, kernel = c(1, 1), pad = c(0, 0), num_filter = k, name = paste0('res_module', stage, '_', loop, '.conv3'))
  bn3 <- mx.symbol.BatchNorm(data = conv3, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = paste0('res_module', stage, '_', loop, '.bn3'))
  relu3 <- mx.symbol.Activation(data = bn3, act_type = "relu", name = paste0('res_module', stage, '_', loop, '.relu3'))
  
  plus <- mx.symbol.broadcast_plus(lhs = indata, rhs = relu3, name = paste0('res_module', stage, '_', loop, '.plus'))
  
  return(plus)
  
}

# Data preprocessing

data <- mx.symbol.Variable('data')

bn1_data <- mx.symbol.BatchNorm(data = data, axis = 2, eps = 1e-5, fix.gamma = TRUE, name = 'bn1_data')

conv_data <- mx.symbol.Convolution(data = bn1_data, kernel = c(11, 1), pad = c(65, 0), stride = c(2, 1), num_filter = 16, name = 'conv_data')
bn2_data <- mx.symbol.BatchNorm(data = conv_data, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = 'bn2_data')
relu_data <- mx.symbol.Activation(data = bn2_data, act_type = "relu", name = 'relu_data')

# Loop architecture

k_seq <- c(16, 32, 64, 128, 256, 512)
n_res <- c(3, 3, 6, 6, 3, 3)

res_output <- relu_data

for (i in 1:length(k_seq)) {
  
  pool_output <- pool_module(indata = res_output, k = k_seq[i], stage = i)
  res_output <- pool_output
  for (j in 1:n_res[i]) {res_output <- res_module(indata = res_output, k = k_seq[i] * 2, stage = i, loop = j)}
  
}

final_pool <- mx.symbol.mean(data = res_output, axis = 3, keepdims = FALSE, name = 'final_pool')

feature_list <- mx.symbol.SliceChannel(data = final_pool, num.outputs = 12, axis = 2, squeeze.axis = FALSE, name = 'feature_list')
sigmoid_list <- list()
attention_list <- list()

for (lead in 1:12) {
  
  lead_pred <- mx.symbol.FullyConnected(data = feature_list[[lead]], num.hidden = 1, name = paste0('lead', lead, '_pred'))
  sigmoid_list[[lead]] <- mx.symbol.sigmoid(data = lead_pred, name = paste0('lead', lead, '_sigmoid'))
  
  lead_att1 <- mx.symbol.FullyConnected(data = feature_list[[lead]], num.hidden = 8, no.bias = TRUE, name = paste0('lead', lead, '_att1'))
  lead_bn <- mx.symbol.BatchNorm(data = lead_att1, axis = 1, eps = 1e-5, fix.gamma = FALSE, name = paste0('lead', lead, '_bn'))
  lead_relu <- mx.symbol.Activation(data = lead_bn, act_type = "relu", name = paste0('lead', lead, '_relu'))  
  attention_list[[lead]] <- mx.symbol.FullyConnected(data = lead_relu, num.hidden   = 1, name = paste0('lead', lead, '_att2'))

}

pred_concat <- mx.symbol.concat(data = sigmoid_list, num.args = 12, dim = 1, name = 'pred_concat')
attention_concat <- mx.symbol.concat(data = attention_list, num.args = 12, dim = 1, name = 'attention_concat')
attention_score <- mx.symbol.softmax(data = attention_concat, axis = 1, name = 'attention_score')

weighted_pred <- mx.symbol.broadcast_mul(pred_concat, attention_score, name = 'weighted_pred')
final_pred <- mx.symbol.sum(weighted_pred, axis = 1, keepdims = TRUE, name = 'final_pred')

# Loss

label <- mx.symbol.Variable(name = 'label')

eps <- 1e-8
ce_loss_pos <- mx.symbol.broadcast_mul(mx.symbol.log(final_pred + eps), label)
ce_loss_neg <- mx.symbol.broadcast_mul(mx.symbol.log(1 - final_pred + eps), 1 - label)
ce_loss_mean <- 0 - mx.symbol.mean(ce_loss_pos + ce_loss_neg)
ce_loss <- mx.symbol.MakeLoss(ce_loss_mean, name = 'ce_loss')

再讓我們訓練一次：

my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss", 
  function(real, pred) {
    return(as.array(pred))
  }
)

mx.set.seed(0)

my_model <- mx.model.FeedForward.create(symbol = ce_loss, X = my_iter, optimizer = my_optimizer,
                                        array.batch.size = 16, num.round = 30, ctx = mx.gpu(0),
                                        eval.metric = my.eval.metric.loss)

你應該有注意到，新的大網路loss降得非常快，這是因為我們的網路有解決梯度消失問題。

– 預測的所有的語法幾乎一樣，最終我們將能獲得一個0.9452的AUC，這比單純用「心電圖參數」好的多。

my_model$symbol <- final_pred

test_list <- list()

for (i in 1:nrow(test_dat)) {
  ecg_matrix <- fread(paste0(data_dir, test_dat[i,'UID'], '.csv'), data.table = FALSE)
  test_list[[i]] <- ecg_matrix
}

test_array <- abind(test_list, along = 3)
dim(test_array) <- c(5000, 12, 1, dim(test_array)[3])

pred_test <- predict(model = my_model, X = test_array, ctx = mx.gpu(0))
submit_dat[,'p_LVD'] <- pred_test[1,]
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)

請你再試試看多增加一些資料擴增的手段和多階段訓練的方式，看能不能增強模型準確度吧！