Lesson 3 胸部X光左心室功能障礙分類挑戰

下載資料

請你先到資料科學競賽網站選擇第三個任務，登入後下載4份資料，包含之前任務沒有的「supplement.zip」，裡面放著每個病人的胸部X光影像。

– 與之前所有任務相同，這個任務的資料下載是需要經過申請的，請你找助教申請帳號。

讀取資料

最開始我們還是需要先將「train.csv」、「test.csv」以及「sample_submission.csv」讀進來：

library(data.table)

train_dat <- fread('train.csv', data.table = FALSE)
test_dat <- fread('test.csv', data.table = FALSE)
submit_dat <- fread('sample_submission.csv', data.table = FALSE)

如果你是使用伺服器版的Rstudio，那直接上傳上傳「supplement.zip」就能得到資料夾「image」了。或是你可以先解壓縮放到你的project的資料夾下

F01

先讓我們將第一張圖片讀進來看看長成什麼樣子，我們可以採用自己寫的函數

library(OpenImageR)

Show_img <- function (img, plot_xlim = c(0.04, 0.96), plot_ylim = c(0.96, 0.04)) {
  
  par(mar = rep(0, 4))
  plot(NA, xlim = plot_xlim, ylim = plot_ylim, xaxt = "n", yaxt = "n", bty = "n")
  img = (img - min(img))/(max(img) - min(img))
  img = as.raster(img)
  rasterImage(img, 0, 1, 1, 0, interpolate=FALSE)
  
}

img_path <- paste0('image/', train_dat[1,'UID'], '.jpeg')
img <- readImage(img_path)
Show_img(img)

我們可能需要先進行資料前處理，我們將jpeg檔案讀進來並將他resize成短邊大小為256，語法如下：

image_dir <- 'image/'
processed_dir <- 'processed/'

img_paths <- list.files(image_dir)

pb <- txtProgressBar(max = length(img_paths), style = 3)

for (i in 1:length(img_paths)) {
  
  current_img_path <- paste0(image_dir, img_paths[i])
  current_prc_path <- paste0(processed_dir, gsub('\\.jpeg$', '.RData', img_paths[i]))
  
  img <- readImage(current_img_path)
  
  target_size <- c(256, 256)
  targer_fold <- dim(img)[1] / dim(img)[2]
  if (targer_fold > 1) {target_size[1] <- target_size[1] * targer_fold} else {target_size[2] <- target_size[2] / targer_fold}
  resize_img <- resizeImage(img, width = target_size[1], height = target_size[2], method = 'bilinear')
    
  save(resize_img, file = current_prc_path)
  
  setTxtProgressBar(pb, i)
  
}

close(pb)

卷積神經網路

卷積神經網路(Convolutional neural network)是進行圖像分析時最好用的工具，我們在這裡提供了一個已經預訓練好的模型列表

– 這個範例使用「resnet-50」進行，這是一個50層深的ResNet，非常標準的深度神經網路。我們要建構一個新的神經網路將最後一層的FC從分1000類轉變成分2類：

library(magrittr)
library(mxnet)

res_model <- mx.model.load(prefix = "resnet-50", iteration = 0)
all_layers <- res_model$symbol$get.internals()
flatten0_output <- which(all_layers$outputs == 'flatten0_output') %>% all_layers$get.output()

fc1 <- mx.symbol.FullyConnected(data = flatten0_output, num_hidden = 2, name = 'fc1')
softmax <- mx.symbol.SoftmaxOutput(data = fc1, name = 'softmax')

產生了新的深度神經網路後，我們僅使用圖片中間做為輸入(這是為了避免縮放)，並將他們合併成一個大陣列：

processed_dir <- 'processed/'

train_y_array <- model.matrix(~ -1 + factor(train_dat[,'LVD'])) %>% t()
train_img_array <-  array(0, dim = c(256, 256, 3, nrow(train_dat)))

for (i in 1:nrow(train_dat)) {
  
  if (i == 1) {t0 <- Sys.time()}
  
  current_prc_path <- paste0(processed_dir, train_dat[i, 'UID'], '.RData')
  load(current_prc_path)
  
  row_select <- floor((dim(resize_img)[1] - 256) / 2) + 1:256
  col_select <- floor((dim(resize_img)[2] - 256) / 2) + 1:256
  train_img_array[,,,i] <- resize_img[row_select,col_select,]
  
  if (i %% 100 == 0) {
    
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (nrow(train_dat) - i) / batch_speed / 60
    
    message(i, "/", nrow(train_dat), ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
    
  }
  
}

接著，我們在開始訓練之前需要取得模型權重參數，我們可以將最後一層以外的部分填入resnet-50的參數，並以這為基礎開始訓練任務：

new_arg <- mxnet:::mx.model.init.params(symbol = softmax,
                                        input.shape = list(data = c(224, 224, 3, 32)),
                                        output.shape = NULL,
                                        initializer = mxnet:::mx.init.uniform(0.01),
                                        ctx = mx.cpu())

for (i in 1:length(new_arg$arg.params)) {
  pos <- which(names(res_model$arg.params) == names(new_arg$arg.params)[i])
  if (length(pos) == 1) {
    if (all.equal(dim(res_model$arg.params[[pos]]), dim(new_arg$arg.params[[i]])) == TRUE) {
      new_arg$arg.params[[i]] <- res_model$arg.params[[pos]]
    }
  }
}

for (i in 1:length(new_arg$aux.params)) {
  pos <- which(names(res_model$aux.params) == names(new_arg$aux.params)[i])
  if (length(pos) == 1) {
    if (all.equal(dim(res_model$aux.params[[pos]]), dim(new_arg$aux.params[[i]])) == TRUE) {
      new_arg$aux.params[[i]] <- res_model$aux.params[[pos]]
    }
  }
}

可以開始訓練了，如果你的電腦沒有GPU，請你將「mx.gpu()」改成「mx.cpu()」

my.eval.metric.mlogloss <- mx.metric.custom(
  name = "m-logloss", 
  function(real, pred) {
    real1 = as.numeric(as.array(real))
    pred1 = as.numeric(as.array(pred))
    pred1[pred1 <= 1e-6] = 1e-6
    pred1[pred1 >= 1 - 1e-6] = 1 - 1e-6
    return(-mean(real1 * log(pred1), na.rm = TRUE))
  }
)

mx.set.seed(0)

my_model <- mx.model.FeedForward.create(symbol = softmax,
                                       X = train_img_array, y = train_y_array,
                                       optimizer = "sgd", learning.rate = 0.001, momentum = 0.9,
                                       array.batch.size = 16, num.round = 20,
                                       arg.params = new_arg$arg.params, aux.params = new_arg$aux.params,
                                       ctx = mx.gpu(1),
                                       eval.metric = my.eval.metric.mlogloss)

現在，我們能直接用這個網路來預測，首先先產生大陣列：

processed_dir <- 'processed/'
test_img_array <-  array(0, dim = c(256, 256, 3, nrow(test_dat)))

for (i in 1:nrow(test_dat)) {
  
  if (i == 1) {t0 <- Sys.time()}
  
  current_prc_path <- paste0(processed_dir, test_dat[i, 'UID'], '.RData')
  load(current_prc_path)
  
  row_select <- floor((dim(resize_img)[1] - 256) / 2) + 1:256
  col_select <- floor((dim(resize_img)[2] - 256) / 2) + 1:256
  test_img_array[,,,i] <- resize_img[row_select,col_select,]
  
  if (i %% 100 == 0) {
    
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (nrow(test_dat) - i) / batch_speed / 60
    
    message(i, "/", nrow(test_dat), ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
    
  }
  
}

讓我們直接進行預測，最簡單的神經網路能幫助我們能夠取得很不錯的成績(C-index = 0.7256)

pred_test <-  predict(model = my_model, X = test_img_array, ctx = mx.gpu(1))
submit_dat[,'p_LVD'] <- pred_test[2,]
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)

資料擴增及交叉驗證

如果你想進一步提升模型準確度，你會發現有很多值得改進的地方：

樣本中LVD的病人偏少，好像沒有做oversampling
我們都是使用圖形的中央，雖然應該是可以的，畢竟圖的中央應該都看得到心臟，但全圖會不會有更多資訊
我們其實不知道訓練到什麼時間該結束，應該要有驗證組給我們提示
呈上，如果切割一個驗證組會讓我們的樣本變的很少，我們可能會想要用5-fold cross-validation來幫助我們
學習率該設多少?這個很難決定。

為了完成上面的工作，我們需要重新設計整個流程，而且「一定」要寫底層執行器的語法

– 先讓我們將原始樣本分成5個組別：

set.seed(0)

train_dat[,'fold'] <- sample(1:5, size = nrow(train_dat), replace = TRUE)

再寫一個圖像生成器:

library(abind)

cxr_process_core <- function(img_paths, img_crop = 224, VALID_CROP = NULL,
                             col_flip = TRUE, add_sd = 0.05, mul_sd = 0.1, pow_sd = 0.1) {
  
  batch_IMG_LIST <- list()
  
  for (i in 1:length(img_paths)) {
    
    load(img_paths[i])
    
    row_df <- dim(resize_img)[1] - img_crop
    col_df <- dim(resize_img)[2] - img_crop
    
    if (is.null(VALID_CROP)) {
      
      random.row <- sample(0:row_df, 1)
      random.col <- sample(0:col_df, 1)
      
      batch_IMG_LIST[[i]] <- resize_img[random.row+1:img_crop,random.col+1:img_crop,,drop=FALSE]
      
    } else {
      
      if (VALID_CROP %in% 1:10) {
        
        if (VALID_CROP %in% c(1, 3, 6, 8)) {
          
          fixed.row <- 0
          
        } else if (VALID_CROP %in% c(2, 4, 7, 9)) {
          
          fixed.row <- row_df
          
        } else if (VALID_CROP %in% c(5, 10)) {
          
          fixed.row <- floor(row_df / 2)
          
        }
        
        if (VALID_CROP %in% c(1, 2, 6, 7)) {
          
          fixed.col <- 0
          
        } else if (VALID_CROP %in% c(3, 4, 8, 9)) {
          
          fixed.col <- col_df
          
        } else if (VALID_CROP %in% c(5, 10)) {
          
          fixed.col <- floor(col_df / 2)
          
        }
        
        if (VALID_CROP %in% c(1:5)) {
          
          fixed.flip <- FALSE
          
        } else {
          
          fixed.flip <- TRUE
          
        }
        
        batch_IMG_LIST[[i]] <- resize_img[fixed.row+1:img_crop,fixed.col+1:img_crop,,drop=FALSE]
        
      } else {
        
        stop('VALID_CROP should be ranged 1 to 10!')
        
      }
      
    }
    
  }
  
  batch_img_array <- abind(batch_IMG_LIST, along = 4)

  if (!is.null(VALID_CROP)) {
    
    if (fixed.flip) {
      
      batch_img_array <- batch_img_array[,dim(batch_img_array)[2]:1,,,drop = FALSE]
      
    }
    
    batch_img_array <- mx.nd.array(batch_img_array)
    
  } else {
    
    if (col_flip) {
      
      if (sample(c(TRUE, FALSE), 1)) {
        
        batch_img_array <- batch_img_array[,dim(batch_img_array)[2]:1,,,drop = FALSE]
        
      }
      
    }
    
    batch_img_array <- mx.nd.array(batch_img_array)
    
    if (add_sd != 0) {
      
      add_item <- mx.nd.array(array(runif(length(img_paths) * 1, min = -add_sd, max = add_sd), dim = c(1, 1, 1, length(img_paths))))
      batch_img_array <- mx.nd.broadcast.add(batch_img_array, add_item)
      
    }
    
    if (mul_sd != 0) {
      
      mul_item <- mx.nd.array(array(exp(runif(length(img_paths) * 1, min = -mul_sd, max = mul_sd)), dim = c(1, 1, 1, length(img_paths))))
      batch_img_array <- mx.nd.broadcast.mul(batch_img_array, mul_item)
      
    }
    
    if (pow_sd != 0) {
      
      pow_item <- mx.nd.array(array(exp(runif(length(img_paths) * 1, min = -pow_sd, max = pow_sd)), dim = c(1, 1, 1, length(img_paths))))
      batch_img_array <- mx.nd.broadcast.power(batch_img_array, pow_item)
      
    }
    
    batch_img_array <- mx.nd.broadcast.maximum(batch_img_array, mx.nd.array(array(0, dim = c(1, 1, 1, 1))))
    batch_img_array <- mx.nd.broadcast.minimum(batch_img_array, mx.nd.array(array(1, dim = c(1, 1, 1, 1))))
    
  }
  
  return(batch_img_array)
  
}

跟剛剛一樣的Model architecture，但我們需要自己寫loss:

library(magrittr)
library(mxnet)

res_model <- mx.model.load(prefix = "resnet-50", iteration = 0)
all_layers <- res_model$symbol$get.internals()
flatten0_output <- which(all_layers$outputs == 'flatten0_output') %>% all_layers$get.output()

fc1 <- mx.symbol.FullyConnected(data = flatten0_output, num_hidden = 1, name = 'fc1')
logistic_pred <- mx.symbol.sigmoid(data = fc1, name = 'logistic_pred')

label_sym <- mx.symbol.Variable('label')

eps <- 1e-8
ce_loss_pos <- mx.symbol.broadcast_mul(mx.symbol.log(logistic_pred + eps), label_sym)
ce_loss_neg <- mx.symbol.broadcast_mul(mx.symbol.log(1 - logistic_pred + eps), 1 - label_sym)
ce_loss_mean <- 0 - mx.symbol.mean(ce_loss_pos + ce_loss_neg)
loss_symbol <- mx.symbol.MakeLoss(ce_loss_mean, name = 'ce_loss')

開始訓練，這個語法有點複雜，畢竟我們要做5次結果：

library(pROC)

processed_dir <- 'processed/'
batch_size <- 32
valid_type <- 1:10

# executor

my_executor <- mx.simple.bind(symbol = loss_symbol,
                              data = c(224, 224, 3, batch_size), label = c(1, batch_size),
                              ctx = mx.gpu(1), grad.req = "write")

pred_executor <- mx.simple.bind(symbol = logistic_pred,
                                data = c(224, 224, 3, batch_size), 
                                ctx = mx.gpu(1))

for (fold in 1:5) {
  
  if (!dir.exists(paste0('model/fold', fold, '/'))) {dir.create(paste0('model/fold', fold, '/'), recursive = TRUE)}
  
  # Subsets
  
  train_subdat <- train_dat[train_dat[,'fold'] != fold,]
  valid_subdat <- train_dat[train_dat[,'fold'] == fold,]
  
  # Initailize
  
  mx.set.seed(0)
  new_arg <- mxnet:::mx.model.init.params(symbol = loss_symbol,
                                          input.shape = list(data = c(224, 224, 3, batch_size), label = c(1, batch_size)),
                                          output.shape = NULL,
                                          initializer = mxnet:::mx.init.uniform(0.01),
                                          ctx = mx.cpu())
  
  for (i in 1:length(new_arg$arg.params)) {
    pos <- which(names(res_model$arg.params) == names(new_arg$arg.params)[i])
    if (length(pos) == 1) {
      if (all.equal(dim(res_model$arg.params[[pos]]), dim(new_arg$arg.params[[i]])) == TRUE) {
        new_arg$arg.params[[i]] <- res_model$arg.params[[pos]]
      }
    }
  }
  
  for (i in 1:length(new_arg$aux.params)) {
    pos <- which(names(res_model$aux.params) == names(new_arg$aux.params)[i])
    if (length(pos) == 1) {
      if (all.equal(dim(res_model$aux.params[[pos]]), dim(new_arg$aux.params[[i]])) == TRUE) {
        new_arg$aux.params[[i]] <- res_model$aux.params[[pos]]
      }
    }
  }
  
  mx.exec.update.arg.arrays(my_executor, new_arg$arg.params, match.name = TRUE)
  mx.exec.update.aux.arrays(my_executor, new_arg$aux.params, match.name = TRUE)
  
  # Multi-LR strategy
  
  current_epoch <- 0L
  train_loss <- NULL
  valid_auc <- NULL

  t0 <- Sys.time()
  
  for (lr in c(1e-3, 1e-4, 1e-5)) {
    
    my_optimizer <- mx.opt.create(name = "adam", learning.rate = lr, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-08, wd = 1e-3)
    my_updater <- mx.opt.get.updater(optimizer = my_optimizer, weights = my_executor$ref.arg.arrays)
    
    # Training
    
    for (sub_epoch in 1:50) {
      
      current_epoch <- current_epoch + 1L
      batch_loss <- NULL

      for (batch in 1:50) {
        
        positive_id <- sample(which(train_subdat[,'LVD'] == 1), size = batch_size / 2)
        negative_id <- sample(which(train_subdat[,'LVD'] == 0), size = batch_size / 2)
        used_img_paths <- paste0(processed_dir, train_subdat[c(positive_id, negative_id),'UID'], '.RData')
        
        current_data <- cxr_process_core(img_paths = used_img_paths)
        current_label <- mx.nd.array(array(rep(1:0, each = batch_size / 2), dim = c(1, batch_size)))
        
        mx.exec.update.arg.arrays(my_executor, arg.arrays = list(data = current_data, label = current_label), match.name = TRUE)
        mx.exec.forward(my_executor, is.train = TRUE)
        mx.exec.backward(my_executor)
        update_args <- my_updater(weight = my_executor$ref.arg.arrays, grad = my_executor$ref.grad.arrays)
        mx.exec.update.arg.arrays(my_executor, update_args, skip.null = TRUE)
        batch_loss <- c(batch_loss, as.array(my_executor$ref.outputs$ce_loss_output))

      }
      
      # Overall
      
      epoch_time <- as.double(difftime(Sys.time(), t0, units = "mins"))
      epoch_speed <- epoch_time / current_epoch
      train_loss <- c(train_loss, mean(batch_loss))
      
      message("Fold [", fold, "] lr = ", lr, "] Epoch [", current_epoch, "] Speed: ", formatC(epoch_speed, 2, format = "f"),
              " min/epoch loss = ", formatC(mean(batch_loss), format = "f", 4))
      
      # Save model
      
      pred_model <- mxnet:::mx.model.extract.model(symbol = logistic_pred, train.execs = list(my_executor))
      mx.model.save(model = pred_model, prefix = paste0('model/fold', fold, '/CXR2LVD'), iteration = current_epoch)
      
      # Validation process
      
      mx.exec.update.arg.arrays(pred_executor, pred_model$arg.params, match.name = TRUE)
      mx.exec.update.aux.arrays(pred_executor, pred_model$aux.params, match.name = TRUE)
      
      current_n_batch <- ceiling(nrow(valid_subdat) / batch_size)
      
      for (k in 1:current_n_batch) {
        
        sample_idx <- 1:batch_size + (k - 1) * batch_size
        sample_idx[sample_idx > nrow(valid_subdat)] <- nrow(valid_subdat)
        used_img_paths <- paste0(processed_dir, valid_subdat[sample_idx,'UID'], '.RData')

        for (l in 1:length(valid_type)) {
          
          BATCH_CXR_ARRAY <- cxr_process_core(img_paths = used_img_paths, VALID_CROP = valid_type[l],
                                              col_flip = FALSE, add_sd = 0, mul_sd = 0, pow_sd = 0)
          
          mx.exec.update.arg.arrays(pred_executor, arg.arrays = list(data = BATCH_CXR_ARRAY), match.name = TRUE)
          mx.exec.forward(pred_executor, is.train = FALSE)
          
          current_pred <- as.array(pred_executor[['ref.outputs']][[1]])
          valid_subdat[sample_idx,paste0('pred.', l)] <- current_pred %>% as.numeric()
          
        }
        
      }
      
      valid_subdat[,'final_pred'] <- apply(valid_subdat[,paste0('pred.', 1:length(valid_type))], 1, mean)
      roc_valid <- roc(LVD ~ final_pred, data = valid_subdat)
      valid_auc <- c(valid_auc, roc_valid[['auc']])

      message("valid-auc = ", formatC(roc_valid[['auc']], format = "f", 4))
      
      save(train_loss, valid_auc, file = paste0('model/fold', fold, '/logger.RData'))
      
      if (which.max(valid_auc) < current_epoch) {break}
      
    }
    
  }
  
}

讓我們把每一代最佳的結果拿出來，我們必須「標準化」每一代的預測結果，從而得到5組關鍵參數：

pred_executor <- mx.simple.bind(symbol = logistic_pred,
                                data = c(224, 224, 3, batch_size), 
                                ctx = mx.gpu(1))

for (fold in 1:5) {
  
  # Subsets
  
  train_subdat <- train_dat[train_dat[,'fold'] != fold,]
  valid_subdat <- train_dat[train_dat[,'fold'] == fold,]
  
  # Load logger
  
  load(paste0('model/fold', fold, '/logger.RData'))
  
  best_iter <- which.max(valid_auc)
  pred_model <- mx.model.load(prefix = paste0('model/fold', fold, '/CXR2LVD'), iteration = best_iter)
  
  # Validation process
  
  mx.exec.update.arg.arrays(pred_executor, pred_model$arg.params, match.name = TRUE)
  mx.exec.update.aux.arrays(pred_executor, pred_model$aux.params, match.name = TRUE)
  
  current_n_batch <- ceiling(nrow(valid_subdat) / batch_size)
  
  for (k in 1:current_n_batch) {
    
    sample_idx <- 1:batch_size + (k - 1) * batch_size
    sample_idx[sample_idx > nrow(valid_subdat)] <- nrow(valid_subdat)
    used_img_paths <- paste0(processed_dir, valid_subdat[sample_idx,'UID'], '.RData')
    
    for (l in 1:length(valid_type)) {
      
      BATCH_CXR_ARRAY <- cxr_process_core(img_paths = used_img_paths, VALID_CROP = valid_type[l],
                                          col_flip = FALSE, add_sd = 0, mul_sd = 0, pow_sd = 0)
      
      mx.exec.update.arg.arrays(pred_executor, arg.arrays = list(data = BATCH_CXR_ARRAY), match.name = TRUE)
      mx.exec.forward(pred_executor, is.train = FALSE)
      
      current_pred <- as.array(pred_executor[['ref.outputs']][[1]])
      valid_subdat[sample_idx,paste0('pred.', l)] <- current_pred %>% as.numeric()
      
    }
    
  }
  
  valid_subdat[,'final_pred'] <- apply(valid_subdat[,paste0('pred.', 1:length(valid_type))], 1, mean)
  
  # Standardized
  
  valid_subdat[,'final_pred'] <- log(valid_subdat[,'final_pred'] / (1 - valid_subdat[,'final_pred']))
  valid_subdat[valid_subdat[,'final_pred'] < -10,'final_pred'] <- -10
  valid_subdat[valid_subdat[,'final_pred'] > 10,'final_pred'] <- 10
  
  roc_valid <- roc(LVD ~ final_pred, data = valid_subdat)
  best_pos <- which.max(roc_valid$sensitivities + roc_valid$specificities)
  offset_coef <- roc_valid$thresholds[best_pos]
  zoom_coef <- sd(valid_subdat[,'final_pred'], na.rm = TRUE)
  
  save(best_iter, offset_coef, zoom_coef, file = paste0('model/fold', fold, '/key_index.RData'))
  
}

現在，我們能直接用這個網路來在測試組上預測5次：

for (fold in 1:5) {
  
  # Load key_index
  
  load(paste0('model/fold', fold, '/key_index.RData'))
  pred_model <- mx.model.load(prefix = paste0('model/fold', fold, '/CXR2LVD'), iteration = best_iter)
  
  # Validation process
  
  mx.exec.update.arg.arrays(pred_executor, pred_model$arg.params, match.name = TRUE)
  mx.exec.update.aux.arrays(pred_executor, pred_model$aux.params, match.name = TRUE)
  
  current_n_batch <- ceiling(nrow(test_dat) / batch_size)
  
  pb <- txtProgressBar(max = current_n_batch, style = 3)
  
  for (k in 1:current_n_batch) {
    
    sample_idx <- 1:batch_size + (k - 1) * batch_size
    sample_idx[sample_idx > nrow(test_dat)] <- nrow(test_dat)
    used_img_paths <- paste0(processed_dir, test_dat[sample_idx,'UID'], '.RData')
    
    for (l in 1:length(valid_type)) {
      
      BATCH_CXR_ARRAY <- cxr_process_core(img_paths = used_img_paths, VALID_CROP = valid_type[l],
                                          col_flip = FALSE, add_sd = 0, mul_sd = 0, pow_sd = 0)
      
      mx.exec.update.arg.arrays(pred_executor, arg.arrays = list(data = BATCH_CXR_ARRAY), match.name = TRUE)
      mx.exec.forward(pred_executor, is.train = FALSE)
      
      current_pred <- as.array(pred_executor[['ref.outputs']][[1]])
      test_dat[sample_idx,paste0('pred.', l)] <- current_pred %>% as.numeric()
      
    }
    
    setTxtProgressBar(pb, k)
    
  }
  
  close(pb)
  
  test_dat[,'final_pred'] <- apply(test_dat[,paste0('pred.', 1:length(valid_type))], 1, mean)
  
  # Standardized
  
  test_dat[,'final_pred'] <- log(test_dat[,'final_pred'] / (1 - test_dat[,'final_pred']))
  test_dat[test_dat[,'final_pred'] < -10,'final_pred'] <- -10
  test_dat[test_dat[,'final_pred'] > 10,'final_pred'] <- 10
  
  # Build fold-specific prediction
  
  test_dat[,paste0('fold', fold)] <- (test_dat[,'final_pred'] - offset_coef) / zoom_coef
  
}

將5組預測值平均起來送出，我們最終獲得一個大幅提升的成績(C-index = 0.8632)

submit_dat[,'p_LVD'] <- apply(test_dat[,paste0('fold', 1:5)], 1, mean)
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)

體驗到資料擴增的威力後，試著再多調整一些參數及納入更多資料擴增方式吧！