– 與之前所有任務相同,這個任務的資料下載是需要經過申請的,請你找助教申請帳號。
library(data.table)
# Read the competition tables as plain data.frames (data.table = FALSE) so
# that the base-R style indexing used below (dat[i, 'col']) behaves as expected.
train_dat <- fread('train.csv', data.table = FALSE)
test_dat <- fread('test.csv', data.table = FALSE)
submit_dat <- fread('sample_submission.csv', data.table = FALSE)
如果你是使用伺服器版的RStudio,那直接上傳「supplement.zip」就能得到資料夾「image」了。或是你可以先解壓縮放到你的project的資料夾下。
我們可能需要先進行資料前處理,我們將jpeg檔案讀進來並將它resize成短邊大小為256,語法如下:
# Pre-process the raw JPEGs: read each image and resize it so that the SHORT
# side becomes 256 pixels (aspect ratio preserved), then cache the result as
# an .RData file (object `resize_img`) in `processed_dir`.
library(OpenImageR)  # readImage()/resizeImage(); must be loaded BEFORE this loop

image_dir <- 'image/'
processed_dir <- 'processed/'
# Make sure the output directory exists before writing into it.
dir.create(processed_dir, showWarnings = FALSE, recursive = TRUE)

img_paths <- list.files(image_dir)
pb <- txtProgressBar(max = length(img_paths), style = 3)
for (i in seq_along(img_paths)) {
  current_img_path <- paste0(image_dir, img_paths[i])
  current_prc_path <- paste0(processed_dir, gsub('\\.jpeg$', '.RData', img_paths[i]))
  img <- readImage(current_img_path)
  # Scale the target size so the shorter side maps to 256 while the longer
  # side keeps the original height/width ratio.
  target_size <- c(256, 256)
  target_fold <- dim(img)[1] / dim(img)[2]
  if (target_fold > 1) {
    target_size[1] <- target_size[1] * target_fold
  } else {
    target_size[2] <- target_size[2] / target_fold
  }
  resize_img <- resizeImage(img, width = target_size[1], height = target_size[2], method = 'bilinear')
  save(resize_img, file = current_prc_path)
  setTxtProgressBar(pb, i)
}
close(pb)
– 這個範例使用「resnet-50」進行,這是一個50層深的ResNet,非常標準的深度神經網路。我們要建構一個新的神經網路,輸出最後一層的特徵。
library(magrittr)
library(mxnet)
# Load the pre-trained ResNet-50 ("resnet-50-symbol.json" +
# "resnet-50-0000.params") and truncate it at the flatten layer so the
# network outputs the final feature vector instead of class scores.
res_model <- mx.model.load(prefix = "resnet-50", iteration = 0)
all_layers <- res_model$symbol$get.internals()
# Pick the internal output named 'flatten0_output' (the layer right before
# the original classifier head).
flatten0_output <- which(all_layers$outputs == 'flatten0_output') %>% all_layers$get.output()
# Infer shapes for a 256x256x3 input with batch size 32; the result is used
# below to keep only the parameters the truncated symbol still needs.
included_items <- mx.symbol.infer.shape(flatten0_output, data = c(256, 256, 3, 32))
res_model$symbol <- flatten0_output
res_model$arg.params <- res_model$arg.params[names(res_model$arg.params) %in% names(included_items$arg.shapes)]
res_model$aux.params <- res_model$aux.params[names(res_model$aux.params) %in% names(included_items$aux.shapes)]
– 如果你的電腦沒有GPU,請你將「mx.gpu()」改成「mx.cpu()」
library(OpenImageR)
processed_dir <- 'processed/'
batch_size <- 32
# Extract ResNet features for every training image: accumulate center-cropped
# 256x256 images into batches, run them through the truncated model, and
# store the features row-wise in `train_feature_mat`.
n_train <- nrow(train_dat)
train_feature_mat <- matrix(0, nrow = n_train, ncol = included_items$out.shapes$flatten0_output[1])
for (i in seq_len(n_train)) {
  if (i == 1) {t0 <- Sys.time()}
  if (i %% batch_size == 1) {
    start_pos <- i
    # The last batch may hold fewer than batch_size images.
    img_array <- array(0, dim = c(256, 256, 3, min(batch_size, n_train - i + 1)))
  }
  array_pos <- i %% batch_size
  if (array_pos == 0) {array_pos <- batch_size}
  current_prc_path <- paste0(processed_dir, train_dat[i, 'UID'], '.RData')
  load(current_prc_path)  # loads `resize_img` (short side = 256)
  # Center-crop the resized image to exactly 256x256.
  row_select <- floor((dim(resize_img)[1] - 256) / 2) + 1:256
  col_select <- floor((dim(resize_img)[2] - 256) / 2) + 1:256
  img_array[,,,array_pos] <- resize_img[row_select, col_select,]
  # Run the model once per full batch (or on the final partial batch).
  if (i %% batch_size == 0 || i == n_train) {
    img_feature <- predict(model = res_model, X = img_array, ctx = mx.gpu(0))
    train_feature_mat[start_pos:i,] <- img_feature %>% t()
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (n_train - i) / batch_speed / 60
    message(i, "/", n_train, ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
  }
}
– 由於我們沒有學過「非線性」的機器學習方法來處理「存活分析」任務,所以我們還是使用極限梯度提升機,讓我們改預測「1年內是否死亡」,這樣會造成我們損失了333個樣本。
library(xgboost)
# Binary target: y = 1 when death occurred within the first year.
# - death == 1 and time <= 365          -> y = 1
# - time > 365 (regardless of death)    -> y = 0 (survived the first year)
# - censored (death == 0), time < 365   -> outcome unknown -> NA (dropped)
# NOTE(review): a censored subject with time exactly 365 keeps y = 0, i.e.
# is treated as having survived one year — confirm this boundary is intended.
train_dat[,'y'] <- train_dat[,'death']
train_dat[train_dat[,'time'] > 365,'y'] <- 0
train_dat[train_dat[,'death'] %in% 0 & train_dat[,'time'] < 365,'y'] <- NA

# One-hot design matrix for the demographic covariates; the intercept column
# is dropped when binding with the image features below.
train_demo_mat <- model.matrix(~ ., data = train_dat[,c('GENDER', 'AGE')])

set.seed(0)
usable_idx <- which(!is.na(train_dat[,'y']))
# 80/20 train/validation split over the rows with a defined label.
# floor() makes explicit the truncation sample() would apply anyway.
train_idx <- sample(usable_idx, floor(length(usable_idx) * 0.8))
valid_idx <- setdiff(usable_idx, train_idx)

xgb.data_train <- xgb.DMatrix(data = cbind(train_demo_mat[train_idx,-1], train_feature_mat[train_idx,]),
                              label = train_dat[train_idx, 'y'])
xgb.data_valid <- xgb.DMatrix(data = cbind(train_demo_mat[valid_idx,-1], train_feature_mat[valid_idx,]),
                              label = train_dat[valid_idx, 'y'])

# Gradient-boosted trees, early-stopped on validation AUC.
xgb_fit <- xgb.train(data = xgb.data_train, watchlist = list(eval = xgb.data_valid),
                     early_stopping_rounds = 10, eval_metric = 'auc', verbose = TRUE,
                     nthread = 2, nrounds = 200, objective = "binary:logistic")
processed_dir <- 'processed/'
batch_size <- 32
# Same batched center-crop + ResNet feature extraction as for the training
# set, now applied to test_dat; predictions are then written to the
# submission file.
n_test <- nrow(test_dat)
test_feature_mat <- matrix(0, nrow = n_test, ncol = included_items$out.shapes$flatten0_output[1])
for (i in seq_len(n_test)) {
  if (i == 1) {t0 <- Sys.time()}
  if (i %% batch_size == 1) {
    start_pos <- i
    # The last batch may hold fewer than batch_size images.
    img_array <- array(0, dim = c(256, 256, 3, min(batch_size, n_test - i + 1)))
  }
  array_pos <- i %% batch_size
  if (array_pos == 0) {array_pos <- batch_size}
  current_prc_path <- paste0(processed_dir, test_dat[i, 'UID'], '.RData')
  load(current_prc_path)  # loads `resize_img` (short side = 256)
  # Center-crop the resized image to exactly 256x256.
  row_select <- floor((dim(resize_img)[1] - 256) / 2) + 1:256
  col_select <- floor((dim(resize_img)[2] - 256) / 2) + 1:256
  img_array[,,,array_pos] <- resize_img[row_select, col_select,]
  if (i %% batch_size == 0 || i == n_test) {
    img_feature <- predict(model = res_model, X = img_array, ctx = mx.gpu(0))
    test_feature_mat[start_pos:i,] <- img_feature %>% t()
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (n_test - i) / batch_speed / 60
    message(i, "/", n_test, ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
  }
}
# Build the test design matrix and write the predicted one-year death
# probability into the sample-submission template.
test_demo_mat <- model.matrix(~ ., data = test_dat[,c('GENDER', 'AGE')])
test_X_mat <- cbind(test_demo_mat[,-1], test_feature_mat)
submit_dat[,'p_death'] <- predict(xgb_fit, test_X_mat)
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)
– 如果你偷懶,可以直接下載CXRsurvnet-symbol.json和CXRsurvnet-0000.params。
– 先讓我們寫一個Iterator,要特別注意我們怎樣進行\(y\)的encode:
library(abind)
# Directory holding the pre-processed .RData images created earlier.
processed_dir <- 'processed/'
survival_encode <- function (event, time) {
  # Encode survival data (event indicator + follow-up time) into the pairwise
  # label/mask matrices consumed by the Cox partial-likelihood loss below.
  #
  # Args:
  #   event: vector, 1 = death observed, 0 = censored.
  #   time:  numeric vector of follow-up times (same length as `event`).
  #
  # Returns a list of two n x n integer matrices (n = length(event)), with
  # COLUMNS in the caller's original subject order:
  #   label: row m has a single 1 marking the subject anchoring risk-set m.
  #   mask:  row m flags the subjects at risk at that anchor's event time;
  #          censored anchors get an all-zero row, and for tied times the
  #          later tied subject is removed from the earlier anchor's risk set.
  sub_dat <- data.frame(id = seq_along(event), event = event, time = time, stringsAsFactors = FALSE)
  # Sort by time ascending; among equal times, events (event == 1) first.
  sub_dat <- sub_dat[order(sub_dat[,'event'], decreasing = TRUE),]
  sub_dat <- sub_dat[order(sub_dat[,'time']),]
  label <- array(0L, dim = rep(nrow(sub_dat), 2))
  mask <- array(0L, dim = rep(nrow(sub_dat), 2))
  diag(label) <- 1L
  for (m in seq_len(nrow(sub_dat))) {
    # Everyone with time >= time[m] is at risk when subject m's event occurs.
    mask[m,m:nrow(sub_dat)] <- 1L
    if (sub_dat[m,'event'] == 0) {
      # Censored subjects do not anchor a risk set of their own.
      mask[m,] <- 0L
    } else if (m > 1) {
      # Tie handling: drop subject m from risk sets anchored at earlier rows
      # that share exactly the same time.
      zero_pos <- which(sub_dat[1:(m-1),'time'] == sub_dat[m,'time'])
      if (length(zero_pos) > 0) {
        mask[zero_pos,m] <- 0L
      }
    }
  }
  # Undo the sorting so columns line up with the caller's input order.
  original_pos <- order(sub_dat[,'id'])
  return(list(label = label[,original_pos,drop = FALSE], mask = mask[,original_pos,drop = FALSE]))
}
cxr_process_core <- function(img_paths, img_crop = 224, col_flip = TRUE, add_sd = 0.05, mul_sd = 0.1, pow_sd = 0.1) {
  # Load a batch of cached images, random-crop each to img_crop x img_crop,
  # optionally mirror and photometrically jitter the batch, and return an
  # mx.ndarray of shape (img_crop, img_crop, 3, batch).
  #
  # Args:
  #   img_paths: paths to .RData files, each containing `resize_img`.
  #   img_crop:  side length of the random square crop.
  #   col_flip:  if TRUE, with probability 1/2 mirror the WHOLE batch along
  #              the second (column) axis. NOTE(review): the flip decision is
  #              made once per batch, not per image — confirm intended.
  #   add_sd / mul_sd / pow_sd: ranges of the per-image additive,
  #              multiplicative, and power (gamma-like) jitter; 0 disables
  #              the corresponding step.
  batch_IMG_LIST <- list()
  for (i in seq_along(img_paths)) {
    load(img_paths[i])  # loads `resize_img`
    # Random top-left corner for the crop (offsets may be 0).
    row_df <- dim(resize_img)[1] - img_crop
    col_df <- dim(resize_img)[2] - img_crop
    random.row <- sample(0:row_df, 1)
    random.col <- sample(0:col_df, 1)
    batch_IMG_LIST[[i]] <- resize_img[random.row + (1:img_crop), random.col + (1:img_crop),,drop=FALSE]
  }
  # Stack the crops along a 4th (batch) dimension.
  batch_img_array <- abind(batch_IMG_LIST, along = 4)
  if (col_flip) {
    if (sample(c(TRUE, FALSE), 1)) {
      batch_img_array <- batch_img_array[,dim(batch_img_array)[2]:1,,,drop = FALSE]
    }
  }
  batch_img_array <- mx.nd.array(batch_img_array)
  n_img <- length(img_paths)
  if (add_sd != 0) {
    # Per-image brightness shift drawn from U(-add_sd, add_sd).
    add_item <- mx.nd.array(array(runif(n_img, min = -add_sd, max = add_sd), dim = c(1, 1, 1, n_img)))
    batch_img_array <- mx.nd.broadcast.add(batch_img_array, add_item)
  }
  if (mul_sd != 0) {
    # Per-image contrast scaling by exp(U(-mul_sd, mul_sd)).
    mul_item <- mx.nd.array(array(exp(runif(n_img, min = -mul_sd, max = mul_sd)), dim = c(1, 1, 1, n_img)))
    batch_img_array <- mx.nd.broadcast.mul(batch_img_array, mul_item)
  }
  if (pow_sd != 0) {
    # Per-image power-law jitter with exponent exp(U(-pow_sd, pow_sd)).
    pow_item <- mx.nd.array(array(exp(runif(n_img, min = -pow_sd, max = pow_sd)), dim = c(1, 1, 1, n_img)))
    batch_img_array <- mx.nd.broadcast.power(batch_img_array, pow_item)
  }
  # Clamp back into the valid [0, 1] intensity range after jittering.
  batch_img_array <- mx.nd.broadcast.maximum(batch_img_array, mx.nd.array(array(0, dim = c(1, 1, 1, 1))))
  batch_img_array <- mx.nd.broadcast.minimum(batch_img_array, mx.nd.array(array(1, dim = c(1, 1, 1, 1))))
  return(batch_img_array)
}
my_iterator_core <- function(batch_size, img_crop = 224, col_flip = TRUE, add_sd = 0.05, mul_sd = 0.1, pow_sd = 0.1) {
  # Closure-based data source: each value() call draws `batch_size` random
  # rows from the global `train_dat`, loads and augments their cached images
  # with cxr_process_core(), and pairs them with survival_encode()'d labels.
  # iter.next()/reset() only count batches, so one "epoch" serves roughly
  # nrow(train_dat) samples.
  served_batches <- 0
  batches_per_epoch <- ceiling(nrow(train_dat) / batch_size)

  reset <- function() {
    served_batches <<- 0
  }

  iter.next <- function() {
    served_batches <<- served_batches + 1
    served_batches <= batches_per_epoch
  }

  value <- function() {
    picked <- sample(1:nrow(train_dat), batch_size)
    batch_paths <- paste0(processed_dir, train_dat[picked,'UID'], '.RData')
    batch_data <- cxr_process_core(img_paths = batch_paths, img_crop = img_crop,
                                   col_flip = col_flip, add_sd = add_sd,
                                   mul_sd = mul_sd, pow_sd = pow_sd)
    encoded <- survival_encode(event = train_dat[picked,'death'], time = train_dat[picked,'time'])
    list(data = batch_data,
         label = mx.nd.array(encoded[['label']]),
         mask = mx.nd.array(encoded[['mask']]))
  }

  list(reset = reset, iter.next = iter.next, value = value,
       batch_size = batch_size, batch = served_batches)
}
# Reference-class wrapper so the custom iterator is accepted by mxnet's
# training utilities (it must inherit from Rcpp_MXArrayDataIter).
my_iterator_func <- setRefClass("Custom_Iter",
fields = c("iter", "batch_size", "img_crop", "col_flip", "add_sd", "mul_sd", "pow_sd"),
contains = "Rcpp_MXArrayDataIter",
methods = list(
initialize = function(iter, batch_size = 32, img_crop = 224,
col_flip = TRUE, add_sd = 0.05, mul_sd = 0.1, pow_sd = 0.1) {
# Delegate all work to the closure-based core iterator.
.self$iter <- my_iterator_core(batch_size = batch_size, img_crop = img_crop,
col_flip = col_flip, add_sd = add_sd,
mul_sd = mul_sd, pow_sd = pow_sd)
.self
},
value = function(){
# Current batch: list(data = ..., label = ..., mask = ...).
.self$iter$value()
},
iter.next = function(){
# Advance the batch counter; FALSE once an epoch has been served.
.self$iter$iter.next()
},
reset = function(){
.self$iter$reset()
},
finalize=function(){
}
)
)
# Instantiate the iterator; photometric augmentation is disabled here
# (add_sd = mul_sd = pow_sd = 0), only random crop + flip remain.
my_iter <- my_iterator_func(iter = NULL, batch_size = 32, img_crop = 224, col_flip = TRUE, add_sd = 0, mul_sd = 0, pow_sd = 0)
– 再寫一個Model architecture,需要特別注意最後一層要用BN:
library(mxnet)
# Rebuild the truncated ResNet-50 feature extractor (same steps as before),
# then attach the survival head on top of the flatten layer.
res_model <- mx.model.load(prefix = "resnet-50", iteration = 0)
all_layers <- res_model$symbol$get.internals()
flatten0_output <- which(all_layers$outputs == 'flatten0_output') %>% all_layers$get.output()
included_items <- mx.symbol.infer.shape(flatten0_output, data = c(256, 256, 3, 32))
res_model$symbol <- flatten0_output
res_model$arg.params <- res_model$arg.params[names(res_model$arg.params) %in% names(included_items$arg.shapes)]
res_model$aux.params <- res_model$aux.params[names(res_model$aux.params) %in% names(included_items$aux.shapes)]
# Single linear risk score per subject; no bias term, because the Cox model
# has no intercept.
fc1 <- mx.symbol.FullyConnected(data = flatten0_output, num.hidden = 1, no.bias = TRUE, name = 'fc1')
# Final BatchNorm normalizes the risk score; its beta (shift) parameter is
# forced back to zero during training (see the training loop below).
final_pred <- mx.symbol.BatchNorm(data = fc1, eps = 1e-04, fix_gamma = TRUE,
momentum = 0.9, use_global_stats = FALSE, name = 'final_pred')
– 寫出loss:
COX_LOSS <- function (indata, inlabel, inmask, eps = 1e-8, make_loss = TRUE) {
  # Build the Cox partial-likelihood loss as an mxnet symbol.
  #
  # Args:
  #   indata:  symbol producing one risk score per subject (final_pred).
  #   inlabel: label matrix from survival_encode() (one 1 per risk set).
  #   inmask:  at-risk mask from survival_encode().
  #   eps:     numerical floor inside the logs to avoid log(0).
  #   make_loss: if TRUE, wrap the result in mx.symbol.MakeLoss for training.
  exp_data <- mx.symbol.exp(data = indata, name = 'exp_data')
  # Contribution of the event subject anchoring each risk set...
  part_event_y <- mx.symbol.broadcast_mul(lhs = inlabel, rhs = inmask, name = 'part_event_y')
  part_event <- mx.symbol.broadcast_mul(lhs = part_event_y, rhs = exp_data, name = 'part_event')
  # ...and of everyone still at risk at that time.
  part_at_risk <- mx.symbol.broadcast_mul(lhs = inmask, rhs = exp_data, name = 'part_at_risk')
  # FIX: this sum was previously also named 'part_event', colliding with the
  # symbol defined above; it now carries its own unique name.
  sum_part_event <- mx.symbol.sum(data = part_event, axis = 0, keepdims = TRUE, name = 'sum_part_event')
  sum_part_at_risk <- mx.symbol.sum(data = part_at_risk, axis = 0, keepdims = TRUE, name = 'sum_part_at_risk')
  log_part_event <- mx.symbol.log(data = sum_part_event + eps, name = 'log_part_event')
  log_part_at_risk <- mx.symbol.log(data = sum_part_at_risk + eps, name = 'log_part_at_risk')
  # log(sum at risk) - log(event term): one negative log-partial-likelihood
  # contribution per risk set.
  diff_at_risk_event <- mx.symbol.broadcast_minus(lhs = log_part_at_risk, rhs = log_part_event, name = 'diff_at_risk_event')
  # Average only over risk sets that actually anchor an observed event
  # (censored rows of the mask are all zero).
  important_time <- mx.symbol.sum(data = part_event_y, axis = 0, keepdims = TRUE, name = 'important_time')
  number_time <- mx.symbol.sum(data = important_time, axis = 1, name = 'number_time')
  event_loss <- mx.symbol.broadcast_mul(lhs = diff_at_risk_event, rhs = important_time, name = 'event_loss')
  sum_event_loss <- mx.symbol.sum(data = event_loss, axis = 1, name = 'sum_event_loss')
  final_loss <- mx.symbol.broadcast_div(lhs = sum_event_loss, rhs = number_time, name = 'final_loss')
  if (make_loss) {final_loss <- mx.symbol.MakeLoss(data = final_loss, name = 'COX_loss')}
  return(final_loss)
}
# Placeholders for the encoded survival label/mask supplied by the iterator,
# wired into the loss graph together with the network's risk score.
label_sym <- mx.symbol.Variable('label')
mask_sym <- mx.symbol.Variable('mask')
loss_symbol <- COX_LOSS(indata = final_pred, inlabel = label_sym, inmask = mask_sym)
– 指定優化器:
# Adam with a small learning rate and weight decay, suitable for fine-tuning.
my_optimizer <- mx.opt.create(name = "adam", learning.rate = 1e-4, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-08, wd = 1e-3)
– 做存活分析網路「一定」要從底層寫,因為我們需要確保「final_pred」的截距為0,這需要手動消除:
# Bind an executor manually (low-level API) so the training loop can zero
# out the 'final_pred' BatchNorm beta after every update — the Cox partial
# likelihood must not have an intercept.
my_executor <- mx.simple.bind(symbol = loss_symbol,
                              data = c(224, 224, 3, 32), label = c(32, 32), mask = c(32, 32),
                              ctx = mx.gpu(0), grad.req = "write")
mx.set.seed(0)
# Random initialization for every parameter of the full graph; pre-trained
# ResNet weights are copied over it below wherever name AND shape match.
new_arg <- mxnet:::mx.model.init.params(symbol = loss_symbol,
                                        input.shape = list(data = c(224, 224, 3, 32), label = c(32, 32), mask = c(32, 32)),
                                        output.shape = NULL,
                                        initializer = mxnet:::mx.init.uniform(0.01),
                                        ctx = mx.cpu())
for (i in seq_along(new_arg$arg.params)) {
  pos <- which(names(res_model$arg.params) == names(new_arg$arg.params)[i])
  if (length(pos) == 1) {
    # isTRUE() guards against all.equal() returning a character vector of
    # differences, which must not be passed to if() directly.
    if (isTRUE(all.equal(dim(res_model$arg.params[[pos]]), dim(new_arg$arg.params[[i]])))) {
      new_arg$arg.params[[i]] <- res_model$arg.params[[pos]]
    }
  }
}
for (i in seq_along(new_arg$aux.params)) {
  pos <- which(names(res_model$aux.params) == names(new_arg$aux.params)[i])
  if (length(pos) == 1) {
    if (isTRUE(all.equal(dim(res_model$aux.params[[pos]]), dim(new_arg$aux.params[[i]])))) {
      new_arg$aux.params[[i]] <- res_model$aux.params[[pos]]
    }
  }
}
# Push the merged parameters into the executor and create the updater.
mx.exec.update.arg.arrays(my_executor, new_arg$arg.params, match.name = TRUE)
mx.exec.update.aux.arrays(my_executor, new_arg$aux.params, match.name = TRUE)
my_updater <- mx.opt.get.updater(optimizer = my_optimizer, weights = my_executor$ref.arg.arrays)
– 開始訓練,我們就訓練30代就好,並把模型存下來:
# Train for a fixed 30 epochs with the low-level executor API.
n_epoch <- 30
for (i in 1:n_epoch) {
if (i == 1) {t0 <- Sys.time()}
my_iter$reset()
batch_loss <- NULL
while (my_iter$iter.next()) {
# Fetch a batch (data/label/mask) and copy it into the executor buffers.
my_values <- my_iter$value()
mx.exec.update.arg.arrays(my_executor, arg.arrays = my_values, match.name = TRUE)
mx.exec.forward(my_executor, is.train = TRUE)
mx.exec.backward(my_executor)
update_args <- my_updater(weight = my_executor$ref.arg.arrays, grad = my_executor$ref.grad.arrays)
### Important ###
# Force the BatchNorm shift ('final_pred_beta') back to zero after every
# update: the Cox model has no intercept, so this parameter must stay 0.
# This manual reset is the reason the low-level API is used at all.
# NOTE(review): assumes 'final_pred_beta' matches exactly one name in
# ref.arg.arrays, and that update_args shares that ordering — verify.
beta_pos <- which(grepl('final_pred_beta', names(my_executor[['ref.arg.arrays']])))
update_args[[beta_pos]] <- mx.nd.array(array(0, dim = 1))
#################
mx.exec.update.arg.arrays(my_executor, update_args, skip.null = TRUE)
batch_loss <- c(batch_loss, as.array(my_executor$ref.outputs$COX_loss_output))
}
# Progress report: average seconds per epoch and estimated time remaining.
epoch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
epoch_speed <- epoch_time / i
estimated_mins <- (n_epoch - i) * epoch_speed / 60
message("Epoch [", i, "/", n_epoch, "] Speed: ", formatC(epoch_speed, 2, format = "f"),
" sec/epoch | Remaining time: ", formatC(estimated_mins, 1, format = "f"),
" mins loss = ", formatC(mean(batch_loss), format = "f", 4))
}
# Extract the prediction-only model (symbol 'final_pred' plus its trained
# weights from the executor), drop the leftover 'mask' input parameter, and
# save it as "CXRsurvnet" for later inference.
pred_model <- mxnet:::mx.model.extract.model(symbol = final_pred, train.execs = list(my_executor))
pred_model[['arg.params']] <- pred_model[['arg.params']][which(names(pred_model[['arg.params']]) != 'mask')]
mx.model.save(model = pred_model, prefix = 'CXRsurvnet', iteration = 0)
processed_dir <- 'processed/'
batch_size <- 32
# Predict the risk score for every test image with the fine-tuned network,
# using the same batched center-crop pipeline as before.
n_test <- nrow(test_dat)
# One column: the scalar output of 'final_pred'.
test_feature_mat <- matrix(0, nrow = n_test, ncol = 1)
for (i in seq_len(n_test)) {
  if (i == 1) {t0 <- Sys.time()}
  if (i %% batch_size == 1) {
    start_pos <- i
    # The last batch may hold fewer than batch_size images.
    img_array <- array(0, dim = c(256, 256, 3, min(batch_size, n_test - i + 1)))
  }
  array_pos <- i %% batch_size
  if (array_pos == 0) {array_pos <- batch_size}
  current_prc_path <- paste0(processed_dir, test_dat[i, 'UID'], '.RData')
  load(current_prc_path)  # loads `resize_img`
  # Center-crop to 256x256. NOTE(review): training used 224x224 crops —
  # confirm the saved symbol accepts 256x256 inputs at inference time.
  row_select <- floor((dim(resize_img)[1] - 256) / 2) + 1:256
  col_select <- floor((dim(resize_img)[2] - 256) / 2) + 1:256
  img_array[,,,array_pos] <- resize_img[row_select, col_select,]
  if (i %% batch_size == 0 || i == n_test) {
    img_feature <- predict(model = pred_model, X = img_array, ctx = mx.gpu(0))
    test_feature_mat[start_pos:i,] <- img_feature %>% t()
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (n_test - i) / batch_speed / 60
    message(i, "/", n_test, ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
  }
}
# Submit the raw risk score directly as 'p_death' (higher = higher risk).
submit_dat[,'p_death'] <- as.numeric(test_feature_mat)
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)