下載資料

– 與之前所有任務相同,這個任務的資料下載是需要經過申請的,請你找助教申請帳號。

讀取資料

library(data.table)

train_dat <- fread('train.csv', data.table = FALSE)
test_dat <- fread('test.csv', data.table = FALSE)
submit_dat <- fread('sample_submission.csv', data.table = FALSE)

二元分類任務

– 「Additionally, convolutional networks were trained to reject images for insufficient image quality, as well as for being invalid input (i.e. not being a retinal image). For the latter model, a broad variety of natural images was used as the negative class, in training. Images rejected by either of these models are considered as being recommended for further referral, for the purposes of computing the experimental results. Once an image is analyzed, a report will be generated for the users.」-出自Supplementary page 7。

– 這個範例使用「resnet-50」進行,這是一個50層深的ResNet,非常標準的深度神經網路。我們要建構一個新的神經網路將最後一層的FC從分1000類轉變成分2類:

library(magrittr)
library(mxnet)

res_model <- mx.model.load(prefix = "resnet-50", iteration = 0)
all_layers <- res_model$symbol$get.internals()
flatten0_output <- which(all_layers$outputs == 'flatten0_output') %>% all_layers$get.output()

fc1 <- mx.symbol.FullyConnected(data = flatten0_output, num_hidden = 2, name = 'fc1')
softmax <- mx.symbol.SoftmaxOutput(data = fc1, name = 'softmax')
library(OpenImageR)

image_dir <- 'image/'

y_array <- model.matrix(~ -1 + factor(train_dat[,'anomaly'])) %>% t()
img_array <-  array(0, dim = c(256, 256, 3, nrow(train_dat)))

for (i in 1:nrow(train_dat)) {
  
  if (i == 1) {t0 <- Sys.time()}
  
  current_prc_path <- paste0(image_dir, train_dat[i, 'UID'], '.jpeg')
  img_array[,,,i] <- readImage(current_prc_path)
  
  if (i %% 100 == 0) {
    
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (nrow(train_dat) - i) / batch_speed / 60
    
    message(i, "/", nrow(train_dat), ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
    
  }
  
}
set.seed(0)

train_ids <- sample(1:nrow(train_dat), nrow(train_dat) * 0.8)

train_y_array <- y_array[,train_ids]
train_img_array <- img_array[,,,train_ids]

valid_y_array <- y_array[,-train_ids]
valid_img_array <- img_array[,,,-train_ids]
new_arg <- mxnet:::mx.model.init.params(symbol = softmax,
                                        input.shape = list(data = c(256, 256, 3, 32)),
                                        output.shape = NULL,
                                        initializer = mxnet:::mx.init.uniform(0.01),
                                        ctx = mx.cpu())

for (i in 1:length(new_arg$arg.params)) {
  pos <- which(names(res_model$arg.params) == names(new_arg$arg.params)[i])
  if (length(pos) == 1) {
    if (all.equal(dim(res_model$arg.params[[pos]]), dim(new_arg$arg.params[[i]])) == TRUE) {
      new_arg$arg.params[[i]] <- res_model$arg.params[[pos]]
    }
  }
}

for (i in 1:length(new_arg$aux.params)) {
  pos <- which(names(res_model$aux.params) == names(new_arg$aux.params)[i])
  if (length(pos) == 1) {
    if (all.equal(dim(res_model$aux.params[[pos]]), dim(new_arg$aux.params[[i]])) == TRUE) {
      new_arg$aux.params[[i]] <- res_model$aux.params[[pos]]
    }
  }
}
my.eval.metric.mlogloss <- mx.metric.custom(
  name = "m-logloss", 
  function(real, pred) {
    real1 = as.numeric(as.array(real))
    pred1 = as.numeric(as.array(pred))
    pred1[pred1 <= 1e-6] = 1e-6
    pred1[pred1 >= 1 - 1e-6] = 1 - 1e-6
    return(-mean(real1 * log(pred1), na.rm = TRUE))
  }
)

mx.set.seed(0)

my_model <- mx.model.FeedForward.create(symbol = softmax,
                                       X = train_img_array, y = train_y_array,
                                       optimizer = "sgd", learning.rate = 0.005, momentum = 0.9,
                                       array.batch.size = 16, num.round = 10,
                                       arg.params = new_arg$arg.params, aux.params = new_arg$aux.params,
                                       ctx = mx.gpu(0),
                                       eval.metric = my.eval.metric.mlogloss)
library(pROC)

pred_valid <-  predict(model = my_model, X = valid_img_array, ctx = mx.gpu(0))
roc_valid <- roc(response = valid_y_array[2,], predictor = pred_valid[2,])

best_pos <- which.max(roc_valid$sensitivities + roc_valid$specificities)
best_cut <- roc_valid$thresholds[best_pos]
image_dir <- 'image/'
test_img_array <-  array(0, dim = c(256, 256, 3, nrow(test_dat)))

for (i in 1:nrow(test_dat)) {
  
  if (i == 1) {t0 <- Sys.time()}
  
  current_prc_path <- paste0(image_dir, test_dat[i, 'UID'], '.jpeg')
  test_img_array[,,,i] <- readImage(current_prc_path)
  
  if (i %% 100 == 0) {
    
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (nrow(test_dat) - i) / batch_speed / 60
    
    message(i, "/", nrow(test_dat), ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
    
  }
  
}
pred_test <-  predict(model = my_model, X = test_img_array, ctx = mx.gpu(0))
submit_dat[,'anomaly'] <- (pred_test[2,] > best_cut)
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)

無監督學習

– 其實用自編碼器也可以,重點是中間的隱編碼的數目要小。

– 先讓我們建構一個Model architecture:

# Encoder

data <- mx.symbol.Variable('data')

conv1 <- mx.symbol.Convolution(data = data, kernel = c(8, 8), stride = c(8, 8), num_filter = 8, name = 'conv1')
relu1 <- mx.symbol.Activation(data = conv1, act_type = "relu", name = 'relu1')

conv2 <- mx.symbol.Convolution(data = relu1, kernel = c(4, 4), stride = c(4, 4), num_filter = 32, name = 'conv2')
relu2 <- mx.symbol.Activation(data = conv2, act_type = "relu", name = 'relu2')

conv3 <- mx.symbol.Convolution(data = relu2, kernel = c(4, 4), stride = c(4, 4), num_filter = 128, name = 'conv3')
relu3 <- mx.symbol.Activation(data = conv3, act_type = "relu", name = 'relu3')

encoder <- mx.symbol.Convolution(data = relu3, kernel = c(2, 2), stride = c(2, 2), num_filter = 256, name = 'encoder')

# Decoder

deconv4 <- mx.symbol.Deconvolution(data = encoder, kernel = c(2, 2), stride = c(2, 2), num_filter = 128, name = 'deconv4')
relu4 <- mx.symbol.Activation(data = deconv4, act_type = "relu", name = 'relu4')

deconv5 <- mx.symbol.Deconvolution(data = relu4, kernel = c(4, 4), stride = c(4, 4), num_filter = 32, name = 'deconv5')
relu5 <- mx.symbol.Activation(data = deconv5, act_type = "relu", name = 'relu5')

deconv6 <- mx.symbol.Deconvolution(data = relu5, kernel = c(4, 4), stride = c(4, 4), num_filter = 8, name = 'deconv6')
relu6 <- mx.symbol.Activation(data = deconv6, act_type = "relu", name = 'relu6')

decoder <- mx.symbol.Deconvolution(data = relu6, kernel = c(8, 8), stride = c(8, 8), num_filter = 3, name = 'decoder')

# MSE loss

label <- mx.symbol.Variable(name = 'label')

residual <- mx.symbol.broadcast_minus(lhs = label, rhs = decoder) 
square_residual <- mx.symbol.square(data = residual)
mean_square_residual <- mx.symbol.mean(data = square_residual, axis = 0:3, keepdims = FALSE)
mse_loss <- mx.symbol.MakeLoss(data = mean_square_residual, name = 'mse')
nomaly_array <- train_img_array[,,,which(train_y_array[2,] == 0)]

my_optimizer <- mx.opt.create(name = "adam", learning.rate = 1e-3, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-08, wd = 0)

my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss", 
  function(real, pred) {
    return(as.array(pred))
  }
)

mx.set.seed(0)

my_model <- mx.model.FeedForward.create(symbol = mse_loss, optimizer = my_optimizer,
                                        X = nomaly_array, y = nomaly_array,
                                        array.batch.size = 16, num.round = 50,
                                        ctx = mx.gpu(0),
                                        eval.metric = my.eval.metric.loss)

my_model$symbol <- decoder
library(pROC)

pred_img_array <-  predict(model = my_model, X = valid_img_array, ctx = mx.gpu(0))
diff_img_array <- (pred_img_array - valid_img_array)^2
pred_valid <- apply(diff_img_array, 4, mean)
  
roc_valid <- roc(response = valid_y_array[2,], predictor = pred_valid)

best_pos <- which.max(roc_valid$sensitivities + roc_valid$specificities)
best_cut <- roc_valid$thresholds[best_pos]
image_dir <- 'image/'
test_img_array <-  array(0, dim = c(256, 256, 3, nrow(test_dat)))

for (i in 1:nrow(test_dat)) {
  
  if (i == 1) {t0 <- Sys.time()}
  
  current_prc_path <- paste0(image_dir, test_dat[i, 'UID'], '.jpeg')
  test_img_array[,,,i] <- readImage(current_prc_path)
  
  if (i %% 100 == 0) {
    
    batch_time <- as.double(difftime(Sys.time(), t0, units = "secs"))
    batch_speed <- i / batch_time
    estimated_mins <- (nrow(test_dat) - i) / batch_speed / 60
    
    message(i, "/", nrow(test_dat), ": Speed: ", formatC(batch_speed, 2, format = "f"),
            " samples/sec | Remaining time: ", formatC(estimated_mins, 1, format = "f"), " mins")
    
  }
  
}
pred_img_array <-  predict(model = my_model, X = test_img_array, ctx = mx.gpu(0))
diff_img_array <- (pred_img_array - test_img_array)^2
pred_test <- apply(diff_img_array, 4, mean)

submit_dat[,'anomaly'] <- (pred_test > best_cut)
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)
best_cut <- quantile(pred_valid[valid_y_array[2,] == 0], 0.9)

submit_dat[,'anomaly'] <- (pred_test > best_cut)
fwrite(submit_dat, file = 'my_submission.csv', na = '', row.names = FALSE, quote = FALSE)