深度學習理論與實務

林嶔 (Lin, Chin)

Lesson 9 變分自編碼器與異常檢測

第一節:生成模型的原理(1)

– 我們講述了它其中一個重要的功能,就是解壓縮的部分能夠拿來當作「圖像生成」用,但效果相當悲劇

F01

– 請在這裡下載MNIST的手寫數字資料

library(data.table)
library(OpenImageR)

# Load MNIST as a numeric matrix: column 1 is the digit label,
# columns 2..785 are the 28x28 pixel intensities.
DAT <- data.matrix(fread("data/MNIST.csv", data.table = FALSE))

# Visual sanity check: render sample #123 (drop the label column).
sample_pixels <- as.numeric(DAT[123, -1])
imageShow(t(matrix(sample_pixels, nrow = 28, byrow = TRUE)))

第一節:生成模型的原理(2)

library(mxnet)
library(magrittr)

# Custom mxnet data iterator for autoencoder training.
# Wraps mx.io.CSVIter: each CSV row is one sample (label + 784 pixels);
# the label row is discarded and the rescaled image is served as BOTH
# data and label, since the reconstruction target equals the input.
my_iterator_func <- setRefClass("Custom_Iter1",
                                fields = c("iter", "data.csv", "data.shape", "batch.size"),
                                contains = "Rcpp_MXArrayDataIter",
                                methods = list(
                                  initialize = function(iter, data.csv, data.shape, batch.size){
                                    # Delegate the actual CSV reading to the built-in iterator.
                                    .self$iter <- mx.io.CSVIter(data.csv = data.csv,
                                                                data.shape = data.shape,
                                                                batch.size = batch.size)
                                    .self
                                  },
                                  value = function(){
                                    # Batch arrives as a 785 x batch.size matrix.
                                    batch <- as.array(.self$iter$value()$data)
                                    pixels <- batch[-1, ]                      # drop the label row
                                    dim(pixels) <- c(28, 28, 1, ncol(pixels))  # 4-D image array
                                    pixels <- pixels / 255                     # rescale to [0, 1]
                                    pixels <- mx.nd.array(pixels)
                                    # Autoencoder: the target is the input itself.
                                    list(data = pixels, label = pixels)
                                  },
                                  iter.next = function(){
                                    # Advance the underlying CSV iterator.
                                    .self$iter$iter.next()
                                  },
                                  reset = function(){
                                    # Rewind to the start of the CSV file.
                                    .self$iter$reset()
                                  },
                                  finalize = function(){
                                    # No resources of our own to release.
                                  }
                                )
)

my_iter = my_iterator_func(iter = NULL,  data.csv = 'data/MNIST.csv', data.shape = 785, batch.size = 50)

第一節:生成模型的原理(3)

# Encoder: 784 -> 64 -> 8 -> 2 (2-D latent code).

data <- mx.symbol.Variable('data')

fc1 <- mx.symbol.FullyConnected(data = data, num.hidden = 64, name = 'fc1')
relu1 <- mx.symbol.Activation(data = fc1, act_type = "relu", name = 'relu1')

# BUG FIX: fc2 was previously wired to `data` (the raw input) instead of
# relu1, which bypassed fc1/relu1 entirely and left them as dead,
# untrained layers. Connect it to relu1 to realize the 784->64->8->2 funnel.
fc2 <- mx.symbol.FullyConnected(data = relu1, num.hidden = 8, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

# 2-D bottleneck: this layer's output is the latent code used later for
# visualization and generation.
encoder <- mx.symbol.FullyConnected(data = relu2, num.hidden = 2, name = 'encoder')

# Decoder: 2 -> 8 -> 64 -> 784, reshaped back to a 28x28x1 image.

fc3 <- mx.symbol.FullyConnected(data = encoder, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder <- mx.symbol.reshape(data = fc5, shape = c(28, 28, 1, -1), name = 'decoder')

# MSE reconstruction loss, averaged over every pixel in the batch.

label <- mx.symbol.Variable(name = 'label')

residual <- mx.symbol.broadcast_minus(lhs = label, rhs = decoder) 
square_residual <- mx.symbol.square(data = residual)
mean_square_residual <- mx.symbol.mean(data = square_residual, axis = 0:3, keepdims = FALSE)
mse_loss <- mx.symbol.MakeLoss(data = mean_square_residual, name = 'mse')

第一節:生成模型的原理(4)

# Adam optimizer with light weight decay.
my_optimizer <- mx.opt.create(name = "adam", learning.rate = 0.001,
                              beta1 = 0.9, beta2 = 0.999, wd = 1e-4)

# The MakeLoss symbol already emits the loss value, so the metric simply
# reports the network output as-is; the first (label) argument is unused.
my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss",
  function(label, pred) {
    as.array(pred)
  }
)

mx.set.seed(0)

# Train the full autoencoder on GPU for 20 epochs.
# NOTE(review): the custom iterator was built with batch.size = 50, which
# presumably takes precedence over array.batch.size = 20 here — confirm.
model <- mx.model.FeedForward.create(symbol = mse_loss, X = my_iter,
                                     optimizer = my_optimizer,
                                     eval.metric = my.eval.metric.loss,
                                     array.batch.size = 20,
                                     ctx = mx.gpu(), num.round = 20)
# Carve the trained encoder out of the full autoencoder graph by grabbing
# the internal 'encoder_output' node.
all_layers <- model$symbol$get.internals()
encoder_output <- all_layers$get.output(which(all_layers$outputs == 'encoder_output'))

encoder_model <- model
encoder_model$symbol <- encoder_output

# Keep only the parameters the encoder sub-graph actually uses
# (inferred once from a dummy 28x28x1 batch of 7).
enc_shapes <- mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))
encoder_model$arg.params <- encoder_model$arg.params[names(encoder_model$arg.params) %in% names(enc_shapes$arg.shapes)]
encoder_model$aux.params <- encoder_model$aux.params[names(encoder_model$aux.params) %in% names(enc_shapes$aux.shapes)]
# Rebuild the decoder as a standalone graph whose input is the 2-D latent
# code. Layer names (fc3/fc4/fc5) match the training graph, so the trained
# weights are reused by name.
data <- mx.symbol.Variable('data')

fc3 <- mx.symbol.FullyConnected(data = data, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder_output <- mx.symbol.reshape(data = fc5, shape = c(28, 28, 1, -1), name = 'decoder')

decoder_model <- model
decoder_model$symbol <- decoder_output

# Retain only the parameters belonging to the decoder sub-graph
# (inferred once from a dummy batch of 7 codes of dimension 2).
dec_shapes <- mx.symbol.infer.shape(decoder_output, data = c(2, 7))
decoder_model$arg.params <- decoder_model$arg.params[names(decoder_model$arg.params) %in% names(dec_shapes$arg.shapes)]
decoder_model$aux.params <- decoder_model$aux.params[names(decoder_model$aux.params) %in% names(dec_shapes$aux.shapes)]

第一節:生成模型的原理(5)

# Encode the full data set and visualize the 2-D latent space.

# Reshape all images into a 28x28x1xN array, rescaled to [0, 1].
X <- t(DAT[, -1])
dim(X) <- c(28, 28, 1, ncol(X))
X <- X / 255

Y <- DAT[, 1]  # digit labels 0-9

# Project every image to its 2-D code and scatter-plot, colored by digit.
zip_code <- predict(encoder_model, X)
digit_colors <- rainbow(10, alpha = 0.5)
plot(zip_code[1, ], zip_code[2, ],
     xlab = 'dim 1', ylab = 'dim 2',
     pch = 1, cex = 0.5, col = digit_colors[Y + 1])
legend('bottomleft', legend = 0:9, pch = 1, col = rainbow(10))