第一節：生成模型的原理(1)

上週的課程我們講解了一個有趣的模型-自編碼器。

– 我們講述了他其中一個重要的功能，就是解壓縮的部分能夠拿來當作「圖像生成」用，但效果相當悲劇

F01

悲劇的原因在於在壓縮的過程中，他的樣本空間很有可能只有部分是可用的，而當你的「隨機數」指定到剩下的部分當然沒有辦法很好的做出圖像「還原」。
讓我們利用MNIST的資料再做一個有趣的範例！

– 請在這裡下載MNIST的手寫數字資料

library(data.table)
library(OpenImageR)

# Read the MNIST CSV as a plain data.frame and coerce it to a numeric matrix.
# Column 1 is used as the digit label below; the other columns are the pixels.
DAT <- fread("data/MNIST.csv", data.table = FALSE)
DAT <- data.matrix(DAT)

# Display sample #123: its pixel row is folded row-wise into 28 rows and then
# transposed before being handed to imageShow().
imageShow(
  t(
    matrix(as.numeric(DAT[123, -1]), nrow = 28, byrow = TRUE)
  )
)

第一節：生成模型的原理(2)

我們再次複習一下要如何利用MxNet建構神經網路，首先要先編寫Iterator，我們直接從第6課的地方把當初的Iterator抄過來，需要注意的是，我們把val.y改成與val.x完全一樣：

library(mxnet)
library(magrittr)

# Reference-class iterator that wraps mx.io.CSVIter and turns every CSV batch
# into an autoencoder batch: the image tensor is used both as `data` and as
# `label`.
#
# Constructor arguments:
#   iter        ignored on input; replaced by the CSVIter created here.
#   data.csv    path of the CSV file to stream.
#   data.shape  number of values per sample (785 below: first value + pixels).
#   batch.size  number of samples per batch.
my_iterator_func <- setRefClass(
  "Custom_Iter1",
  fields = c("iter", "data.csv", "data.shape", "batch.size"),
  contains = "Rcpp_MXArrayDataIter",
  methods = list(
    initialize = function(iter, data.csv, data.shape, batch.size) {
      csv_iter <- mx.io.CSVIter(data.csv = data.csv, data.shape = data.shape, batch.size = batch.size)
      .self$iter <- csv_iter
      .self
    },
    value = function() {
      val <- as.array(.self$iter$value()$data)
      # Drop the first row of the batch (the non-pixel value of each sample).
      # drop = FALSE keeps the matrix shape even when the batch holds a single
      # sample; otherwise ncol() below would return NULL and the reshape fail.
      val.x <- val[-1, , drop = FALSE]
      dim(val.x) <- c(28, 28, 1, ncol(val.x))
      val.x <- val.x / 255
      val.x <- mx.nd.array(val.x)
      # Autoencoder target: the label is the input image itself.
      val.y <- val.x
      list(data = val.x, label = val.y)
    },
    iter.next = function() {
      .self$iter$iter.next()
    },
    reset = function() {
      .self$iter$reset()
    },
    finalize = function() {
    }
  )
)

my_iter <- my_iterator_func(iter = NULL, data.csv = 'data/MNIST.csv', data.shape = 785, batch.size = 50)

第一節：生成模型的原理(3)

讓我們做一次上次的實驗，差別在這次我們將維度壓縮到2維，方便我們觀察：

# Encoder: input -> 64 -> 8 -> 2 (the 2-d latent code)

data <- mx.symbol.Variable('data')

fc1 <- mx.symbol.FullyConnected(data = data, num.hidden = 64, name = 'fc1')
relu1 <- mx.symbol.Activation(data = fc1, act_type = "relu", name = 'relu1')

# fc2 has to consume relu1. Feeding it `data` again leaves fc1/relu1
# disconnected from the graph, so the 64-unit layer would never be used.
fc2 <- mx.symbol.FullyConnected(data = relu1, num.hidden = 8, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

encoder <- mx.symbol.FullyConnected(data = relu2, num.hidden = 2, name = 'encoder')

# Decoder: 2 -> 8 -> 64 -> 784, reshaped back to 28 x 28 x 1 x batch

fc3 <- mx.symbol.FullyConnected(data = encoder, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder <- mx.symbol.reshape(data = fc5, shape = c(28, 28, 1, -1), name = 'decoder')

# MSE loss between the input image (supplied as the label) and its reconstruction

label <- mx.symbol.Variable(name = 'label')

residual <- mx.symbol.broadcast_minus(lhs = label, rhs = decoder)
square_residual <- mx.symbol.square(data = residual)
mean_square_residual <- mx.symbol.mean(data = square_residual, axis = 0:3, keepdims = FALSE)
mse_loss <- mx.symbol.MakeLoss(data = mean_square_residual, name = 'mse')

第一節：生成模型的原理(4)

先定義Optimizer：

# Adam optimizer passed to every mx.model.FeedForward.create() call in this file.
my_optimizer <- mx.opt.create(
  name = "adam",
  learning.rate = 0.001,
  beta1 = 0.9,
  beta2 = 0.999,
  wd = 1e-4
)

我們這裡使用內建的函數「mx.model.FeedForward.create」進行運算：

# Custom metric: the network's output is already the loss (MakeLoss symbol),
# so the "prediction" is reported unchanged.
my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss",
  function(real, pred) {
    as.array(pred)
  }
)

# Fix the MXNet seed before training.
mx.set.seed(0)

# Train the plain autoencoder for 20 rounds.
model <- mx.model.FeedForward.create(
  symbol = mse_loss,
  X = my_iter,
  optimizer = my_optimizer,
  eval.metric = my.eval.metric.loss,
  array.batch.size = 20,
  ctx = mx.gpu(),
  num.round = 20
)

想要分離壓縮模型並不困難，上節課我們教過了：

# Pull the internal node named 'encoder_output' out of the trained graph.
all_layers <- model$symbol$get.internals()
encoder_output <- all_layers$get.output(which(all_layers$outputs == 'encoder_output'))

# Copy the trained model, swap in the truncated symbol and keep only the
# parameters that the truncated graph lists. Only the names returned by
# infer.shape are used, so the batch size of 7 passed to it is arbitrary.
encoder_model <- model
encoder_model$symbol <- encoder_output
encoder_model$arg.params <- encoder_model$arg.params[
  names(encoder_model$arg.params) %in%
    names(mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))$arg.shapes)
]
encoder_model$aux.params <- encoder_model$aux.params[
  names(encoder_model$aux.params) %in%
    names(mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))$aux.shapes)
]

這是解壓縮模型，需要重新寫架構一次：

# Stand-alone decoder graph whose input `data` is the 2-d latent code. The
# layer names repeat those of the trained network so its weights are picked up
# by name.
data <- mx.symbol.Variable('data')

fc3 <- mx.symbol.FullyConnected(data = data, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder_output <- mx.symbol.reshape(
  data = fc5,
  shape = c(28, 28, 1, -1),
  name = 'decoder'
)

# Copy the trained model and keep only the parameters the decoder graph lists.
decoder_model <- model
decoder_model$symbol <- decoder_output
decoder_model$arg.params <- decoder_model$arg.params[
  names(decoder_model$arg.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(2, 7))$arg.shapes)
]
decoder_model$aux.params <- decoder_model$aux.params[
  names(decoder_model$aux.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(2, 7))$aux.shapes)
]

第一節：生成模型的原理(5)

讓我們將所有樣本都通過這個壓縮模型，並將數值放到2維圖上：

# Image tensor: pixel columns transposed, reshaped to 28 x 28 x 1 x N and
# divided by 255. The first column of DAT is kept as the digit label.
X <- t(DAT[, -1])
dim(X) <- c(28, 28, 1, ncol(X))
X <- X / 255

Y <- DAT[, 1]

# Encode every sample and scatter the two latent dimensions, coloured by digit.
zip_code <- predict(encoder_model, X)
plot(
  zip_code[1, ], zip_code[2, ],
  xlab = 'dim 1', ylab = 'dim 2',
  pch = 1, cex = 0.5,
  col = rainbow(10, alpha = 0.5)[Y + 1]
)
legend('bottomleft', legend = 0:9, pch = 1, col = rainbow(10))

你發現了什麼?是不是在空間中並非相當均勻?

第一節：生成模型的原理(6)

這樣會造成什麼問題呢?我們現在挑選一個在0附近的點，讓模型進行生成看看：

# Latent code to decode: the mean code of all samples labelled 0, as a
# 2 x 1 matrix (one column = one sample).
my_zip_code <- apply(zip_code[, which(Y == 0)], 1, mean)
dim(my_zip_code) <- c(2, 1)

# Decode, then clamp the output into [0, 1] so it can be drawn as a raster.
unzip_pred <- predict(decoder_model, my_zip_code, array.layout = 'colmajor')
unzip_pred[unzip_pred < 0] <- 0
unzip_pred[unzip_pred > 1] <- 1

par(mar = rep(0, 4))
plot(
  NA,
  xlim = c(0.04, 0.96), ylim = c(0.04, 0.96),
  xaxt = "n", yaxt = "n", bty = "n"
)
rasterImage(unzip_pred[, , , 1], 0, 0, 1, 1, interpolate = FALSE)

看起來沒甚麼問題，但假設你選了一個不在剛剛分布範圍內的點，那就會看不出這是什麼了：

# A hand-picked latent code, stored as a 2 x 1 matrix (one column = one sample).
my_zip_code <- c(-25, -15)
dim(my_zip_code) <- c(2, 1)

# Decode, then clamp the output into [0, 1] so it can be drawn as a raster.
unzip_pred <- predict(decoder_model, my_zip_code, array.layout = 'colmajor')
unzip_pred[unzip_pred < 0] <- 0
unzip_pred[unzip_pred > 1] <- 1

par(mar = rep(0, 4))
plot(
  NA,
  xlim = c(0.04, 0.96), ylim = c(0.04, 0.96),
  xaxt = "n", yaxt = "n", bty = "n"
)
rasterImage(unzip_pred[, , , 1], 0, 0, 1, 1, interpolate = FALSE)

第一節：生成模型的原理(7)

這還不是最悲劇的一點，讓我們從6的中心點讓他跑到0的中心點看看：

# Mean latent codes of the samples labelled 6 and 0.
zip_code.6 <- apply(zip_code[, which(Y == 6)], 1, mean)
zip_code.0 <- apply(zip_code[, which(Y == 0)], 1, mean)

# Ten evenly spaced codes on the straight line from the "6" centre to the "0"
# centre: one row per latent dimension, one column per step.
my_zip_code <- rbind(
  seq(zip_code.6[1], zip_code.0[1], length.out = 10),
  seq(zip_code.6[2], zip_code.0[2], length.out = 10)
)

# Decode all ten codes and clamp the output into [0, 1].
unzip_pred <- predict(decoder_model, my_zip_code, array.layout = 'colmajor')
unzip_pred[unzip_pred < 0] <- 0
unzip_pred[unzip_pred > 1] <- 1

# Draw the ten decoded images on a 2 x 5 grid.
par(mar = rep(0, 4), mfrow = c(2, 5))

for (i in seq_len(10)) {
  plot(
    NA,
    xlim = c(0.04, 0.96), ylim = c(0.04, 0.96),
    xaxt = "n", yaxt = "n", bty = "n"
  )
  rasterImage(unzip_pred[, , , i], 0, 0, 1, 1, interpolate = FALSE)
}

你是否注意到了在轉換過程中出現莫名其妙的數字？讓我們在壓縮編碼圖上看看他的軌跡：

# Re-encode all samples, scatter the codes by digit and overlay the
# interpolation path (filled black dots) stored in my_zip_code.
par(mar = rep(0, 4))
zip_code <- predict(encoder_model, X)
plot(
  zip_code[1, ], zip_code[2, ],
  pch = 1, cex = 0.5,
  col = rainbow(10, alpha = 0.5)[Y + 1]
)
legend('bottomleft', legend = 0:9, pch = 1, col = rainbow(10))
points(my_zip_code[1, ], my_zip_code[2, ], pch = 19, cex = 1.5)

只要在轉換過程中遇到不在原分布空間內的軌跡，就會造成這個問題。

第一節：生成模型的原理(8)

生成模型說穿了，就是一個「分布」映射器，讓我們可以從「隱碼」變成「圖像」。

– 我們對「圖像」該長成什麼樣子應該已經有個既定的印象了，這個印象從我們的類似經驗中獲取，所以出現了新的樣本我們就會覺得「不像」。

F02

因此，「生成模型沒有所謂的好與壞」，只是我們在使用的時候要非常清楚原始的「隱碼分布」，否則如果我們給一個不在「隱碼分布」內的資料，生成模型必然會產生一個「不像」的圖像。

– 那現在的關鍵是，自編碼器的訓練中我們沒辦法保證我們知道這個「隱碼分布」長成什麼樣子，那我們該怎樣確保呢?

第二節：分布限制自編碼器(1)

我們必須加上一個限制，保證模型在訓練時的「隱碼分布」在某個範圍之內。

– 舉例來說，在剛剛的範例中以類似正則化的方式限制隱碼要盡可能的接近原點$(0, 0)$，那這樣子問題自然就有可能好轉。

– 實現過程不會很難，讓我們加上一個損失就可以了，這邊要注意權重：

# Encoder: input -> 64 -> 8 -> 2 (the 2-d latent code)

data <- mx.symbol.Variable('data')

fc1 <- mx.symbol.FullyConnected(data = data, num.hidden = 64, name = 'fc1')
relu1 <- mx.symbol.Activation(data = fc1, act_type = "relu", name = 'relu1')

# fc2 has to consume relu1. Feeding it `data` again leaves fc1/relu1
# disconnected from the graph, so the 64-unit layer would never be used.
fc2 <- mx.symbol.FullyConnected(data = relu1, num.hidden = 8, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

encoder <- mx.symbol.FullyConnected(data = relu2, num.hidden = 2, name = 'encoder')

# Decoder: 2 -> 8 -> 64 -> 784, reshaped back to 28 x 28 x 1 x batch

fc3 <- mx.symbol.FullyConnected(data = encoder, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder <- mx.symbol.reshape(data = fc5, shape = c(28, 28, 1, -1), name = 'decoder')

# MSE loss between the input image (supplied as the label) and its reconstruction

label <- mx.symbol.Variable(name = 'label')

residual <- mx.symbol.broadcast_minus(lhs = label, rhs = decoder)
square_residual <- mx.symbol.square(data = residual)
mean_square_residual <- mx.symbol.mean(data = square_residual, axis = 0:3, keepdims = FALSE)

# Encoder loss: mean squared latent code, which pulls the codes towards (0, 0)

square_encoder <- mx.symbol.square(data = encoder)
mean_square_encoder <- mx.symbol.mean(data = square_encoder, axis = 0:1, keepdims = FALSE)

# Total loss = reconstruction error + 1e-4 * latent-code penalty.
my_loss <- mx.symbol.MakeLoss(data = mean_square_residual + 1e-4 * mean_square_encoder, name = 'loss')

第二節：分布限制自編碼器(2)

再讓我們訓練一次：

# Custom metric: the network's output is already the loss (MakeLoss symbol),
# so the "prediction" is reported unchanged.
my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss",
  function(real, pred) {
    as.array(pred)
  }
)

# Fix the MXNet seed before training.
mx.set.seed(0)

# Train the latent-constrained autoencoder for 20 rounds.
model <- mx.model.FeedForward.create(
  symbol = my_loss,
  X = my_iter,
  optimizer = my_optimizer,
  eval.metric = my.eval.metric.loss,
  array.batch.size = 20,
  ctx = mx.gpu(),
  num.round = 20
)

再讓我們分離一次壓縮模型及解壓縮模型：

# Encoder: cut the trained graph at the internal node 'encoder_output'.

all_layers <- model$symbol$get.internals()
encoder_output <- all_layers$get.output(which(all_layers$outputs == 'encoder_output'))

# Keep only the parameters the truncated graph lists. Only the names returned
# by infer.shape are used, so the batch size of 7 is arbitrary.
encoder_model <- model
encoder_model$symbol <- encoder_output
encoder_model$arg.params <- encoder_model$arg.params[
  names(encoder_model$arg.params) %in%
    names(mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))$arg.shapes)
]
encoder_model$aux.params <- encoder_model$aux.params[
  names(encoder_model$aux.params) %in%
    names(mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))$aux.shapes)
]

# Decoder: stand-alone graph fed with the 2-d code; layer names repeat those of
# the trained network so its weights are picked up by name.

data <- mx.symbol.Variable('data')

fc3 <- mx.symbol.FullyConnected(data = data, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder_output <- mx.symbol.reshape(
  data = fc5,
  shape = c(28, 28, 1, -1),
  name = 'decoder'
)

decoder_model <- model
decoder_model$symbol <- decoder_output
decoder_model$arg.params <- decoder_model$arg.params[
  names(decoder_model$arg.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(2, 7))$arg.shapes)
]
decoder_model$aux.params <- decoder_model$aux.params[
  names(decoder_model$aux.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(2, 7))$aux.shapes)
]

第二節：分布限制自編碼器(3)

新版的隱碼分布是不是被限制了？雖然還不是非常完美

# Image tensor: pixel columns transposed, reshaped to 28 x 28 x 1 x N and
# divided by 255. The first column of DAT is kept as the digit label.
X <- t(DAT[, -1])
dim(X) <- c(28, 28, 1, ncol(X))
X <- X / 255

Y <- DAT[, 1]

# Encode every sample and scatter the two latent dimensions, coloured by digit.
zip_code <- predict(encoder_model, X)
plot(
  zip_code[1, ], zip_code[2, ],
  xlab = 'dim 1', ylab = 'dim 2',
  pch = 1, cex = 0.5,
  col = rainbow(10, alpha = 0.5)[Y + 1]
)
legend('bottomleft', legend = 0:9, pch = 1, col = rainbow(10))

不但限制的並不完美，並且還會對生成模型造成解析度的損失：

# Mean latent codes of the samples labelled 6 and 0.
zip_code.6 <- apply(zip_code[, which(Y == 6)], 1, mean)
zip_code.0 <- apply(zip_code[, which(Y == 0)], 1, mean)

# Ten evenly spaced codes on the straight line from the "6" centre to the "0"
# centre: one row per latent dimension, one column per step.
my_zip_code <- rbind(
  seq(zip_code.6[1], zip_code.0[1], length.out = 10),
  seq(zip_code.6[2], zip_code.0[2], length.out = 10)
)

# Decode all ten codes and clamp the output into [0, 1].
unzip_pred <- predict(decoder_model, my_zip_code, array.layout = 'colmajor')
unzip_pred[unzip_pred < 0] <- 0
unzip_pred[unzip_pred > 1] <- 1

# Draw the ten decoded images on a 2 x 5 grid.
par(mar = rep(0, 4), mfrow = c(2, 5))

for (i in seq_len(10)) {
  plot(
    NA,
    xlim = c(0.04, 0.96), ylim = c(0.04, 0.96),
    xaxt = "n", yaxt = "n", bty = "n"
  )
  rasterImage(unzip_pred[, , , i], 0, 0, 1, 1, interpolate = FALSE)
}

其實他還有一個大缺陷，那就是Encoder loss的權重很難調整，調得太高會無法訓練，調得太低又沒有效果。

第三節：變分自編碼器(1)

變分自編碼器(Variational AutoEncoder，簡稱VAE)，是由Diederik P Kingma與Max Welling在2013年所發表的Auto-Encoding Variational Bayes第一次描述

– 簡單來說VAE加入了一些noise進去AutoEncoder內，透過Normal distribution的抽樣讓結果更好。

F03

透過隨機抽樣的部分，它可以讓原始的隱碼分布更加的平滑(因為這個部分是全隨機的)，並且由於這個隨機抽樣分布是我們所指定的，因此我們會非常清楚這個分布的性質。

第三節：變分自編碼器(2)

注意VAE在Encoder的結構，以及Loss的變化。

– 在標準差的部分，我們輸出的是$ln(sd)$，這是為了避免出現負數。

# Encoder: input -> 64 -> 8 -> (mean, ln(sd)) of a 2-d Gaussian code

data <- mx.symbol.Variable('data')

fc1 <- mx.symbol.FullyConnected(data = data, num.hidden = 64, name = 'fc1')
relu1 <- mx.symbol.Activation(data = fc1, act_type = "relu", name = 'relu1')

# fc2 has to consume relu1. Feeding it `data` again leaves fc1/relu1
# disconnected from the graph, so the 64-unit layer would never be used.
fc2 <- mx.symbol.FullyConnected(data = relu1, num.hidden = 8, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

encoder_mean <- mx.symbol.FullyConnected(data = relu2, num.hidden = 2, name = 'encoder_mean')
encoder_lnsd <- mx.symbol.FullyConnected(data = relu2, num.hidden = 2, name = 'encoder_lnsd')
encoder_sd <- mx.symbol.exp(encoder_lnsd, name = 'encoder_sd')
# Standard-normal noise. The shape is hard-coded as (latent dim, batch size),
# so the 50 has to match the batch size delivered by the training iterator.
encoder_noise <- mx.symbol.random_normal(loc = 0, scale = 1, shape = c(2, 50))
# Sampled code: mean + sd * noise.
encoder <- encoder_noise * encoder_sd + encoder_mean

# Decoder: 2 -> 8 -> 64 -> 784, reshaped back to 28 x 28 x 1 x batch

fc3 <- mx.symbol.FullyConnected(data = encoder, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder <- mx.symbol.reshape(data = fc5, shape = c(28, 28, 1, -1), name = 'decoder')

# MSE loss between the input image (supplied as the label) and its reconstruction

label <- mx.symbol.Variable(name = 'label')

residual <- mx.symbol.broadcast_minus(lhs = label, rhs = decoder)
square_residual <- mx.symbol.square(data = residual)
mean_square_residual <- mx.symbol.mean(data = square_residual, axis = 0:3, keepdims = FALSE)

# Encoder loss: Gaussian KL term against N(0, 1), per latent unit
#   1 + ln(sd^2) - mean^2 - sd^2
# The network outputs ln(sd), so ln(sd^2) = 2 * ln(sd) and the variance is
# sd^2. Using ln(sd) and sd directly would treat encoder_sd as a variance,
# which contradicts the sampling step above (noise * sd).

square_encoder_mean <- mx.symbol.square(data = encoder_mean)
square_encoder_sd <- mx.symbol.square(data = encoder_sd)
loss_encoder <- mx.symbol.mean(1 + 2 * encoder_lnsd - square_encoder_mean - square_encoder_sd, axis = 0:1, keepdims = FALSE)

# loss_encoder is maximal (zero) at mean = 0, sd = 1, hence the minus sign.
my_loss <- mx.symbol.MakeLoss(data = mean_square_residual - 5e-4 * loss_encoder, name = 'loss')

第三節：變分自編碼器(3)

再用同樣的資料訓練一次：

# Custom metric: the network's output is already the loss (MakeLoss symbol),
# so the "prediction" is reported unchanged.
my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss",
  function(real, pred) {
    as.array(pred)
  }
)

# Fix the MXNet seed before training.
mx.set.seed(0)

# Train the VAE for 20 rounds.
model <- mx.model.FeedForward.create(
  symbol = my_loss,
  X = my_iter,
  optimizer = my_optimizer,
  eval.metric = my.eval.metric.loss,
  array.batch.size = 20,
  ctx = mx.gpu(),
  num.round = 20
)

再讓我們分離一次壓縮模型，注意這次我們不要隨機的部分：

# Encoder: cut the trained graph at 'encoder_mean_output', i.e. the
# deterministic mean of the code without the random sampling step.

all_layers <- model$symbol$get.internals()
encoder_output <- all_layers$get.output(which(all_layers$outputs == 'encoder_mean_output'))

# Keep only the parameters the truncated graph lists. Only the names returned
# by infer.shape are used, so the batch size of 7 is arbitrary.
encoder_model <- model
encoder_model$symbol <- encoder_output
encoder_model$arg.params <- encoder_model$arg.params[
  names(encoder_model$arg.params) %in%
    names(mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))$arg.shapes)
]
encoder_model$aux.params <- encoder_model$aux.params[
  names(encoder_model$aux.params) %in%
    names(mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))$aux.shapes)
]

# Decoder: stand-alone graph fed with the 2-d code; layer names repeat those of
# the trained network so its weights are picked up by name.

data <- mx.symbol.Variable('data')

fc3 <- mx.symbol.FullyConnected(data = data, num.hidden = 8, name = 'fc3')
relu3 <- mx.symbol.Activation(data = fc3, act_type = "relu", name = 'relu3')

fc4 <- mx.symbol.FullyConnected(data = relu3, num.hidden = 64, name = 'fc4')
relu4 <- mx.symbol.Activation(data = fc4, act_type = "relu", name = 'relu4')

fc5 <- mx.symbol.FullyConnected(data = relu4, num.hidden = 784, name = 'fc5')
decoder_output <- mx.symbol.reshape(
  data = fc5,
  shape = c(28, 28, 1, -1),
  name = 'decoder'
)

decoder_model <- model
decoder_model$symbol <- decoder_output
decoder_model$arg.params <- decoder_model$arg.params[
  names(decoder_model$arg.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(2, 7))$arg.shapes)
]
decoder_model$aux.params <- decoder_model$aux.params[
  names(decoder_model$aux.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(2, 7))$aux.shapes)
]

第三節：變分自編碼器(4)

新版的隱碼分布是不是被限制的更好了?

# Image tensor: pixel columns transposed, reshaped to 28 x 28 x 1 x N and
# divided by 255. The first column of DAT is kept as the digit label.
X <- t(DAT[, -1])
dim(X) <- c(28, 28, 1, ncol(X))
X <- X / 255

Y <- DAT[, 1]

# Encode every sample (mean code only) and scatter the two latent dimensions,
# coloured by digit.
zip_code <- predict(encoder_model, X)
plot(
  zip_code[1, ], zip_code[2, ],
  xlab = 'dim 1', ylab = 'dim 2',
  pch = 1, cex = 0.5,
  col = rainbow(10, alpha = 0.5)[Y + 1]
)
legend('bottomleft', legend = 0:9, pch = 1, col = rainbow(10))

再讓我們看一次從6到0的過程，是不是好多了：

# Mean latent codes of the samples labelled 6 and 0.
zip_code.6 <- apply(zip_code[, which(Y == 6)], 1, mean)
zip_code.0 <- apply(zip_code[, which(Y == 0)], 1, mean)

# Ten evenly spaced codes on the straight line from the "6" centre to the "0"
# centre: one row per latent dimension, one column per step.
my_zip_code <- rbind(
  seq(zip_code.6[1], zip_code.0[1], length.out = 10),
  seq(zip_code.6[2], zip_code.0[2], length.out = 10)
)

# Decode all ten codes and clamp the output into [0, 1].
unzip_pred <- predict(decoder_model, my_zip_code, array.layout = 'colmajor')
unzip_pred[unzip_pred < 0] <- 0
unzip_pred[unzip_pred > 1] <- 1

# Draw the ten decoded images on a 2 x 5 grid.
par(mar = rep(0, 4), mfrow = c(2, 5))

for (i in seq_len(10)) {
  plot(
    NA,
    xlim = c(0.04, 0.96), ylim = c(0.04, 0.96),
    xaxt = "n", yaxt = "n", bty = "n"
  )
  rasterImage(unzip_pred[, , , i], 0, 0, 1, 1, interpolate = FALSE)
}

第三節：變分自編碼器(5)

好多了的關鍵在於我們在中間過程中使用了抽樣，這會讓我們每個點周圍都會增加一個範圍。

– 我們這次把隨機的過程也加進來，並且再看看隨機隱碼分布：

# Extract two sub-models from the trained VAE: one ending at the mean of the
# code, one ending at its standard deviation.
all_layers <- model$symbol$get.internals()

encoder_mean_output <- all_layers$get.output(which(all_layers$outputs == 'encoder_mean_output'))
encoder_sd_output <- all_layers$get.output(which(all_layers$outputs == 'encoder_sd_output'))

# Mean model: keep only the parameters its graph lists (the batch size of 7 is
# arbitrary, only the names returned by infer.shape are used).
encoder_mean_model <- model
encoder_mean_model$symbol <- encoder_mean_output
encoder_mean_model$arg.params <- encoder_mean_model$arg.params[
  names(encoder_mean_model$arg.params) %in%
    names(mx.symbol.infer.shape(encoder_mean_output, data = c(28, 28, 1, 7))$arg.shapes)
]
encoder_mean_model$aux.params <- encoder_mean_model$aux.params[
  names(encoder_mean_model$aux.params) %in%
    names(mx.symbol.infer.shape(encoder_mean_output, data = c(28, 28, 1, 7))$aux.shapes)
]

# Standard-deviation model: same procedure on the 'encoder_sd_output' node.
encoder_sd_model <- model
encoder_sd_model$symbol <- encoder_sd_output
encoder_sd_model$arg.params <- encoder_sd_model$arg.params[
  names(encoder_sd_model$arg.params) %in%
    names(mx.symbol.infer.shape(encoder_sd_output, data = c(28, 28, 1, 7))$arg.shapes)
]
encoder_sd_model$aux.params <- encoder_sd_model$aux.params[
  names(encoder_sd_model$aux.params) %in%
    names(mx.symbol.infer.shape(encoder_sd_output, data = c(28, 28, 1, 7))$aux.shapes)
]

再看一下含隨機的隱碼分布，你是不是理解了為什麼VAE會有較好的生成效果？

– 這個過程具有隨機性，你可以多跑幾次看看。

# Per-sample mean and standard deviation of the latent code.
zip_mean <- predict(encoder_mean_model, X)
zip_sd <- predict(encoder_sd_model, X)

# Draw one random code per sample: mean + sd * N(0, 1) noise. No seed is set
# here, so every run gives a different picture.
zip_code <- zip_sd * rnorm(n = length(zip_sd), mean = 0, sd = 1) + zip_mean

plot(
  zip_code[1, ], zip_code[2, ],
  xlab = 'dim 1', ylab = 'dim 2',
  pch = 1, cex = 0.5,
  col = rainbow(10, alpha = 0.5)[Y + 1]
)
legend('bottomleft', legend = 0:9, pch = 1, col = rainbow(10))

練習1：在VAE中增加隱碼數目

還記得上節課悲劇的生成模型嗎？只不過將隱碼數目提升到32維，我們的模型產生出來的資料就非常的糟糕：

F01

現在我們已經學會了VAE，請你試著增加隱碼數目到32維，再看看它的效果是不是還這麼糟糕？

練習1答案(1)

只要你修正模型架構即可，請你增加隱碼數目。

– 但要注意一點，如果你想讓生成模型更不容易出現亂碼，請你增加Encoder loss的權重。

– 但如果你希望圖像清楚一點，請你降低Encoder loss的權重，但這樣隨機生成時更容易出現亂碼。

# Encoder: input -> 128 -> (mean, ln(sd)) of a 32-d Gaussian code

data <- mx.symbol.Variable('data')

fc1 <- mx.symbol.FullyConnected(data = data, num.hidden = 128, name = 'fc1')
relu1 <- mx.symbol.Activation(data = fc1, act_type = "relu", name = 'relu1')

encoder_mean <- mx.symbol.FullyConnected(data = relu1, num.hidden = 32, name = 'encoder_mean')
encoder_lnsd <- mx.symbol.FullyConnected(data = relu1, num.hidden = 32, name = 'encoder_lnsd')
encoder_sd <- mx.symbol.exp(encoder_lnsd, name = 'encoder_sd')
# Standard-normal noise. The shape is hard-coded as (latent dim, batch size),
# so the 50 has to match the batch size delivered by the training iterator.
encoder_noise <- mx.symbol.random_normal(loc = 0, scale = 1, shape = c(32, 50))
# Sampled code: mean + sd * noise.
encoder <- encoder_noise * encoder_sd + encoder_mean

# Decoder: 32 -> 128 -> 784 -> sigmoid, reshaped back to 28 x 28 x 1 x batch

fc2 <- mx.symbol.FullyConnected(data = encoder, num.hidden = 128, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

fc3 <- mx.symbol.FullyConnected(data = relu2, num.hidden = 784, name = 'fc3')
sigmoid_out <- mx.symbol.Activation(data = fc3, act_type = "sigmoid", name = 'sigmoid_out')

decoder <- mx.symbol.reshape(data = sigmoid_out, shape = c(28, 28, 1, -1), name = 'decoder')

# MSE loss between the input image (supplied as the label) and its reconstruction

label <- mx.symbol.Variable(name = 'label')

residual <- mx.symbol.broadcast_minus(lhs = label, rhs = decoder)
square_residual <- mx.symbol.square(data = residual)
mean_square_residual <- mx.symbol.mean(data = square_residual, axis = 0:3, keepdims = FALSE)

# Encoder loss: Gaussian KL term against N(0, 1), per latent unit
#   1 + ln(sd^2) - mean^2 - sd^2
# The network outputs ln(sd), so ln(sd^2) = 2 * ln(sd) and the variance is
# sd^2. Using ln(sd) and sd directly would treat encoder_sd as a variance,
# which contradicts the sampling step above (noise * sd).

square_encoder_mean <- mx.symbol.square(data = encoder_mean)
square_encoder_sd <- mx.symbol.square(data = encoder_sd)
loss_encoder <- mx.symbol.mean(1 + 2 * encoder_lnsd - square_encoder_mean - square_encoder_sd, axis = 0:1, keepdims = FALSE)

# loss_encoder is maximal (zero) at mean = 0, sd = 1, hence the minus sign.
my_loss <- mx.symbol.MakeLoss(data = mean_square_residual - 5e-2 * loss_encoder, name = 'loss')

練習1答案(2)

再用同樣的資料訓練一次：

# Custom metric: the network's output is already the loss (MakeLoss symbol),
# so the "prediction" is reported unchanged.
my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss",
  function(real, pred) {
    as.array(pred)
  }
)

# Fix the MXNet seed before training.
mx.set.seed(0)

# Train the 32-d VAE for 20 rounds.
model <- mx.model.FeedForward.create(
  symbol = my_loss,
  X = my_iter,
  optimizer = my_optimizer,
  eval.metric = my.eval.metric.loss,
  array.batch.size = 20,
  ctx = mx.gpu(),
  num.round = 20
)

讓我們只將decoder拿出來：

# Stand-alone decoder graph fed with the 32-d code; layer names repeat those of
# the trained network so its weights are picked up by name.
data <- mx.symbol.Variable('data')

fc2 <- mx.symbol.FullyConnected(data = data, num.hidden = 128, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

fc3 <- mx.symbol.FullyConnected(data = relu2, num.hidden = 784, name = 'fc3')
sigmoid_out <- mx.symbol.Activation(data = fc3, act_type = "sigmoid", name = 'sigmoid_out')

decoder_output <- mx.symbol.reshape(
  data = sigmoid_out,
  shape = c(28, 28, 1, -1),
  name = 'decoder'
)

# Copy the trained model and keep only the parameters the decoder graph lists
# (the batch size of 7 is arbitrary, only the names are used).
decoder_model <- model
decoder_model$symbol <- decoder_output
decoder_model$arg.params <- decoder_model$arg.params[
  names(decoder_model$arg.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(32, 7))$arg.shapes)
]
decoder_model$aux.params <- decoder_model$aux.params[
  names(decoder_model$aux.params) %in%
    names(mx.symbol.infer.shape(decoder_output, data = c(32, 7))$aux.shapes)
]

練習1答案(3)

再讓我們隨機抽數字生成模型：

# Ten random 32-d codes drawn from N(0, 1): one column per generated image.
my_zip_code <- array(rnorm(320, mean = 0, sd = 1), dim = c(32, 10))

# Decode all ten codes and clamp the output into [0, 1].
unzip_pred <- predict(decoder_model, my_zip_code, array.layout = 'colmajor')
unzip_pred[unzip_pred < 0] <- 0
unzip_pred[unzip_pred > 1] <- 1

# Draw the ten decoded images on a 2 x 5 grid.
par(mar = rep(0, 4), mfrow = c(2, 5))

for (i in seq_len(10)) {
  plot(
    NA,
    xlim = c(0.04, 0.96), ylim = c(0.04, 0.96),
    xaxt = "n", yaxt = "n", bty = "n"
  )
  rasterImage(unzip_pred[, , , i], 0, 0, 1, 1, interpolate = FALSE)
}

雖然還沒有很好，但這是不是已經比起最開始的時候好太多了！

第四節：異常檢測(1)

了解了自編碼器的原理及進階應用後，我們非常清楚其實生成模型並不是完美的，他只不過是一個「分布轉換器」罷了。

– 也許你覺得很失望，但換個念頭，我們也許能夠利用這個「缺陷」。

假設我們手上的圖像都是「正常」的資料，那出現一個異常資料經過Encoder後，那他的隱碼是不是就會不屬於本來的分布?

– 那既然不屬於本來的分布，在他經過Decoder還原回來的時候，圖片是不是就會變的很怪?

– 也許我們可以利用這個特性來做異常檢測任務！

$F03$

第四節：異常檢測(2)

我們假定，手上的手寫數字中，2跟3都是異常的數字。現在讓我們建構一個資料科學實驗。

– 在這個實驗中，我們假設我們手上有的「異常」資料非常非常少，只有少量的2，並且我們根本就沒有3。

– 但我們假設在未來的狀況之下，我們會面對到2與3的資料，而我們要有能力將他們分離出來。

先創造一個虛擬的資料集：

library(data.table)
library(OpenImageR)

# Load MNIST as a numeric matrix (column 1 = digit label, rest = pixels).
DAT <- data.matrix(fread("data/MNIST.csv", data.table = FALSE))

# Row indices by role: digits 2 and 3 are the "abnormal" classes.
Normal_idx <- which(!DAT[, 1] %in% 2:3)
Abnormal_idx.2 <- which(DAT[, 1] %in% 2)
Abnormal_idx.3 <- which(DAT[, 1] %in% 3)

set.seed(0)

# Training set: 60% of the normal rows plus only ten rows of digit 2.
# No digit 3 is ever put into the training set.
Train_Normal_idx <- sample(Normal_idx, length(Normal_idx) * 0.6, replace = FALSE)
Train_Abnormal_idx.2 <- sample(Abnormal_idx.2, 10, replace = FALSE)

Train_idx <- sort(c(Train_Normal_idx, Train_Abnormal_idx.2))

# Training tensors: 28 x 28 x 1 x N images divided by 255, plus labels.
Train_X <- t(DAT[Train_idx, -1])
dim(Train_X) <- c(28, 28, 1, ncol(Train_X))
Train_X <- Train_X / 255
Train_Y <- DAT[Train_idx, 1]

# Test tensors: every row that was not selected for training.
Test_X <- t(DAT[-Train_idx, -1])
dim(Test_X) <- c(28, 28, 1, ncol(Test_X))
Test_X <- Test_X / 255
Test_Y <- DAT[-Train_idx, 1]

第四節：異常檢測(3)

讓我們來做個二分類模型，並且用上Oversampling技術。我們需要重新自訂一個Iterator：

# Build the closure-based core of the oversampling iterator.
#
# Every batch is class-balanced: half of it (rounded down) is drawn from the
# abnormal samples (labels 2 or 3) and the rest from the normal samples of the
# global Train_X / Train_Y.
#
# Args:
#   batch_size: number of samples per batch.
#
# Returns:
#   A list with the functions `reset`, `iter.next` and `value`, plus
#   `batch_size` and the initial batch counter `batch`.
my_iterator_core <- function(batch_size) {

  batch <- 0
  batch_per_epoch <- dim(Train_X)[4] / batch_size

  # Split the batch so that the two halves always add up to batch_size, even
  # when batch_size is odd.
  n_pos <- batch_size %/% 2
  n_neg <- batch_size - n_pos

  # Draw n indices from `pool`. Indexing through sample.int() avoids the
  # sample() pitfall where a length-one numeric pool is read as 1:pool, and
  # sampling switches to "with replacement" only when the pool is too small
  # (true oversampling) instead of raising an error.
  draw <- function(pool, n) {
    if (n > 0 && length(pool) == 0) {
      stop("my_iterator_core: no sample available for one of the classes", call. = FALSE)
    }
    pool[sample.int(length(pool), n, replace = length(pool) < n)]
  }

  reset <- function() {
    batch <<- 0
  }

  iter.next <- function() {
    batch <<- batch + 1
    if (batch > batch_per_epoch) FALSE else TRUE
  }

  value <- function() {
    pos_idx <- draw(which(Train_Y %in% 2:3), n_pos)
    neg_idx <- draw(which(!Train_Y %in% 2:3), n_neg)
    idx <- c(pos_idx, neg_idx)
    data <- mx.nd.array(Train_X[, , , idx, drop = FALSE])
    # Binary target: 1 for abnormal (2 or 3), 0 otherwise, shaped 1 x batch.
    label <- mx.nd.array(array((Train_Y[idx] %in% 2:3) + 0, dim = c(1, batch_size)))
    list(data = data, label = label)
  }

  list(reset = reset, iter.next = iter.next, value = value, batch_size = batch_size, batch = batch)
}

# Reference-class adapter around my_iterator_core(). Every method simply
# forwards to the closure list stored in the `iter` field.
# Note: the `iter` constructor argument is not used, and the declared
# `batch_size` field is never assigned by initialize().
my_iterator_func <- setRefClass("Custom_Iter",
                                fields = c("iter", "batch_size"),
                                contains = "Rcpp_MXArrayDataIter",
                                methods = list(
                                  # Build the core iterator for the requested batch size.
                                  initialize = function(iter, batch_size = 100){
                                    .self$iter <- my_iterator_core(batch_size = batch_size)
                                    .self
                                  },
                                  # Current batch as list(data = ..., label = ...).
                                  value = function(){
                                    .self$iter$value()
                                  },
                                  # Advance the batch counter; FALSE once the epoch is over.
                                  iter.next = function(){
                                    .self$iter$iter.next()
                                  },
                                  # Restart the epoch.
                                  reset = function(){
                                    .self$iter$reset()
                                  },
                                  # Nothing to release.
                                  finalize=function(){
                                  }
                                )
)

# Iterator instance used for training, with 20 samples per batch.
my_iter <- my_iterator_func(iter = NULL, batch_size = 20)

第四節：異常檢測(3)

讓我們用標準的Lenet來做個二分類模型：

# input
data <- mx.symbol.Variable('data')
label <- mx.symbol.Variable(name = 'label')

# first conv: 5x5 convolution (10 filters) -> relu -> 2x2 max pooling
conv1 <- mx.symbol.Convolution(data = data, kernel = c(5, 5), num_filter = 10, name = 'conv1')
relu1 <- mx.symbol.Activation(data = conv1, act_type = "relu")
pool1 <- mx.symbol.Pooling(
  data = relu1, pool_type = "max",
  kernel = c(2, 2), stride = c(2, 2)
)

# second conv: 5x5 convolution (20 filters) -> relu -> 2x2 max pooling
conv2 <- mx.symbol.Convolution(data = pool1, kernel = c(5, 5), num_filter = 20, name = 'conv2')
relu2 <- mx.symbol.Activation(data = conv2, act_type = "relu")
pool2 <- mx.symbol.Pooling(
  data = relu2, pool_type = "max",
  kernel = c(2, 2), stride = c(2, 2)
)

# first fullc
flatten <- mx.symbol.Flatten(data = pool2)
fc1 <- mx.symbol.FullyConnected(data = flatten, num_hidden = 150, name = 'fc1')
relu3 <- mx.symbol.Activation(data = fc1, act_type = "relu")

# second fullc: a single output unit
fc2 <- mx.symbol.FullyConnected(data = relu3, num_hidden = 1, name = 'fc2')

# logistic: probability of the abnormal class
lenet <- mx.symbol.sigmoid(data = fc2, name = 'lenet')

# Binary cross-entropy; eps keeps log() away from log(0).
eps <- 1e-8
ce_loss_pos <- mx.symbol.broadcast_mul(mx.symbol.log(lenet + eps), label)
ce_loss_neg <- mx.symbol.broadcast_mul(mx.symbol.log(1 - lenet + eps), 1 - label)
ce_loss_mean <- 0 - mx.symbol.mean(ce_loss_pos + ce_loss_neg)
ce_loss <- mx.symbol.MakeLoss(ce_loss_mean, name = 'ce_loss')

訓練模型：

# Custom metric: the network's output is already the loss (MakeLoss symbol),
# so the "prediction" is reported unchanged.
my.eval.metric.loss <- mx.metric.custom(
  name = "my-loss",
  function(real, pred) {
    as.array(pred)
  }
)

# Fix the MXNet seed before training.
mx.set.seed(0)

# Train the binary LeNet classifier for 20 rounds.
model <- mx.model.FeedForward.create(
  symbol = ce_loss,
  X = my_iter,
  optimizer = my_optimizer,
  eval.metric = my.eval.metric.loss,
  array.batch.size = 20,
  ctx = mx.gpu(),
  num.round = 20
)

第四節：異常檢測(4)

讓我們將這個模型直接用於測試組的資料：

# Replace the loss head by the sigmoid output so predict() returns
# probabilities, then score the whole test set.
model$symbol <- lenet
pred_test <- predict(model, Test_X)

library(pROC)

# ROC curve for "is the digit a 2 or a 3", with the AUC printed on the plot.
roc_test <- roc((Test_Y %in% 2:3) ~ as.numeric(pred_test))
plot(roc_test)
text(
  0.5, 0.5,
  paste0('AUC = ', formatC(roc_test[['auc']], 4, format = 'f')),
  col = 'red'
)

不算太差，但是我們並不知道最佳切點在哪，如果想知道就還要把有限的10個樣本再變小。

– 除此之外，他是針對2做的不錯，對3其實做的非常差，這說明我們未來如果有更多不夠像2的樣本，這樣模型也很難識別出他是個錯誤的樣本。

# Two ROC curves side by side: one abnormal digit against the normal digits
# each time, with the other abnormal digit left out.
par(mfrow = c(1, 2))

# Left: digit 2 vs. normal (all 3s removed).
roc_test <- roc((Test_Y[Test_Y != 3] %in% 2) ~ pred_test[, Test_Y != 3])
plot(roc_test)
text(
  0.5, 0.5,
  paste0('AUC = ', formatC(roc_test[['auc']], 4, format = 'f')),
  col = 'red'
)

# Right: digit 3 vs. normal (all 2s removed).
roc_test <- roc((Test_Y[Test_Y != 2] %in% 3) ~ pred_test[, Test_Y != 2])
plot(roc_test)
text(
  0.5, 0.5,
  paste0('AUC = ', formatC(roc_test[['auc']], 4, format = 'f')),
  col = 'red'
)

第五節：使用變分自編碼器來進行異常檢測(1)

讓我們來使用看看VAE，讓我們先修正一下我們的樣本，把「驗證組」給切出來：

# Validation set: 1000 random normal training samples plus every abnormal
# (2 or 3) sample that is in the training set.
Valid_Normal_idx <- sample(which(!Train_Y %in% 2:3), 1000, replace = FALSE)
Valid_Abnormal_idx <- which(Train_Y %in% 2:3)

Valid_idx <- sort(c(Valid_Normal_idx, Valid_Abnormal_idx))

Valid_X <- Train_X[, , , Valid_idx, drop = FALSE]
Valid_Y <- Train_Y[Valid_idx]

# Remove the validation samples from the training set; because all abnormal
# samples moved to validation, what remains is normal samples only.
Train_X <- Train_X[, , , -Valid_idx, drop = FALSE]
Train_Y <- Train_Y[-Valid_idx]

注意這時候我們在訓練組用的是「全正常樣本」，接著修正我們的Iterator：

# Build the closure-based core of the autoencoder iterator.
#
# Each batch is a random draw (without replacement) of `batch_size` images from
# the global Train_X; the same images are returned as both data and label.
#
# Args:
#   batch_size: number of samples per batch.
#
# Returns:
#   A list with the functions `reset`, `iter.next` and `value`, plus
#   `batch_size` and the initial batch counter `batch`.
my_iterator_core <- function(batch_size) {

  batch <- 0
  batch_per_epoch <- dim(Train_X)[4] / batch_size

  reset <- function() {
    batch <<- 0
  }

  iter.next <- function() {
    batch <<- batch + 1
    if (batch > batch_per_epoch) FALSE else TRUE
  }

  value <- function() {
    idx <- sample(1:dim(Train_X)[4], batch_size)
    images <- Train_X[, , , idx, drop = FALSE]
    list(data = mx.nd.array(images), label = mx.nd.array(images))
  }

  list(
    reset = reset,
    iter.next = iter.next,
    value = value,
    batch_size = batch_size,
    batch = batch
  )
}

# Reference-class adapter around my_iterator_core(). Every method simply
# forwards to the closure list stored in the `iter` field.
# Note: the `iter` constructor argument is not used, and the declared
# `batch_size` field is never assigned by initialize().
my_iterator_func <- setRefClass("Custom_Iter",
                                fields = c("iter", "batch_size"),
                                contains = "Rcpp_MXArrayDataIter",
                                methods = list(
                                  # Build the core iterator for the requested batch size.
                                  initialize = function(iter, batch_size = 100){
                                    .self$iter <- my_iterator_core(batch_size = batch_size)
                                    .self
                                  },
                                  # Current batch as list(data = ..., label = ...).
                                  value = function(){
                                    .self$iter$value()
                                  },
                                  # Advance the batch counter; FALSE once the epoch is over.
                                  iter.next = function(){
                                    .self$iter$iter.next()
                                  },
                                  # Restart the epoch.
                                  reset = function(){
                                    .self$iter$reset()
                                  },
                                  # Nothing to release.
                                  finalize=function(){
                                  }
                                )
)

# Iterator instance used for training, with 20 samples per batch.
my_iter <- my_iterator_func(iter = NULL, batch_size = 20)

第五節：使用變分自編碼器來進行異常檢測(2)

現在讓我們編寫VAE：

# Encoder: input -> 64 -> (mean, ln(sd)) of an 8-d Gaussian code

data <- mx.symbol.Variable('data')

fc1 <- mx.symbol.FullyConnected(data = data, num.hidden = 64, name = 'fc1')
relu1 <- mx.symbol.Activation(data = fc1, act_type = "relu", name = 'relu1')

encoder_mean <- mx.symbol.FullyConnected(data = relu1, num.hidden = 8, name = 'encoder_mean')
encoder_lnsd <- mx.symbol.FullyConnected(data = relu1, num.hidden = 8, name = 'encoder_lnsd')
encoder_sd <- mx.symbol.exp(encoder_lnsd, name = 'encoder_sd')
# Standard-normal noise. The shape is hard-coded as (latent dim, batch size),
# so the 20 has to match the batch size delivered by the training iterator.
encoder_noise <- mx.symbol.random_normal(loc = 0, scale = 1, shape = c(8, 20))
# Sampled code: mean + sd * noise.
encoder <- encoder_noise * encoder_sd + encoder_mean

# Decoder: 8 -> 64 -> 784 -> sigmoid, reshaped back to 28 x 28 x 1 x batch

fc2 <- mx.symbol.FullyConnected(data = encoder, num.hidden = 64, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

fc3 <- mx.symbol.FullyConnected(data = relu2, num.hidden = 784, name = 'fc3')
sigmoid_out <- mx.symbol.Activation(data = fc3, act_type = "sigmoid", name = 'sigmoid_out')

decoder <- mx.symbol.reshape(data = sigmoid_out, shape = c(28, 28, 1, -1), name = 'decoder')

# MSE loss between the input image (supplied as the label) and its reconstruction

label <- mx.symbol.Variable(name = 'label')

residual <- mx.symbol.broadcast_minus(lhs = label, rhs = decoder)
square_residual <- mx.symbol.square(data = residual)
mean_square_residual <- mx.symbol.mean(data = square_residual, axis = 0:3, keepdims = FALSE)

# Encoder loss: Gaussian KL term against N(0, 1), per latent unit
#   1 + ln(sd^2) - mean^2 - sd^2
# The network outputs ln(sd), so ln(sd^2) = 2 * ln(sd) and the variance is
# sd^2. Using ln(sd) and sd directly would treat encoder_sd as a variance,
# which contradicts the sampling step above (noise * sd).

square_encoder_mean <- mx.symbol.square(data = encoder_mean)
square_encoder_sd <- mx.symbol.square(data = encoder_sd)
loss_encoder <- mx.symbol.mean(1 + 2 * encoder_lnsd - square_encoder_mean - square_encoder_sd, axis = 0:1, keepdims = FALSE)

# loss_encoder is maximal (zero) at mean = 0, sd = 1, hence the minus sign.
my_loss <- mx.symbol.MakeLoss(data = mean_square_residual - 5e-4 * loss_encoder, name = 'loss')

第五節：使用變分自編碼器來進行異常檢測(3)

訓練模型：

# Evaluation metric that ignores the label and reports the network output
# converted to an R array (the network output is the MakeLoss value).
my.eval.metric.loss <- mx.metric.custom(name = "my-loss", function(real, pred) as.array(pred))

# Fix the MXNet random seed before training.
mx.set.seed(0)

# Train for 20 rounds on the GPU using the custom iterator and optimizer.
model <- mx.model.FeedForward.create(
  symbol = my_loss,
  X = my_iter,
  ctx = mx.gpu(),
  num.round = 20,
  array.batch.size = 20,
  optimizer = my_optimizer,
  eval.metric = my.eval.metric.loss
)

讓我們把Encoder跟Decoder都弄出來：

# Encoder
#
# Cut the trained graph at the 'encoder_mean_output' node and keep only the
# parameters that the truncated graph still needs.

all_layers <- model$symbol$get.internals()
encoder_pos <- which(all_layers$outputs == 'encoder_mean_output')
encoder_output <- all_layers$get.output(encoder_pos)

# Shape inference lists every argument / auxiliary state of the sub-graph;
# the data shape passed here is only used to drive the inference.
encoder_shapes <- mx.symbol.infer.shape(encoder_output, data = c(28, 28, 1, 7))
keep_arg <- names(model$arg.params) %in% names(encoder_shapes$arg.shapes)
keep_aux <- names(model$aux.params) %in% names(encoder_shapes$aux.shapes)

encoder_model <- model
encoder_model$symbol <- encoder_output
encoder_model$arg.params <- model$arg.params[keep_arg]
encoder_model$aux.params <- model$aux.params[keep_aux]

# Decoder
#
# Rebuild the decoder as a stand-alone graph fed by a fresh 'data' input.
# The layer names are the same as in the trained network, so the trained
# parameters can be selected by name below.

data <- mx.symbol.Variable('data')

fc2 <- mx.symbol.FullyConnected(data = data, num.hidden = 64, name = 'fc2')
relu2 <- mx.symbol.Activation(data = fc2, act_type = "relu", name = 'relu2')

fc3 <- mx.symbol.FullyConnected(data = relu2, num.hidden = 784, name = 'fc3')
sigmoid_out <- mx.symbol.Activation(data = fc3, act_type = "sigmoid", name = 'sigmoid_out')

decoder_output <- mx.symbol.reshape(data = sigmoid_out, shape = c(28, 28, 1, -1), name = 'decoder')

# Shape inference lists every argument / auxiliary state of the decoder graph;
# the data shape passed here is only used to drive the inference.
decoder_shapes <- mx.symbol.infer.shape(decoder_output, data = c(8, 7))
keep_arg <- names(model$arg.params) %in% names(decoder_shapes$arg.shapes)
keep_aux <- names(model$aux.params) %in% names(decoder_shapes$aux.shapes)

decoder_model <- model
decoder_model$symbol <- decoder_output
decoder_model$arg.params <- model$arg.params[keep_arg]
decoder_model$aux.params <- model$aux.params[keep_aux]

第五節：使用變分自編碼器來進行異常檢測(4)

對於已經存在的類別（不是2或3的數字），還原出來的圖與原圖幾乎一致：

# Encode the validation images, then decode the codes back into images.
Valid_code <- predict(encoder_model, Valid_X)
Valid_img <- predict(decoder_model, Valid_code)

# Clamp the reconstructions into [0, 1] before drawing them.
Valid_img[Valid_img < 0] <- 0
Valid_img[Valid_img > 1] <- 1

# 2 x 5 grid filled column by column: original on top, reconstruction below.
par(mar = rep(0,4), mfcol = c(2, 5))

# Positions of the validation samples whose label is neither 2 nor 3.
normal_idx <- which(!Valid_Y %in% 2:3)

for (k in 1:5) {
  pos <- normal_idx[k]
  for (img in list(Valid_X, Valid_img)) {
    plot(NA, xlim = c(0.04, 0.96), ylim = c(0.04, 0.96), xaxt = "n", yaxt = "n", bty = "n")
    rasterImage(img[,,,pos], 0, 0, 1, 1, interpolate=FALSE)
  }
}

對於不存在的類別（數字2與3），要把圖還原出來就會有點困難：

# 2 x 5 grid filled column by column: original on top, reconstruction below.
par(mar = rep(0,4), mfcol = c(2, 5))

# Positions of the validation samples whose label is 2 or 3.
anomaly_idx <- which(Valid_Y %in% 2:3)

for (k in 1:5) {
  pos <- anomaly_idx[k]
  for (img in list(Valid_X, Valid_img)) {
    plot(NA, xlim = c(0.04, 0.96), ylim = c(0.04, 0.96), xaxt = "n", yaxt = "n", bty = "n")
    rasterImage(img[,,,pos], 0, 0, 1, 1, interpolate=FALSE)
  }
}

這樣一來，還原圖與原圖之間的差異就可以用來作為異常偵測的指標。

第五節：使用變分自編碼器來進行異常檢測(5)

讓我們將所有驗證集的樣本計算出原圖與還原圖的「差異」：

# Encode and decode every validation image (no clamping here: the raw
# reconstruction is used for the error).
Valid_code <- predict(encoder_model, Valid_X)
Valid_img <- predict(decoder_model, Valid_code)
# Reconstruction error per image: sum of squared differences, aggregated over
# the 4th array dimension (one value per sample).
Valid_diff <- apply((Valid_X - Valid_img)^2, 4, sum)

library(pROC)

# ROC curve for separating labels 2/3 from the rest using the error as score.
roc_valid <- roc((Valid_Y %in% 2:3) ~ Valid_diff)
plot(roc_valid)
# `digits` is named explicitly: a bare positional 4 is matched to formatC's
# `width` argument, not to the number of decimals. as.numeric() hands formatC
# a plain number instead of the classed object stored in the roc result.
text(0.5, 0.5, paste0('AUC = ', formatC(as.numeric(roc_valid[['auc']]), digits = 4, format = 'f')), col = 'red')

我們可以透過這種方式知道最佳切點在哪：

# Pick the threshold that maximises sensitivity + specificity on the
# validation ROC curve, and print it.
youden_sum <- roc_valid[["sensitivities"]] + roc_valid[["specificities"]]
best_pos <- which.max(youden_sum)
best_cut <- roc_valid[["thresholds"]][best_pos]
print(best_cut)

## [1] 38.72186

第五節：使用變分自編碼器來進行異常檢測(6)

將結果用於測試樣本：

# Encode and decode every test image.
Test_code <- predict(encoder_model, Test_X)
Test_img <- predict(decoder_model, Test_code)

# Reconstruction error per image: sum of squared differences, aggregated over
# the 4th array dimension (one value per sample).
Test_diff <- apply((Test_img - Test_X)^2, 4, sum)

library(pROC)

# Confusion table: rows = flagged by the validation cut-off, columns = label
# is 2 or 3. Both factors carry explicit FALSE/TRUE levels so the table is
# always 2 x 2 and the [2, 2] / [1, 1] lookups below cannot go out of bounds
# (previously only the row factor had fixed levels).
tab_test <- table(factor(Test_diff >= best_cut, levels = c(FALSE, TRUE)),
                  factor(Test_Y %in% 2:3, levels = c(FALSE, TRUE)))
sens <- tab_test[2,2] / sum(tab_test[,2])
spec <- tab_test[1,1] / sum(tab_test[,1])

# ROC curve on the test set, with the operating point of the chosen cut-off
# marked in red.
roc_test <- roc((Test_Y %in% 2:3) ~ Test_diff)
plot(roc_test)
points(spec, sens, col = 'red', pch = 19)
text(0.5, 0.5, paste0('Sens = ', formatC(sens, digits = 3, format = 'f'),
                      '\nSpec = ', formatC(spec, digits = 3, format = 'f'),
                      '\nAUC = ', formatC(roc_test$auc, digits = 3, format = 'f')), col = 'red')

分層分析的結果，發現偵測3比偵測2還更準呢：

# Two panels side by side, one ROC curve per anomalous digit.
par(mfrow = c(1, 2))

# Drop the samples labelled 2: the positives that remain are the 3s.
# `digits` is named explicitly in formatC: a bare positional 4 is matched to
# its `width` argument, not to the number of decimals.
roc_test.1 <- roc((Test_Y[Test_Y != 2] %in% 2:3) ~ Test_diff[Test_Y != 2])
plot(roc_test.1)
text(0.5, 0.5, paste0('AUC = ', formatC(as.numeric(roc_test.1[['auc']]), digits = 4, format = 'f')), col = 'red')

# Drop the samples labelled 3: the positives that remain are the 2s.
roc_test.2 <- roc((Test_Y[Test_Y != 3] %in% 2:3) ~ Test_diff[Test_Y != 3])
plot(roc_test.2)
text(0.5, 0.5, paste0('AUC = ', formatC(as.numeric(roc_test.2[['auc']]), digits = 4, format = 'f')), col = 'red')

結語

原來自編碼器的效果這麼好，你有沒有覺得這些進階應用真的讓人腦洞大開？

– 這堂課最有趣的是最後的異常檢測：原來我們可以利用自編碼器的還原誤差來進行異常檢測，而且這種做法不需要有足夠的異常樣本即可進行。

儘管VAE相較於傳統的自編碼器已經有很好的生成效果，但我們仍不滿意，我們必須找到其他更好的方式來進行圖像生成。

– 看過一些深度學習應用之後，你將不得不佩服電腦科學家的厲害。未來在面對你自己的研究時，希望你也能想出有趣的演算法來解決問題！

深度學習理論與實務

第一節：生成模型的原理(1)

第一節：生成模型的原理(2)

第一節：生成模型的原理(3)

第一節：生成模型的原理(4)

第一節：生成模型的原理(5)

第一節：生成模型的原理(6)

第一節：生成模型的原理(7)

第一節：生成模型的原理(8)

第二節：分布限制自編碼器(1)

第二節：分布限制自編碼器(2)

第二節：分布限制自編碼器(3)

第三節：變分自編碼器(1)

第三節：變分自編碼器(2)

第三節：變分自編碼器(3)

第三節：變分自編碼器(4)

第三節：變分自編碼器(5)

練習1：在VAE中增加隱碼數目

練習1答案(1)

練習1答案(2)

練習1答案(3)

第四節：異常檢測(1)

第四節：異常檢測(2)

第四節：異常檢測(3)

第四節：異常檢測(3)

第四節：異常檢測(4)

第五節：使用變分自編碼器來進行異常檢測(1)

第五節：使用變分自編碼器來進行異常檢測(2)

第五節：使用變分自編碼器來進行異常檢測(3)

第五節：使用變分自編碼器來進行異常檢測(4)

第五節：使用變分自編碼器來進行異常檢測(5)

第五節：使用變分自編碼器來進行異常檢測(6)

結語