오토 인코더 네트워크는 일반 분류 자 MLP 네트워크보다 훨씬 까다로운 것으로 보입니다. Lasagne를 사용하여 여러 번 시도한 후에 재구성 된 출력에서 얻는 것은 입력 숫자가 실제로 무엇인지에 대한 구분없이 MNIST 데이터베이스 의 모든 이미지의 모호한 평균과 유사 합니다.
내가 선택한 네트워크 구조는 다음 캐스케이드 계층입니다.
- 입력 레이어 (28x28)
- 2D 컨볼 루션 레이어, 필터 크기 7x7
- 최대 풀링 레이어, 크기 3x3, 보폭 2x2
- 고밀도 (완전히 연결된) 평탄화 층, 10 단위 (병목 현상)
- 고밀도 (완전히 연결된) 레이어, 121 단위
- 레이어를 11x11로 재구성
- 2D 컨볼 루션 레이어, 필터 크기 3x3
- 2D 업 스케일링 레이어 팩터 2
- 2D 컨볼 루션 레이어, 필터 크기 3x3
- 2D 업 스케일링 레이어 팩터 2
- 2D 컨볼 루션 레이어, 필터 크기 5x5
- 기능 최대 풀링 (31x28x28에서 28x28)
모든 2D 컨볼 루션 레이어에는 바이어스가없고, 시그 모이 드 활성화와 31 개의 필터가 있습니다.
완전히 연결된 모든 층에는 시그 모이 드 활성화가 있습니다.
사용 된 손실 함수는 제곱 오차 이며 업데이트 기능은 adagrad
입니다. 학습을위한 청크의 길이는 100 개의 샘플로 1000 에포크에 곱해집니다.
다음은 문제의 예입니다. 상단 행은 네트워크의 입력으로 설정된 일부 샘플이고 하단 행은 재구성입니다.
완전성을 위해 다음은 내가 사용한 코드입니다.
import theano.tensor as T
import theano
import sys
sys.path.insert(0,'./Lasagne') # local checkout of Lasagne
import lasagne
from theano import pp
from theano import function
import gzip
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
def load_mnist():
def load_mnist_images(filename):
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=16)
# The inputs are vectors now, we reshape them to monochrome 2D images,
# following the shape convention: (examples, channels, rows, columns)
data = data.reshape(-1, 1, 28, 28)
# The inputs come as bytes, we convert them to float32 in range [0,1].
# (Actually to range [0, 255/256], for compatibility to the version
# provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
return data / np.float32(256)
def load_mnist_labels(filename):
# Read the labels in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=8)
# The labels are vectors of integers now, that's exactly what we want.
return data
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
return X_train, y_train, X_test, y_test
def plot_filters(conv_layer):
W = conv_layer.get_params()[0]
W_fn = theano.function([],W)
params = W_fn()
ks = np.squeeze(params)
kstack = np.vstack(ks)
plt.imshow(kstack,interpolation='none')
plt.show()
def main():
#theano.config.exception_verbosity="high"
#theano.config.optimizer='None'
X_train, y_train, X_test, y_test = load_mnist()
ohe = OneHotEncoder()
y_train = ohe.fit_transform(np.expand_dims(y_train,1)).toarray()
chunk_len = 100
visamount = 10
num_epochs = 1000
num_filters=31
dropout_p=.0
print "X_train.shape",X_train.shape,"y_train.shape",y_train.shape
input_var = T.tensor4('X')
output_var = T.tensor4('X')
conv_nonlinearity = lasagne.nonlinearities.sigmoid
net = lasagne.layers.InputLayer((chunk_len,1,28,28), input_var)
conv1 = net = lasagne.layers.Conv2DLayer(net,num_filters,(7,7),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.MaxPool2DLayer(net,(3,3),stride=(2,2))
net = lasagne.layers.DropoutLayer(net,p=dropout_p)
#conv2_layer = lasagne.layers.Conv2DLayer(dropout_layer,num_filters,(3,3),nonlinearity=conv_nonlinearity)
#pool2_layer = lasagne.layers.MaxPool2DLayer(conv2_layer,(3,3),stride=(2,2))
net = lasagne.layers.DenseLayer(net,10,nonlinearity=lasagne.nonlinearities.sigmoid)
#augment_layer1 = lasagne.layers.DenseLayer(reduction_layer,33,nonlinearity=lasagne.nonlinearities.sigmoid)
net = lasagne.layers.DenseLayer(net,121,nonlinearity=lasagne.nonlinearities.sigmoid)
net = lasagne.layers.ReshapeLayer(net,(chunk_len,1,11,11))
net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.Upscale2DLayer(net,2)
net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
#pool_after0 = lasagne.layers.MaxPool2DLayer(conv_after1,(3,3),stride=(2,2))
net = lasagne.layers.Upscale2DLayer(net,2)
net = lasagne.layers.DropoutLayer(net,p=dropout_p)
#conv_after2 = lasagne.layers.Conv2DLayer(upscale_layer1,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
#pool_after1 = lasagne.layers.MaxPool2DLayer(conv_after2,(3,3),stride=(1,1))
#upscale_layer2 = lasagne.layers.Upscale2DLayer(pool_after1,4)
net = lasagne.layers.Conv2DLayer(net,num_filters,(5,5),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.FeaturePoolLayer(net,num_filters,pool_function=theano.tensor.max)
print "output_shape:",lasagne.layers.get_output_shape(net)
params = lasagne.layers.get_all_params(net, trainable=True)
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.squared_error(prediction, output_var)
#loss = lasagne.objectives.binary_crossentropy(prediction, output_var)
aggregated_loss = lasagne.objectives.aggregate(loss)
updates = lasagne.updates.adagrad(aggregated_loss,params)
train_fn = theano.function([input_var, output_var], loss, updates=updates)
test_prediction = lasagne.layers.get_output(net, deterministic=True)
predict_fn = theano.function([input_var], test_prediction)
print "starting training..."
for epoch in range(num_epochs):
selected = list(set(np.random.random_integers(0,59999,chunk_len*4)))[:chunk_len]
X_train_sub = X_train[selected,:]
_loss = train_fn(X_train_sub, X_train_sub)
print("Epoch %d: Loss %g" % (epoch + 1, np.sum(_loss) / len(X_train)))
"""
chunk = X_train[0:chunk_len,:,:,:]
result = predict_fn(chunk)
vis1 = np.hstack([chunk[j,0,:,:] for j in range(visamount)])
vis2 = np.hstack([result[j,0,:,:] for j in range(visamount)])
plt.imshow(np.vstack([vis1,vis2]))
plt.show()
"""
print "done."
chunk = X_train[0:chunk_len,:,:,:]
result = predict_fn(chunk)
print "chunk.shape",chunk.shape
print "result.shape",result.shape
plot_filters(conv1)
for i in range(chunk_len/visamount):
vis1 = np.hstack([chunk[i*visamount+j,0,:,:] for j in range(visamount)])
vis2 = np.hstack([result[i*visamount+j,0,:,:] for j in range(visamount)])
plt.imshow(np.vstack([vis1,vis2]))
plt.show()
import ipdb; ipdb.set_trace()
if __name__ == "__main__":
main()
이 네트워크를 개선하여 합리적으로 작동하는 자동 인코더를 얻는 방법에 대한 아이디어가 있습니까?
문제 해결됨!
상당히 다른 구현으로 컨벌루션 레이어에서 시그 모이 드 함수 대신 누수 정류기를 사용하면 병목 레이어에 2 (!!) 노드 만 있고 맨 끝에 1x1 커널과 컨볼 루션이 있습니다.
다음은 일부 재구성의 결과입니다.
암호:
import theano.tensor as T
import theano
import sys
sys.path.insert(0,'./Lasagne') # local checkout of Lasagne
import lasagne
from theano import pp
from theano import function
import theano.tensor.nnet
import gzip
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
def load_mnist():
def load_mnist_images(filename):
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=16)
# The inputs are vectors now, we reshape them to monochrome 2D images,
# following the shape convention: (examples, channels, rows, columns)
data = data.reshape(-1, 1, 28, 28)
# The inputs come as bytes, we convert them to float32 in range [0,1].
# (Actually to range [0, 255/256], for compatibility to the version
# provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
return data / np.float32(256)
def load_mnist_labels(filename):
# Read the labels in Yann LeCun's binary format.
with gzip.open(filename, 'rb') as f:
data = np.frombuffer(f.read(), np.uint8, offset=8)
# The labels are vectors of integers now, that's exactly what we want.
return data
X_train = load_mnist_images('train-images-idx3-ubyte.gz')
y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
return X_train, y_train, X_test, y_test
def main():
X_train, y_train, X_test, y_test = load_mnist()
ohe = OneHotEncoder()
y_train = ohe.fit_transform(np.expand_dims(y_train,1)).toarray()
chunk_len = 100
num_epochs = 10000
num_filters=7
input_var = T.tensor4('X')
output_var = T.tensor4('X')
#conv_nonlinearity = lasagne.nonlinearities.sigmoid
#conv_nonlinearity = lasagne.nonlinearities.rectify
conv_nonlinearity = lasagne.nonlinearities.LeakyRectify(.1)
softplus = theano.tensor.nnet.softplus
#conv_nonlinearity = theano.tensor.nnet.softplus
net = lasagne.layers.InputLayer((chunk_len,1,28,28), input_var)
conv1 = net = lasagne.layers.Conv2DLayer(net,num_filters,(7,7),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.MaxPool2DLayer(net,(3,3),stride=(2,2))
net = lasagne.layers.DenseLayer(net,2,nonlinearity=lasagne.nonlinearities.sigmoid)
net = lasagne.layers.DenseLayer(net,49,nonlinearity=lasagne.nonlinearities.sigmoid)
net = lasagne.layers.ReshapeLayer(net,(chunk_len,1,7,7))
net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.MaxPool2DLayer(net,(3,3),stride=(1,1))
net = lasagne.layers.Upscale2DLayer(net,4)
net = lasagne.layers.Conv2DLayer(net,num_filters,(3,3),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.MaxPool2DLayer(net,(3,3),stride=(1,1))
net = lasagne.layers.Upscale2DLayer(net,4)
net = lasagne.layers.Conv2DLayer(net,num_filters,(5,5),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.Conv2DLayer(net,num_filters,(1,1),nonlinearity=conv_nonlinearity,untie_biases=True)
net = lasagne.layers.FeaturePoolLayer(net,num_filters,pool_function=theano.tensor.max)
net = lasagne.layers.Conv2DLayer(net,1,(1,1),nonlinearity=conv_nonlinearity,untie_biases=True)
print "output shape:",net.output_shape
params = lasagne.layers.get_all_params(net, trainable=True)
prediction = lasagne.layers.get_output(net)
loss = lasagne.objectives.squared_error(prediction, output_var)
#loss = lasagne.objectives.binary_hinge_loss(prediction, output_var)
aggregated_loss = lasagne.objectives.aggregate(loss)
#updates = lasagne.updates.adagrad(aggregated_loss,params)
updates = lasagne.updates.nesterov_momentum(aggregated_loss,params,0.5)#.005
train_fn = theano.function([input_var, output_var], loss, updates=updates)
test_prediction = lasagne.layers.get_output(net, deterministic=True)
predict_fn = theano.function([input_var], test_prediction)
print "starting training..."
for epoch in range(num_epochs):
selected = list(set(np.random.random_integers(0,59999,chunk_len*4)))[:chunk_len]
X_train_sub = X_train[selected,:]
_loss = train_fn(X_train_sub, X_train_sub)
print("Epoch %d: Loss %g" % (epoch + 1, np.sum(_loss) / len(X_train)))
print "done."
chunk = X_train[0:chunk_len,:,:,:]
result = predict_fn(chunk)
print "chunk.shape",chunk.shape
print "result.shape",result.shape
visamount = 10
for i in range(10):
vis1 = np.hstack([chunk[i*visamount+j,0,:,:] for j in range(visamount)])
vis2 = np.hstack([result[i*visamount+j,0,:,:] for j in range(visamount)])
plt.imshow(np.vstack([vis1,vis2]))
plt.show()
import ipdb; ipdb.set_trace()
if __name__ == "__main__":
main()