1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
- # Copyright (c) 2023 Felix Kleinsteuber and Computer Vision Group, Friedrich Schiller University Jena
- # This is the initial autoencoder architecture.
- # It is convolutional: the encoder and the decoder each have 5 conv layers
- # (transposed convolutions in the decoder) plus 1 dense layer.
- # ReLU on the hidden layers, tanh on the output layer.
- # Number of latent features: 512
- from torch import nn
class Autoencoder(nn.Module):
    """Convolutional autoencoder with a dense bottleneck.

    The encoder applies five strided convolutions followed by one dense
    layer; the decoder mirrors it with one dense layer followed by five
    transposed convolutions. Hidden layers use ReLU and the output layer
    uses tanh, so reconstructed values lie in [-1, 1].

    Shape contract: the encoder's dense layer expects 128 * 4 * 4 = 2048
    flattened features, which is what a ``(N, 3, 256, 256)`` input yields
    (spatial side 256 -> 64 -> 32 -> 16 -> 8 -> 4). The decoder always emits
    a ``(N, 3, 256, 256)`` tensor (4 -> 8 -> 16 -> 32 -> 64 -> 256).

    Args:
        latent_features: Width of the bottleneck vector produced by the
            encoder. Defaults to 512, the value this architecture was
            originally written with.

    Raises:
        ValueError: If ``latent_features`` is smaller than 1.
    """

    # Feature-map shape (channels, height, width) at the conv/dense boundary.
    # Both the encoder's flatten size and the decoder's unflatten shape are
    # derived from it so the two cannot drift apart.
    _BOTTLENECK_SHAPE = (128, 4, 4)

    def __init__(self, latent_features: int = 512) -> None:
        if latent_features < 1:
            raise ValueError(
                f"latent_features must be >= 1, got {latent_features}"
            )
        super().__init__()

        channels, height, width = self._BOTTLENECK_SHAPE
        flat_features = channels * height * width  # 2048

        # NOTE: layer order (and therefore the nn.Sequential indices) is kept
        # exactly as before so that existing state_dict checkpoints still load.
        self.encoder = nn.Sequential(
            # Stride-4 stem: 256x256 -> 64x64.
            nn.Conv2d(3, 128, kernel_size=7, stride=4, padding=2),
            nn.ReLU(inplace=True),
            # Each of the following stride-2 convolutions halves the side.
            nn.Conv2d(128, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, channels, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Flatten(),
            nn.Linear(flat_features, latent_features),
            # Non-linear (rather than linear) bottleneck, see
            # https://stackoverflow.com/questions/50187127/is-it-necessary-to-use-a-linear-bottleneck-layer-for-autoencoder
            nn.ReLU(inplace=True),
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_features, flat_features),
            nn.ReLU(inplace=True),
            nn.Unflatten(1, (channels, height, width)),
            # kernel 4 / stride 2 / padding 1 exactly doubles the side.
            nn.ConvTranspose2d(channels, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            # kernel 8 / stride 4 / padding 2 quadruples the side: 64 -> 256.
            nn.ConvTranspose2d(32, 3, kernel_size=8, stride=4, padding=2),
            nn.Tanh(),
        )

    def forward(self, x):
        """Encode ``x`` into the latent vector and reconstruct it.

        Args:
            x: Image batch; the layer arithmetic requires shape
                ``(N, 3, 256, 256)`` for the output to match the input size.
                Presumably scaled to [-1, 1] to match the tanh output range
                (TODO confirm against the training pipeline).

        Returns:
            The reconstruction, a ``(N, 3, 256, 256)`` tensor in [-1, 1].
        """
        latent = self.encoder(x)
        return self.decoder(latent)
|