# Autoencoder.py
# Copyright (c) 2023 Felix Kleinsteuber and Computer Vision Group, Friedrich Schiller University Jena
# This is the initial autoencoder architecture.
# Convolutional, with 5 conv layers + 1 dense layer in each of the encoder and the decoder.
# ReLU on hidden layers, tanh on the output layer.
# Number of latent features: 512
from torch import Tensor, nn
  7. class Autoencoder(nn.Module):
  8. def __init__(self):
  9. super(Autoencoder, self).__init__()
  10. self.encoder = nn.Sequential(
  11. nn.Conv2d(3, 128, kernel_size=7, stride=4, padding=2),
  12. nn.ReLU(True),
  13. nn.Conv2d(128, 64, kernel_size=3, stride=2, padding=1),
  14. nn.ReLU(True),
  15. nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),
  16. nn.ReLU(True),
  17. nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
  18. nn.ReLU(True),
  19. nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
  20. nn.ReLU(True),
  21. nn.Flatten(),
  22. nn.Linear(2048, 512),
  23. nn.ReLU(True), # see https://stackoverflow.com/questions/50187127/is-it-necessary-to-use-a-linear-bottleneck-layer-for-autoencoder
  24. )
  25. self.decoder = nn.Sequential(
  26. nn.Linear(512, 2048),
  27. nn.ReLU(True),
  28. nn.Unflatten(1, (128, 4, 4)),
  29. nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
  30. nn.ReLU(True),
  31. nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
  32. nn.ReLU(True),
  33. nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),
  34. nn.ReLU(True),
  35. nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),
  36. nn.ReLU(True),
  37. nn.ConvTranspose2d(32, 3, kernel_size=8, stride=4, padding=2),
  38. nn.Tanh(),
  39. )
  40. def forward(self, x):
  41. x = self.encoder(x)
  42. x = self.decoder(x)
  43. return x