Autoencoder.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445
# This is the initial autoencoder architecture.
# Convolutional, with 5 conv layers + 1 dense layer in each of the encoder and decoder.
# ReLU on hidden layers, tanh on the output layer.
# Number of latent features: 512
  5. from torch import nn
  6. class Autoencoder(nn.Module):
  7. def __init__(self):
  8. super(Autoencoder, self).__init__()
  9. self.encoder = nn.Sequential(
  10. nn.Conv2d(3, 128, kernel_size=7, stride=4, padding=2),
  11. nn.ReLU(True),
  12. nn.Conv2d(128, 64, kernel_size=3, stride=2, padding=1),
  13. nn.ReLU(True),
  14. nn.Conv2d(64, 32, kernel_size=3, stride=2, padding=1),
  15. nn.ReLU(True),
  16. nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
  17. nn.ReLU(True),
  18. nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
  19. nn.ReLU(True),
  20. nn.Flatten(),
  21. nn.Linear(2048, 512),
  22. nn.ReLU(True), # see https://stackoverflow.com/questions/50187127/is-it-necessary-to-use-a-linear-bottleneck-layer-for-autoencoder
  23. )
  24. self.decoder = nn.Sequential(
  25. nn.Linear(512, 2048),
  26. nn.ReLU(True),
  27. nn.Unflatten(1, (128, 4, 4)),
  28. nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
  29. nn.ReLU(True),
  30. nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
  31. nn.ReLU(True),
  32. nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),
  33. nn.ReLU(True),
  34. nn.ConvTranspose2d(32, 32, kernel_size=4, stride=2, padding=1),
  35. nn.ReLU(True),
  36. nn.ConvTranspose2d(32, 3, kernel_size=8, stride=4, padding=2),
  37. nn.Tanh(),
  38. )
  39. def forward(self, x):
  40. x = self.encoder(x)
  41. x = self.decoder(x)
  42. return x