Autoencoder2.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
# Copyright (c) 2023 Felix Kleinsteuber and Computer Vision Group, Friedrich Schiller University Jena
# This is the preferred autoencoder architecture.
# Fully convolutional with a 7-layer encoder and a 7-layer decoder.
# Dropout before every layer, ReLU on hidden layers, tanh on the output layer.
# The number of latent features must be a multiple of 16.
  6. from torch import nn
  7. class Autoencoder(nn.Module):
  8. def __init__(self, dropout=0.1, latent_features=512):
  9. super(Autoencoder, self).__init__()
  10. if latent_features % 16 != 0:
  11. raise ValueError("latent_features must be a multiple of 16 in this architecture.")
  12. latent_channels = latent_features // 16
  13. self.encoder = nn.Sequential(
  14. nn.Dropout(dropout),
  15. nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3),
  16. nn.ReLU(True),
  17. nn.Dropout(dropout),
  18. nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),
  19. nn.ReLU(True),
  20. nn.Dropout(dropout),
  21. nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
  22. nn.ReLU(True),
  23. nn.Dropout(dropout),
  24. nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1),
  25. nn.ReLU(True),
  26. nn.Dropout(dropout),
  27. nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
  28. nn.ReLU(True),
  29. nn.Dropout(dropout),
  30. nn.Conv2d(128, 128, kernel_size=3, stride=2, padding=1),
  31. nn.ReLU(True),
  32. nn.Dropout(dropout),
  33. nn.Conv2d(128, latent_channels, kernel_size=3, padding="same"),
  34. nn.ReLU(True),
  35. )
  36. self.decoder = nn.Sequential(
  37. nn.Dropout(dropout),
  38. nn.Conv2d(latent_channels, 128, kernel_size=3, padding="same"),
  39. nn.ReLU(True),
  40. nn.Dropout(dropout),
  41. nn.ConvTranspose2d(128, 128, kernel_size=4, stride=2, padding=1),
  42. nn.ReLU(True),
  43. nn.Dropout(dropout),
  44. nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
  45. nn.ReLU(True),
  46. nn.Dropout(dropout),
  47. nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
  48. nn.ReLU(True),
  49. nn.Dropout(dropout),
  50. nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
  51. nn.ReLU(True),
  52. nn.Dropout(dropout),
  53. nn.ConvTranspose2d(64, 32, kernel_size=6, stride=2, padding=2),
  54. nn.ReLU(True),
  55. nn.Dropout(dropout),
  56. nn.ConvTranspose2d(32, 16, kernel_size=8, stride=2, padding=3),
  57. nn.ReLU(True),
  58. nn.Dropout(dropout),
  59. nn.Conv2d(16, 3, kernel_size=3, stride=1, padding="same"),
  60. nn.Tanh(),
  61. )
  62. def forward(self, x):
  63. x = self.encoder(x)
  64. x = self.decoder(x)
  65. return x