# Hello, below is my code for an AlexNet assignment. Could someone look it
# over to check whether it is optimal or could be improved? Thank you.
"""Train an AlexNet-style CNN on CIFAR-100 with SGD and report test accuracy."""

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# The stride-4 first convolution and three 3x3 max-pools shrink a 32x32 CIFAR
# image to 1x1 before the last pool, which then fails. Upscaling the inputs
# keeps the feature extractor valid.
IMAGE_SIZE = 224
NUM_CLASSES = 100  # CIFAR-100 has 100 target classes
BATCH_SIZE = 64
NUM_EPOCHS = 10
LOG_INTERVAL = 100  # mini-batches between running-loss reports


def build_dataloaders(root="./data", batch_size=BATCH_SIZE, image_size=IMAGE_SIZE):
    """Download CIFAR-100 under *root* and return ``(train_loader, test_loader)``.

    Images are resized to ``image_size x image_size``, converted to tensors
    and normalized per channel with mean 0.5 and std 0.5. Only the training
    loader shuffles.
    """
    # Imported here so the model and training helpers stay importable when
    # torchvision is not installed.
    import torchvision.datasets as datasets
    import torchvision.transforms as transforms

    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    trainset = datasets.CIFAR100(
        root=root, train=True, download=True, transform=transform)
    testset = datasets.CIFAR100(
        root=root, train=False, download=True, transform=transform)

    train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader


class AlexNet(nn.Module):
    """AlexNet-style network: five conv layers followed by three linear layers.

    Args:
        num_classes: Number of output logits. Defaults to 1000.
    """

    def __init__(self, num_classes=1000):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Pools whatever spatial size the features produce down to 6x6 so the
        # first linear layer always receives 256 * 6 * 6 inputs.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image batch to ``(batch, num_classes)`` logits."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch axis, flatten the rest
        return self.classifier(x)


def train_one_epoch(net, loader, criterion, optimizer, device, epoch=0,
                    log_interval=LOG_INTERVAL):
    """Run one optimization pass over *loader* and return the mean batch loss.

    Args:
        net: Model to train; switched to training mode.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        criterion: Loss callable applied to ``(outputs, labels)``.
        optimizer: Optimizer holding the parameters of *net*.
        device: Device the batches are moved to before the forward pass.
        epoch: Zero-based epoch index, used only in the progress message.
        log_interval: Number of batches between running-loss printouts.

    Returns:
        The average loss over all batches, or ``0.0`` for an empty loader.
    """
    net.train()
    running_loss = 0.0
    total_loss = 0.0
    num_batches = 0
    for i, (inputs, labels) in enumerate(loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        running_loss += batch_loss
        total_loss += batch_loss
        num_batches += 1
        if i % log_interval == log_interval - 1:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / log_interval))
            running_loss = 0.0
    return total_loss / num_batches if num_batches else 0.0


def evaluate(net, loader, device):
    """Return the top-1 accuracy of *net* over *loader* as a float in [0, 1].

    The model is put in evaluation mode and gradients are disabled. An empty
    loader yields ``0.0``.
    """
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predictions = net(inputs).argmax(dim=1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
    return correct / total if total else 0.0


def main():
    """Train AlexNet on CIFAR-100 for ``NUM_EPOCHS`` epochs, then print test accuracy."""
    # set device to GPU if available, otherwise use CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    train_loader, test_loader = build_dataloaders()

    # initialize network, loss function, and optimizer
    net = AlexNet(num_classes=NUM_CLASSES).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    # train the network
    for epoch in range(NUM_EPOCHS):
        train_one_epoch(net, train_loader, criterion, optimizer, device, epoch)

    print('test accuracy: %.3f' % evaluate(net, test_loader, device))


if __name__ == "__main__":
    main()