Trained

2020-01-27 20:58:35 +01:00 · 2020-01-27 20:58:35 +01:00 · 55cff9b18f
commit 55cff9b18f
parent 3ce16b7010
287 changed files with 115778 additions and 177 deletions
--- a/mnist_classifier.py
+++ b/mnist_classifier.py
@@ -17,7 +17,7 @@ test = datasets.MNIST('./datasets', train=False, download=True,
                          transforms.ToTensor()
                      ]))

-trainset = torch.utils.data.DataLoader(train, batch_size=15, shuffle=True)
+trainset = torch.utils.data.DataLoader(train, batch_size=200, shuffle=True)
 testset = torch.utils.data.DataLoader(test, batch_size=10, shuffle=False)


@@ -42,26 +42,36 @@ class Net(nn.Module):
 net = Net()
 wandb.watch(net)

+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+print('runnning on %s' % device)
+
+net = net.to(device)
+
 loss_function = nn.CrossEntropyLoss()
 optimizer = optim.Adam(net.parameters(), lr=0.001)

-for epoch in range(10):  # 10 full passes over the data
+for epoch in range(200):  # 200 full passes over the data
    for data in tqdm(trainset):  # `data` is a batch of data
        X, y = data  # X is the batch of features, y is the batch of targets.
        net.zero_grad()  # sets gradients to 0 before loss calc. You will do this likely every step.
+        X = X.to(device)
        output = net(X.view(-1, 784))  # pass in the reshaped batch (recall they are 28x28 atm)
+        output = output.cpu()
        loss = loss_function(output, y)  # calc and grab the loss value
        loss.backward()  # apply this loss backwards thru the network's parameters
        optimizer.step()  # attempt to optimize weights to account for loss/gradients
        wandb.log({'loss': loss})
-
-    # torch.save(net, './nets/net_' + str(epoch) + ".pt")
+    net = net.cpu()
+    torch.save(net, './nets/net_gpu_large_batch_' + str(epoch) + ".pt")
+    net = net.to(device)
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testset:
            X, y = data
+            X = X.to(device)
            output = net(X.view(-1, 784))
+            output = output.cpu()
            for idx, i in enumerate(output):
                if torch.argmax(i) == y[idx]:
                    correct += 1