diff --git a/mnist_classifier.py b/mnist_classifier.py
new file mode 100644
index 0000000..1c51548
--- /dev/null
+++ b/mnist_classifier.py
@@ -0,0 +1,66 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F
+from torchvision import transforms, datasets
+
+# MNIST splits stored under ./datasets (download=True); ToTensor is the only
+# transform applied to each image.
+train = datasets.MNIST(root='./datasets', train=True, download=True,
+                       transform=transforms.Compose([transforms.ToTensor()]))
+test = datasets.MNIST(root='./datasets', train=False, download=True,
+                      transform=transforms.Compose([transforms.ToTensor()]))
+
+# Both loaders yield batches of 10; only the training loader shuffles.
+trainset = torch.utils.data.DataLoader(dataset=train, batch_size=10,
+                                       shuffle=True)
+testset = torch.utils.data.DataLoader(dataset=test, batch_size=10,
+                                      shuffle=False)
+
+class Net(nn.Module):
+    """Fully connected classifier: 784 -> 64 -> 120 -> 120 -> 64 -> 10."""
+    def __init__(self):
+        super().__init__()
+        self.fc1 = nn.Linear(28 * 28, 64)
+        self.fc2 = nn.Linear(64, 120)
+        self.fc3 = nn.Linear(120, 120)
+        self.fc4 = nn.Linear(120, 64)
+        self.fc5 = nn.Linear(64, 10)
+
+    def forward(self, x):
+        """Return (batch, 10) log-probabilities for flat or image-shaped input."""
+        x = x.flatten(start_dim=1)  # no-op for (batch, 784); flattens images
+        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
+            x = F.relu(layer(x))  # every hidden layer is followed by ReLU
+        return F.log_softmax(self.fc5(x), dim=1)
+
+
+net = Net()
+
+# NOTE: loss_function is never used below; the loop calls F.nll_loss directly,
+# which matches the log-probabilities returned by Net.forward.
+loss_function = nn.CrossEntropyLoss()
+optimizer = optim.Adam(net.parameters(), lr=0.001)
+
+# Each epoch: train on every batch, save a checkpoint, report test accuracy.
+for epoch in range(10):  # 10 full passes over the training data
+    for X, y in trainset:  # X: batch of images, y: batch of target labels
+        net.zero_grad()  # reset gradients accumulated by the previous step
+        output = net(X.view(-1, 784))  # flatten each 28x28 image to 784 values
+        loss = F.nll_loss(output, y)
+        loss.backward()  # backpropagate the loss through the network
+        optimizer.step()  # update the weights from the computed gradients
+
+    print(loss)  # loss of the final training batch in this epoch
+    torch.save(net, f'./nets/net_{epoch}.pt')  # assumes ./nets already exists
+    # Evaluate on the test loader after every epoch.
+    correct = 0
+    total = 0
+    with torch.no_grad():  # inference only, so skip gradient tracking
+        for X, y in testset:
+            output = net(X.view(-1, 784))
+            # The prediction is the class with the highest log-probability;
+            # score the whole batch at once rather than one sample at a time.
+            correct += (output.argmax(dim=1) == y).sum().item()
+            total += y.size(0)
+    print("Accuracy: ", round(correct / total, 3))
diff --git a/nets/net_97.7.pt b/nets/net_97.7.pt
new file mode 100644
index 0000000..f63caa2
Binary files /dev/null and b/nets/net_97.7.pt differ