pytorch-ai/TicTacToe_AI/Net/wandb/dryrun-20200128_134219-mz8btidj/output.log

running on cpu
Loading file...
986410
Generating testset...

  0%|                                                           | 0/10000 [00:00<?, ?it/s]
  4%|█▋                                             | 361/10000 [00:00<00:02, 3604.33it/s]
  7%|███▍                                           | 729/10000 [00:00<00:02, 3624.99it/s]
 11%|█████                                         | 1101/10000 [00:00<00:02, 3650.41it/s]
 15%|██████▊                                       | 1469/10000 [00:00<00:02, 3659.16it/s]
 18%|████████▍                                     | 1841/10000 [00:00<00:02, 3674.99it/s]
 22%|██████████▏                                   | 2210/10000 [00:00<00:02, 3677.73it/s]
 26%|███████████▊                                  | 2577/10000 [00:00<00:02, 3672.67it/s]
 29%|█████████████▌                                | 2947/10000 [00:00<00:01, 3679.11it/s]
 33%|███████████████▎                              | 3319/10000 [00:00<00:01, 3689.61it/s]
 37%|████████████████▉                             | 3689/10000 [00:01<00:01, 3690.11it/s]
 41%|██████████████████▋                           | 4056/10000 [00:01<00:01, 3682.18it/s]
 44%|████████████████████▎                         | 4418/10000 [00:01<00:01, 3659.66it/s]
 48%|██████████████████████                        | 4783/10000 [00:01<00:01, 3654.69it/s]
 51%|███████████████████████▋                      | 5147/10000 [00:01<00:01, 3649.97it/s]
 55%|█████████████████████████▎                    | 5510/10000 [00:01<00:02, 2050.04it/s]
 59%|███████████████████████████                   | 5881/10000 [00:01<00:01, 2367.55it/s]
 62%|████████████████████████████▋                 | 6249/10000 [00:01<00:01, 2650.87it/s]
 66%|██████████████████████████████▍               | 6615/10000 [00:02<00:01, 2889.26it/s]
 70%|████████████████████████████████▏             | 6984/10000 [00:02<00:00, 3090.42it/s]
 74%|█████████████████████████████████▊            | 7354/10000 [00:02<00:00, 3249.29it/s]
 77%|███████████████████████████████████▌          | 7724/10000 [00:02<00:00, 3370.40it/s]
 81%|█████████████████████████████████████▏        | 8094/10000 [00:02<00:00, 3462.78it/s]
 85%|██████████████████████████████████████▉       | 8463/10000 [00:02<00:00, 3526.45it/s]
 88%|████████████████████████████████████████▋     | 8834/10000 [00:02<00:00, 3579.25it/s]
 92%|██████████████████████████████████████████▎   | 9202/10000 [00:02<00:00, 3608.71it/s]
 96%|████████████████████████████████████████████  | 9569/10000 [00:02<00:00, 3617.34it/s]
 99%|█████████████████████████████████████████████▋| 9935/10000 [00:02<00:00, 3628.72it/s]
100%|█████████████████████████████████████████████| 10000/10000 [00:02<00:00, 3354.47it/s]
Generating trainset...

  0%|                                                            | 0/9999 [00:00<?, ?it/s]
  4%|█▋                                              | 353/9999 [00:00<00:02, 3526.10it/s]
  7%|███▍                                            | 721/9999 [00:00<00:02, 3568.92it/s]
 11%|█████                                          | 1090/9999 [00:00<00:02, 3604.37it/s]
 15%|██████▊                                        | 1457/9999 [00:00<00:02, 3622.26it/s]
 18%|████████▌                                      | 1825/9999 [00:00<00:02, 3637.99it/s]
 22%|██████████▎                                    | 2194/9999 [00:00<00:02, 3652.40it/s]
 26%|████████████                                   | 2565/9999 [00:00<00:02, 3667.72it/s]
 29%|█████████████▊                                 | 2939/9999 [00:00<00:01, 3687.59it/s]
 33%|███████████████▌                               | 3306/9999 [00:00<00:01, 3681.77it/s]
 37%|█████████████████▎                             | 3677/9999 [00:01<00:01, 3688.69it/s]
 40%|██████████████████▉                            | 4036/9999 [00:01<00:01, 3613.27it/s]
 44%|████████████████████▋                          | 4407/9999 [00:01<00:01, 3639.94it/s]
 48%|██████████████████████▍                        | 4776/9999 [00:01<00:01, 3653.43it/s]
 51%|████████████████████████▏                      | 5139/9999 [00:01<00:01, 3565.88it/s]
 55%|█████████████████████████▊                     | 5496/9999 [00:01<00:01, 3565.19it/s]
 59%|███████████████████████████▌                   | 5856/9999 [00:01<00:01, 3575.41it/s]
 62%|█████████████████████████████▏                 | 6214/9999 [00:01<00:01, 3574.95it/s]
 66%|██████████████████████████████▉                | 6576/9999 [00:01<00:00, 3587.09it/s]
 69%|████████████████████████████████▌              | 6937/9999 [00:01<00:00, 3591.44it/s]
 73%|██████████████████████████████████▎            | 7298/9999 [00:02<00:00, 3595.32it/s]
 77%|████████████████████████████████████           | 7662/9999 [00:02<00:00, 3605.98it/s]
 80%|█████████████████████████████████████▋         | 8023/9999 [00:02<00:00, 3605.04it/s]
 84%|███████████████████████████████████████▍       | 8384/9999 [00:02<00:00, 3604.00it/s]
 87%|█████████████████████████████████████████      | 8746/9999 [00:02<00:00, 3608.62it/s]
 91%|██████████████████████████████████████████▊    | 9107/9999 [00:02<00:00, 3571.15it/s]
 95%|████████████████████████████████████████████▌  | 9476/9999 [00:02<00:00, 3604.83it/s]
 98%|██████████████████████████████████████████████▎| 9843/9999 [00:02<00:00, 3624.02it/s]
100%|███████████████████████████████████████████████| 9999/9999 [00:02<00:00, 3621.02it/s]
Epoch: 0

  0%|                                                            | 0/3333 [00:00<?, ?it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([3, 9, 9])

  0%|                                                    | 1/3333 [00:00<20:35,  2.70it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 2])

  0%|                                                    | 2/3333 [00:00<16:21,  3.40it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 2])

  0%|                                                    | 3/3333 [00:00<13:06,  4.23it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([6, 6, 7])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 2, 6])

  0%|                                                    | 5/3333 [00:00<10:50,  5.12it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([5, 9, 9])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([5, 9, 9])

  0%|                                                    | 7/3333 [00:00<09:14,  6.00it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([1, 1, 9])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 7])

  0%|▏                                                   | 9/3333 [00:01<08:07,  6.82it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([7, 9, 8])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([1, 1, 6])

  0%|▏                                                  | 11/3333 [00:01<07:20,  7.54it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([8, 5, 3])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([7, 2, 2])

  0%|▏                                                  | 13/3333 [00:01<06:48,  8.13it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([6, 0, 9])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([3, 8, 9])

  0%|▏                                                  | 15/3333 [00:01<06:25,  8.61it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([3, 2, 0])

  0%|▏                                                  | 16/3333 [00:02<11:06,  4.97it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([7, 5, 5])

  1%|▎                                                  | 17/3333 [00:02<11:05,  4.98it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([5, 5, 9])

  1%|▎                                                  | 18/3333 [00:02<09:36,  5.75it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([5, 7, 5])

  1%|▎                                                  | 19/3333 [00:02<08:44,  6.32it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([1, 2, 5])

  1%|▎                                                  | 20/3333 [00:02<07:57,  6.94it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([1, 6, 9])

  1%|▎                                                  | 21/3333 [00:02<07:35,  7.27it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([8, 4, 9])

  1%|▎                                                  | 22/3333 [00:02<07:20,  7.52it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 1, 2])

  1%|▎                                                  | 23/3333 [00:03<07:09,  7.71it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 2])

  1%|▎                                                  | 24/3333 [00:03<06:50,  8.06it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([8, 9, 8])

  1%|▍                                                  | 25/3333 [00:03<06:48,  8.10it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 2, 7])

  1%|▍                                                  | 26/3333 [00:03<06:36,  8.35it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])

  1%|▍                                                  | 27/3333 [00:03<06:27,  8.53it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([5, 1, 9])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([3, 9, 9])

  1%|▍                                                  | 29/3333 [00:03<06:10,  8.93it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([3, 6, 9])

  1%|▍                                                  | 30/3333 [00:03<06:09,  8.95it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([5, 9, 9])

  1%|▍                                                  | 31/3333 [00:03<06:08,  8.97it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([0, 9, 8])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([7, 1, 9])

  1%|▌                                                  | 33/3333 [00:04<05:57,  9.24it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([4, 9, 9])

  1%|▌                                                  | 34/3333 [00:04<06:20,  8.66it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 8])

  1%|▌                                                  | 35/3333 [00:04<07:00,  7.84it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 6, 9])

  1%|▌                                                  | 36/3333 [00:04<06:33,  8.38it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([9, 5, 9])

  1%|▌                                                  | 37/3333 [00:04<07:00,  7.84it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([5, 5, 0])

  1%|▌                                                  | 38/3333 [00:04<06:33,  8.38it/s]tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
       grad_fn=<LogSoftmaxBackward>)
tensor([0, 3, 3])