Implemented batching for TicTacToe AI

This commit is contained in:
Clemens Dautermann 2020-01-28 14:45:00 +01:00
parent 55cff9b18f
commit 56ee2635b5
96 changed files with 8426 additions and 7 deletions

View file

@ -0,0 +1,9 @@
wandb_version: 1
_wandb:
desc: null
value:
cli_version: 0.8.22
framework: torch
is_jupyter_run: false
python_version: 3.7.5

View file

@ -0,0 +1,645 @@
running on cpu
Loading file...
986410
Generating testset...
0%| | 0/10000 [00:00<?, ?it/s] 4%|█▉ | 402/10000 [00:00<00:02, 4017.93it/s] 8%|███▊ | 819/10000 [00:00<00:02, 4061.37it/s] 12%|█████▋ | 1237/10000 [00:00<00:02, 4094.77it/s] 17%|███████▌ | 1657/10000 [00:00<00:02, 4123.99it/s] 21%|█████████▌ | 2073/10000 [00:00<00:01, 4132.77it/s] 25%|███████████▍ | 2489/10000 [00:00<00:01, 4138.15it/s] 29%|█████████████▎ | 2905/10000 [00:00<00:01, 4143.16it/s] 33%|███████████████▎ | 3318/10000 [00:00<00:01, 4138.55it/s] 37%|█████████████████▏ | 3735/10000 [00:00<00:01, 4145.31it/s] 41%|███████████████████ | 4134/10000 [00:01<00:01, 4036.11it/s] 45%|████████████████████▊ | 4538/10000 [00:01<00:01, 4036.50it/s] 50%|██████████████████████▊ | 4952/10000 [00:01<00:01, 4066.81it/s] 54%|████████████████████████▋ | 5366/10000 [00:01<00:01, 4086.13it/s] 58%|██████████████████████████▌ | 5772/10000 [00:01<00:01, 2307.42it/s] 62%|████████████████████████████▍ | 6193/10000 [00:01<00:01, 2668.58it/s] 66%|██████████████████████████████▍ | 6613/10000 [00:01<00:01, 2995.75it/s] 70%|████████████████████████████████▎ | 7033/10000 [00:01<00:00, 3275.75it/s] 75%|██████████████████████████████████▎ | 7454/10000 [00:02<00:00, 3507.77it/s] 79%|████████████████████████████████████▏ | 7871/10000 [00:02<00:00, 3683.21it/s] 83%|██████████████████████████████████████ | 8288/10000 [00:02<00:00, 3816.74it/s] 87%|████████████████████████████████████████ | 8707/10000 [00:02<00:00, 3919.32it/s] 91%|█████████████████████████████████████████▉ | 9127/10000 [00:02<00:00, 3997.37it/s] 95%|███████████████████████████████████████████▉ | 9547/10000 [00:02<00:00, 4054.87it/s] 100%|█████████████████████████████████████████████▊| 9966/10000 [00:02<00:00, 4093.51it/s] 100%|█████████████████████████████████████████████| 10000/10000 [00:02<00:00, 3742.96it/s]
Generating trainset...
0%| | 0/9999 [00:00<?, ?it/s] 4%|█▉ | 400/9999 [00:00<00:02, 3999.16it/s] 8%|███▉ | 819/9999 [00:00<00:02, 4052.20it/s] 12%|█████▊ | 1234/9999 [00:00<00:02, 4080.38it/s] 17%|███████▊ | 1654/9999 [00:00<00:02, 4114.16it/s] 21%|█████████▋ | 2074/9999 [00:00<00:01, 4139.48it/s] 25%|███████████▋ | 2494/9999 [00:00<00:01, 4156.09it/s] 29%|█████████████▋ | 2912/9999 [00:00<00:01, 4161.87it/s] 33%|███████████████▋ | 3327/9999 [00:00<00:01, 4157.75it/s] 37%|█████████████████▌ | 3740/9999 [00:00<00:01, 4146.98it/s] 41%|███████████████████▍ | 4139/9999 [00:01<00:01, 4051.77it/s] 46%|█████████████████████▍ | 4556/9999 [00:01<00:01, 4084.80it/s] 50%|███████████████████████▍ | 4973/9999 [00:01<00:01, 4109.31it/s] 54%|█████████████████████████▎ | 5389/9999 [00:01<00:01, 4124.27it/s] 58%|███████████████████████████▎ | 5808/9999 [00:01<00:01, 4142.49it/s] 62%|█████████████████████████████▏ | 6221/9999 [00:01<00:00, 4138.40it/s] 66%|███████████████████████████████▏ | 6640/9999 [00:01<00:00, 4152.86it/s] 71%|█████████████████████████████████▏ | 7059/9999 [00:01<00:00, 4163.62it/s] 75%|███████████████████████████████████▏ | 7480/9999 [00:01<00:00, 4174.81it/s] 79%|█████████████████████████████████████▏ | 7900/9999 [00:01<00:00, 4181.11it/s] 83%|███████████████████████████████████████ | 8320/9999 [00:02<00:00, 4185.33it/s] 87%|█████████████████████████████████████████ | 8740/9999 [00:02<00:00, 4186.97it/s] 92%|███████████████████████████████████████████ | 9161/9999 [00:02<00:00, 4192.61it/s] 96%|█████████████████████████████████████████████ | 9582/9999 [00:02<00:00, 4194.70it/s] 100%|███████████████████████████████████████████████| 9999/9999 [00:02<00:00, 4157.34it/s]
Epoch: 0
0%| | 0/3333 [00:00<?, ?it/s]tensor([[-2.2606, -2.3016, -2.3324, -2.1078, -2.4331, -2.3292, -2.4555, -2.1577,
-2.3959, -2.3086],
[-2.2527, -2.3219, -2.3240, -2.1096, -2.4330, -2.3107, -2.4580, -2.1627,
-2.3925, -2.3163],
[-2.3171, -2.3093, -2.3601, -2.0109, -2.4205, -2.4251, -2.4450, -2.1626,
-2.3624, -2.2971]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
0%| | 1/3333 [00:00<17:50, 3.11it/s]tensor([[-2.2756, -2.3164, -2.3221, -2.0758, -2.4144, -2.3594, -2.4396, -2.1908,
-2.3865, -2.3002],
[-2.2879, -2.3256, -2.3406, -2.0495, -2.4257, -2.3662, -2.4585, -2.1805,
-2.3813, -2.2777],
[-2.2534, -2.3107, -2.3269, -2.1052, -2.4295, -2.3340, -2.4431, -2.1795,
-2.3939, -2.3015]], grad_fn=<LogSoftmaxBackward>)
tensor([4, 9, 9])
0%| | 2/3333 [00:00<14:31, 3.82it/s]tensor([[-2.2714, -2.3200, -2.3438, -2.0782, -2.4236, -2.3574, -2.4690, -2.1722,
-2.3955, -2.2591],
[-2.3185, -2.3122, -2.3693, -2.0265, -2.4150, -2.4169, -2.4660, -2.1623,
-2.3694, -2.2528],
[-2.2624, -2.3141, -2.3460, -2.0996, -2.4266, -2.3471, -2.4651, -2.1675,
-2.3957, -2.2612]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 9, 5])
0%| | 3/3333 [00:00<12:11, 4.55it/s]tensor([[-2.2861, -2.3200, -2.3502, -2.0903, -2.4236, -2.3607, -2.4754, -2.1649,
-2.3871, -2.2320],
[-2.3026, -2.3210, -2.3485, -2.0647, -2.4148, -2.3664, -2.4793, -2.1702,
-2.3897, -2.2383],
[-2.2406, -2.3362, -2.3302, -2.1319, -2.4198, -2.2811, -2.4863, -2.1779,
-2.4037, -2.2727]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 3, 9])
0%| | 4/3333 [00:00<10:33, 5.25it/s]tensor([[-2.2478, -2.3374, -2.3269, -2.1172, -2.4215, -2.3116, -2.4681, -2.1904,
-2.4018, -2.2560],
[-2.2880, -2.3389, -2.3378, -2.0648, -2.4154, -2.3548, -2.4689, -2.1920,
-2.3918, -2.2385],
[-2.2581, -2.3288, -2.3277, -2.1122, -2.4221, -2.3202, -2.4582, -2.1883,
-2.3994, -2.2625]], grad_fn=<LogSoftmaxBackward>)
tensor([7, 4, 9])
0%| | 5/3333 [00:00<09:25, 5.88it/s]tensor([[-2.3107, -2.3244, -2.3572, -2.0726, -2.4097, -2.3645, -2.4784, -2.1731,
-2.3932, -2.2114],
[-2.3267, -2.3186, -2.3651, -2.0596, -2.4089, -2.3640, -2.4796, -2.1745,
-2.3893, -2.2124],
[-2.2623, -2.3268, -2.3336, -2.1178, -2.4183, -2.3215, -2.4645, -2.1790,
-2.4061, -2.2494]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
0%| | 6/3333 [00:00<08:37, 6.43it/s]tensor([[-2.3081, -2.3159, -2.3551, -2.0756, -2.4174, -2.3747, -2.4694, -2.1609,
-2.4064, -2.2136],
[-2.2806, -2.3176, -2.3370, -2.1004, -2.4165, -2.3380, -2.4641, -2.1725,
-2.4148, -2.2431],
[-2.3250, -2.3180, -2.3603, -2.0373, -2.4065, -2.4279, -2.4522, -2.1708,
-2.3891, -2.2189]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 2, 8])
0%| | 7/3333 [00:01<08:04, 6.87it/s]tensor([[-2.2817, -2.3275, -2.3342, -2.1107, -2.4127, -2.3317, -2.4715, -2.1759,
-2.4121, -2.2256],
[-2.3241, -2.3238, -2.3538, -2.0402, -2.4024, -2.4277, -2.4504, -2.1772,
-2.3881, -2.2160],
[-2.3170, -2.3439, -2.3584, -2.0741, -2.4081, -2.3626, -2.4867, -2.1711,
-2.3946, -2.1838]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 5, 5])
0%| | 8/3333 [00:01<07:29, 7.39it/s]tensor([[-2.2864, -2.3414, -2.3356, -2.1119, -2.4130, -2.3283, -2.4789, -2.1768,
-2.4127, -2.2019],
[-2.3035, -2.3849, -2.3451, -2.0847, -2.4183, -2.3249, -2.5108, -2.1680,
-2.4071, -2.1607],
[-2.3567, -2.3474, -2.3607, -2.0231, -2.3987, -2.4192, -2.4883, -2.1724,
-2.3933, -2.1634]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 3])
tensor([[-2.3214, -2.3341, -2.3440, -2.0693, -2.4097, -2.3730, -2.4662, -2.1748,
-2.4060, -2.1984],
[-2.2913, -2.3438, -2.3343, -2.1234, -2.4194, -2.3207, -2.4808, -2.1723,
-2.4118, -2.1895],
[-2.3261, -2.3620, -2.3193, -2.0527, -2.3749, -2.3780, -2.4794, -2.2136,
-2.3985, -2.1923]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
0%|▏ | 10/3333 [00:01<06:54, 8.02it/s]tensor([[-2.3560, -2.3741, -2.3582, -2.0290, -2.4012, -2.3669, -2.5343, -2.1787,
-2.3985, -2.1345],
[-2.3232, -2.3676, -2.3245, -2.0907, -2.3900, -2.3326, -2.4869, -2.2045,
-2.4054, -2.1670],
[-2.3370, -2.3664, -2.3384, -2.0707, -2.4053, -2.3475, -2.4946, -2.1856,
-2.4100, -2.1508]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 4])
tensor([[-2.2873, -2.3531, -2.3239, -2.1248, -2.4094, -2.3020, -2.4832, -2.1868,
-2.4149, -2.1986],
[-2.3729, -2.3631, -2.3615, -2.0280, -2.3982, -2.4023, -2.5033, -2.1804,
-2.3984, -2.1224],
[-2.3551, -2.3759, -2.3400, -2.0557, -2.3883, -2.3579, -2.5027, -2.1997,
-2.4041, -2.1335]], grad_fn=<LogSoftmaxBackward>)
tensor([1, 4, 3])
0%|▏ | 12/3333 [00:01<06:35, 8.41it/s]tensor([[-2.3229, -2.3882, -2.3401, -2.0903, -2.4072, -2.3122, -2.5321, -2.1808,
-2.4281, -2.1161],
[-2.3191, -2.3677, -2.3229, -2.1030, -2.3919, -2.3170, -2.4828, -2.2059,
-2.4081, -2.1700],
[-2.3181, -2.3723, -2.3336, -2.0893, -2.3976, -2.3284, -2.5013, -2.1994,
-2.4202, -2.1421]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 4, 2])
0%|▏ | 13/3333 [00:01<06:16, 8.83it/s]tensor([[-2.3643, -2.3715, -2.3515, -2.0656, -2.3907, -2.3614, -2.5088, -2.1961,
-2.4012, -2.1070],
[-2.3424, -2.3575, -2.3301, -2.0730, -2.3820, -2.3563, -2.4875, -2.2074,
-2.4067, -2.1559],
[-2.3225, -2.3725, -2.3349, -2.1063, -2.4044, -2.3132, -2.5183, -2.1845,
-2.4226, -2.1275]], grad_fn=<LogSoftmaxBackward>)
tensor([1, 0, 3])
tensor([[-2.3413, -2.4002, -2.3512, -2.0830, -2.3920, -2.3197, -2.5432, -2.2075,
-2.4043, -2.0825],
[-2.3238, -2.3654, -2.3269, -2.1013, -2.3910, -2.3167, -2.5043, -2.2039,
-2.4172, -2.1464],
[-2.2936, -2.3716, -2.3201, -2.1313, -2.4004, -2.2874, -2.4984, -2.1999,
-2.4144, -2.1702]], grad_fn=<LogSoftmaxBackward>)
tensor([5, 9, 2])
0%|▏ | 15/3333 [00:01<06:02, 9.15it/s]tensor([[-2.3649, -2.3669, -2.3403, -2.0573, -2.3789, -2.3549, -2.5068, -2.2148,
-2.4065, -2.1220],
[-2.3131, -2.3718, -2.3193, -2.1177, -2.3995, -2.2970, -2.5006, -2.2019,
-2.4146, -2.1565],
[-2.3689, -2.3832, -2.3237, -2.0500, -2.3535, -2.3604, -2.5303, -2.2353,
-2.4074, -2.1083]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 0, 9])
0%|▏ | 16/3333 [00:01<05:53, 9.39it/s]tensor([[-2.3848, -2.3738, -2.3524, -2.0436, -2.3752, -2.3689, -2.5305, -2.2147,
-2.4059, -2.0834],
[-2.3562, -2.3779, -2.3441, -2.0836, -2.3828, -2.3315, -2.5272, -2.2160,
-2.4068, -2.0908],
[-2.3750, -2.3813, -2.3533, -2.0481, -2.3854, -2.3705, -2.5488, -2.2051,
-2.4106, -2.0646]], grad_fn=<LogSoftmaxBackward>)
tensor([1, 3, 3])
tensor([[-2.3394, -2.3727, -2.3114, -2.0750, -2.3653, -2.3333, -2.5086, -2.2442,
-2.4177, -2.1346],
[-2.3788, -2.3742, -2.3518, -2.0418, -2.3774, -2.3731, -2.5431, -2.2177,
-2.4114, -2.0706],
[-2.3775, -2.3678, -2.3574, -2.0595, -2.3785, -2.3520, -2.5314, -2.2192,
-2.4014, -2.0824]], grad_fn=<LogSoftmaxBackward>)
tensor([0, 9, 3])
1%|▎ | 18/3333 [00:02<05:47, 9.55it/s]tensor([[-2.3366, -2.3906, -2.3306, -2.0914, -2.3870, -2.3065, -2.5437, -2.2276,
-2.4294, -2.0791],
[-2.3471, -2.3639, -2.3331, -2.0711, -2.3817, -2.3334, -2.5249, -2.2218,
-2.4212, -2.1154],
[-2.3776, -2.3806, -2.3331, -2.0165, -2.3443, -2.4071, -2.5192, -2.2579,
-2.3914, -2.1035]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 9, 0])
1%|▎ | 19/3333 [00:02<05:53, 9.37it/s]tensor([[-2.3218, -2.3660, -2.3282, -2.1020, -2.3896, -2.3077, -2.5298, -2.2237,
-2.4309, -2.1090],
[-2.3337, -2.4280, -2.3441, -2.0773, -2.3949, -2.2880, -2.5762, -2.2384,
-2.4255, -2.0413],
[-2.3494, -2.3619, -2.3375, -2.0671, -2.3861, -2.3456, -2.5414, -2.2209,
-2.4291, -2.0871]], grad_fn=<LogSoftmaxBackward>)
tensor([0, 9, 9])
tensor([[-2.3692, -2.3648, -2.3461, -2.0246, -2.3710, -2.3894, -2.5360, -2.2400,
-2.4223, -2.0774],
[-2.3524, -2.3868, -2.3187, -2.0489, -2.3477, -2.3573, -2.5255, -2.2748,
-2.4129, -2.0955],
[-2.3995, -2.3848, -2.3778, -2.0116, -2.3871, -2.3974, -2.5629, -2.2311,
-2.4127, -2.0123]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 6, 9])
1%|▎ | 21/3333 [00:02<05:46, 9.55it/s]tensor([[-2.3640, -2.3936, -2.3584, -2.0486, -2.3946, -2.3617, -2.5563, -2.2384,
-2.4259, -2.0168],
[-2.3231, -2.3716, -2.3352, -2.0939, -2.3952, -2.3205, -2.5348, -2.2323,
-2.4352, -2.0781],
[-2.3513, -2.4259, -2.3660, -2.0467, -2.3882, -2.3166, -2.5870, -2.2476,
-2.4190, -2.0172]], grad_fn=<LogSoftmaxBackward>)
tensor([0, 0, 8])
tensor([[-2.3869, -2.3927, -2.3850, -2.0029, -2.3957, -2.4121, -2.5707, -2.2404,
-2.4190, -1.9876],
[-2.3162, -2.3832, -2.3211, -2.0673, -2.3795, -2.3325, -2.5156, -2.2643,
-2.4304, -2.1035],
[-2.3074, -2.3662, -2.3317, -2.0963, -2.3955, -2.3188, -2.5247, -2.2369,
-2.4339, -2.0995]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 5])
1%|▎ | 23/3333 [00:02<05:41, 9.68it/s]tensor([[-2.3135, -2.3787, -2.3322, -2.0889, -2.3872, -2.3268, -2.5215, -2.2581,
-2.4293, -2.0792],
[-2.3497, -2.3871, -2.3547, -2.0359, -2.3827, -2.3729, -2.5389, -2.2635,
-2.4254, -2.0377],
[-2.3680, -2.3759, -2.3737, -2.0220, -2.3827, -2.3948, -2.5359, -2.2566,
-2.4155, -2.0313]], grad_fn=<LogSoftmaxBackward>)
tensor([0, 6, 7])
1%|▎ | 24/3333 [00:02<05:38, 9.77it/s]tensor([[-2.3508, -2.4103, -2.3546, -2.0191, -2.3758, -2.3772, -2.5503, -2.2845,
-2.4254, -2.0164],
[-2.2979, -2.4187, -2.3499, -2.0935, -2.4057, -2.3145, -2.5536, -2.2610,
-2.4383, -2.0138],
[-2.3423, -2.4031, -2.3423, -2.0337, -2.3575, -2.3819, -2.5286, -2.2958,
-2.4104, -2.0462]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 1, 9])
1%|▍ | 25/3333 [00:02<05:36, 9.84it/s]tensor([[-2.3313, -2.3699, -2.3699, -2.0525, -2.3986, -2.3809, -2.5286, -2.2496,
-2.4244, -2.0373],
[-2.3241, -2.4048, -2.3655, -2.0695, -2.4170, -2.3568, -2.5489, -2.2503,
-2.4312, -1.9929],
[-2.3310, -2.4001, -2.3705, -2.0666, -2.4004, -2.3513, -2.5412, -2.2659,
-2.4182, -2.0060]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 4, 3])
tensor([[-2.3339, -2.3793, -2.3625, -2.0312, -2.3812, -2.4055, -2.5053, -2.2739,
-2.4142, -2.0523],
[-2.2823, -2.4205, -2.3584, -2.0941, -2.4100, -2.3169, -2.5584, -2.2697,
-2.4449, -1.9994],
[-2.2781, -2.3708, -2.3387, -2.1102, -2.4048, -2.3206, -2.5083, -2.2551,
-2.4396, -2.0836]], grad_fn=<LogSoftmaxBackward>)
tensor([3, 9, 0])
1%|▍ | 27/3333 [00:03<05:39, 9.73it/s]tensor([[-2.3139, -2.4264, -2.3952, -2.0653, -2.4072, -2.3370, -2.5566, -2.2748,
-2.4087, -1.9818],
[-2.2635, -2.4030, -2.3448, -2.1148, -2.4121, -2.3047, -2.5232, -2.2591,
-2.4359, -2.0600],
[-2.3185, -2.4049, -2.3740, -2.0536, -2.3979, -2.3629, -2.5329, -2.2772,
-2.4241, -2.0076]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 3, 2])
1%|▍ | 28/3333 [00:03<05:58, 9.21it/s]tensor([[-2.2879, -2.3890, -2.3416, -2.0846, -2.3928, -2.3432, -2.5075, -2.2819,
-2.4363, -2.0570],
[-2.2857, -2.3832, -2.3505, -2.0962, -2.4034, -2.3384, -2.5163, -2.2669,
-2.4372, -2.0471],
[-2.2979, -2.3874, -2.3391, -2.0623, -2.3832, -2.3669, -2.4995, -2.2890,
-2.4350, -2.0638]], grad_fn=<LogSoftmaxBackward>)
tensor([5, 1, 3])
1%|▍ | 29/3333 [00:03<06:12, 8.88it/s]tensor([[-2.3017, -2.4018, -2.3528, -2.0636, -2.3897, -2.3568, -2.5303, -2.2920,
-2.4412, -2.0165],
[-2.3201, -2.4089, -2.3590, -2.0330, -2.3650, -2.4046, -2.5084, -2.3165,
-2.4115, -2.0218],
[-2.2972, -2.4291, -2.3774, -2.0674, -2.4205, -2.3478, -2.5466, -2.2720,
-2.4288, -1.9805]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 5, 7])
1%|▍ | 30/3333 [00:03<06:21, 8.66it/s]tensor([[-2.3346, -2.4107, -2.4052, -2.0234, -2.4195, -2.4090, -2.5537, -2.2785,
-2.4352, -1.9361],
[-2.2963, -2.3953, -2.3665, -2.0670, -2.4135, -2.3531, -2.5269, -2.2718,
-2.4463, -2.0126],
[-2.3295, -2.3910, -2.3792, -2.0173, -2.3893, -2.4168, -2.5127, -2.2906,
-2.4263, -2.0103]], grad_fn=<LogSoftmaxBackward>)
tensor([4, 7, 9])
1%|▍ | 31/3333 [00:03<06:27, 8.51it/s]tensor([[-2.3296, -2.4346, -2.3938, -2.0315, -2.4148, -2.3866, -2.5514, -2.2957,
-2.4434, -1.9257],
[-2.3095, -2.3789, -2.3784, -2.0580, -2.4105, -2.3804, -2.5164, -2.2697,
-2.4402, -2.0090],
[-2.2732, -2.4413, -2.3781, -2.0893, -2.4294, -2.3287, -2.5659, -2.2794,
-2.4566, -1.9447]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 8, 3])
1%|▍ | 32/3333 [00:03<06:32, 8.41it/s]tensor([[-2.3149, -2.4493, -2.4028, -2.0365, -2.4206, -2.3716, -2.5821, -2.2923,
-2.4474, -1.9068],
[-2.3046, -2.4133, -2.3605, -2.0619, -2.3862, -2.3583, -2.5299, -2.3073,
-2.4401, -1.9937],
[-2.2838, -2.4111, -2.3769, -2.0874, -2.4297, -2.3517, -2.5468, -2.2698,
-2.4509, -1.9633]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
1%|▌ | 33/3333 [00:03<06:35, 8.34it/s]tensor([[-2.3187, -2.3893, -2.3897, -2.0319, -2.4086, -2.4010, -2.5262, -2.2756,
-2.4428, -1.9879],
[-2.3647, -2.4236, -2.4449, -1.9853, -2.4164, -2.4604, -2.5669, -2.2978,
-2.4125, -1.8848],
[-2.2934, -2.4445, -2.3868, -2.0596, -2.4206, -2.3522, -2.5557, -2.2940,
-2.4501, -1.9375]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 2, 6])
1%|▌ | 34/3333 [00:03<06:37, 8.30it/s]tensor([[-2.3490, -2.4703, -2.4302, -1.9863, -2.4241, -2.4139, -2.6156, -2.3080,
-2.4360, -1.8538],
[-2.2673, -2.4744, -2.3827, -2.0785, -2.4358, -2.3061, -2.5752, -2.2983,
-2.4545, -1.9301],
[-2.2742, -2.4024, -2.3638, -2.0991, -2.4237, -2.3339, -2.5231, -2.2723,
-2.4508, -2.0026]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 1, 3])
1%|▌ | 35/3333 [00:04<06:28, 8.50it/s]tensor([[-2.3067, -2.3893, -2.3721, -2.0541, -2.4112, -2.3771, -2.5094, -2.2842,
-2.4509, -2.0002],
[-2.3246, -2.4008, -2.3864, -2.0172, -2.4022, -2.4242, -2.5345, -2.2938,
-2.4467, -1.9610],
[-2.3098, -2.4089, -2.3908, -2.0480, -2.4268, -2.3905, -2.5527, -2.2783,
-2.4534, -1.9378]], grad_fn=<LogSoftmaxBackward>)
tensor([4, 9, 3])
tensor([[-2.3407, -2.4508, -2.3991, -2.0006, -2.4063, -2.4069, -2.5594, -2.3272,
-2.4502, -1.8969],
[-2.3328, -2.4345, -2.3793, -2.0141, -2.3794, -2.4118, -2.5405, -2.3351,
-2.4406, -1.9361],
[-2.3076, -2.4432, -2.4002, -2.0549, -2.4401, -2.3767, -2.5664, -2.2883,
-2.4611, -1.8890]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 3])
1%|▌ | 37/3333 [00:04<06:10, 8.90it/s]tensor([[-2.3614, -2.4425, -2.4385, -2.0024, -2.4282, -2.4162, -2.5828, -2.3179,
-2.4404, -1.8462],
[-2.2751, -2.3994, -2.3599, -2.0840, -2.4184, -2.3368, -2.5218, -2.2805,
-2.4582, -2.0118],
[-2.3266, -2.4445, -2.4089, -2.0303, -2.4328, -2.4001, -2.5679, -2.3018,
-2.4617, -1.8718]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 0, 1])
1%|▌ | 38/3333 [00:04<05:58, 9.20it/s]tensor([[-2.2898, -2.4147, -2.3862, -2.0711, -2.4340, -2.3604, -2.5525, -2.2860,
-2.4683, -1.9315],
[-2.3077, -2.3964, -2.3851, -2.0431, -2.4228, -2.3875, -2.5454, -2.2827,
-2.4655, -1.9533],
[-2.3304, -2.4316, -2.3908, -2.0184, -2.3995, -2.4133, -2.5579, -2.3267,
-2.4560, -1.9019]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 4, 4])
tensor([[-2.3798, -2.4426, -2.4536, -1.9705, -2.4204, -2.4696, -2.5834, -2.3325,
-2.4400, -1.8208],
[-2.4080, -2.4654, -2.4854, -1.9233, -2.4217, -2.5535, -2.6041, -2.3390,
-2.4338, -1.7665],
[-2.3451, -2.4138, -2.4015, -1.9943, -2.3869, -2.4550, -2.5343, -2.3270,
-2.4426, -1.9215]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 9, 1])
1%|▌ | 40/3333 [00:04<05:49, 9.43it/s]tensor([[-2.3346, -2.4312, -2.3799, -2.0035, -2.3727, -2.4330, -2.5473, -2.3511,
-2.4586, -1.9127],
[-2.2813, -2.4675, -2.4033, -2.0664, -2.4450, -2.3502, -2.5803, -2.3127,
-2.4850, -1.8589],
[-2.3012, -2.4814, -2.4153, -2.0463, -2.4472, -2.3689, -2.5892, -2.3173,
-2.4767, -1.8330]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 0, 9])
1%|▋ | 41/3333 [00:04<05:43, 9.59it/s]tensor([[-2.3254, -2.4641, -2.4346, -2.0341, -2.4464, -2.4068, -2.5995, -2.3183,
-2.4883, -1.7943],
[-2.3477, -2.4431, -2.4040, -1.9984, -2.3798, -2.4409, -2.5597, -2.3611,
-2.4603, -1.8650],
[-2.2995, -2.4107, -2.3789, -2.0531, -2.4070, -2.3770, -2.5402, -2.3185,
-2.4785, -1.9317]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
1%|▋ | 42/3333 [00:04<05:38, 9.71it/s]tensor([[-2.3239, -2.4547, -2.4348, -2.0264, -2.4372, -2.4148, -2.5947, -2.3317,
-2.4920, -1.7989],
[-2.3603, -2.4626, -2.4338, -1.9831, -2.4088, -2.4474, -2.5837, -2.3609,
-2.4769, -1.8032],
[-2.3990, -2.4736, -2.4844, -1.9458, -2.4272, -2.5287, -2.6186, -2.3561,
-2.4741, -1.7222]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
tensor([[-2.3918, -2.4713, -2.4809, -1.9667, -2.4206, -2.5071, -2.6118, -2.3666,
-2.4759, -1.7214],
[-2.3318, -2.4543, -2.4428, -2.0268, -2.4310, -2.4116, -2.5888, -2.3306,
-2.4740, -1.8076],
[-2.3272, -2.4084, -2.4252, -2.0357, -2.4284, -2.4245, -2.5599, -2.3094,
-2.4812, -1.8550]], grad_fn=<LogSoftmaxBackward>)
tensor([0, 0, 0])
1%|▋ | 44/3333 [00:04<05:35, 9.79it/s]tensor([[-2.2970, -2.4606, -2.4179, -2.0474, -2.4392, -2.3894, -2.5738, -2.3220,
-2.4822, -1.8384],
[-2.3575, -2.4162, -2.4651, -1.9924, -2.4253, -2.4962, -2.5773, -2.3284,
-2.4717, -1.7957],
[-2.3281, -2.4065, -2.4344, -2.0251, -2.4244, -2.4483, -2.5633, -2.3174,
-2.4805, -1.8420]], grad_fn=<LogSoftmaxBackward>)
tensor([7, 1, 0])
tensor([[-2.3114, -2.4068, -2.4138, -2.0316, -2.4093, -2.4195, -2.5352, -2.3240,
-2.4754, -1.8966],
[-2.3023, -2.5068, -2.4694, -2.0455, -2.4597, -2.4097, -2.6337, -2.3571,
-2.5000, -1.7115],
[-2.3087, -2.4395, -2.4305, -2.0337, -2.4276, -2.4250, -2.5792, -2.3396,
-2.5012, -1.8100]], grad_fn=<LogSoftmaxBackward>)
tensor([1, 9, 7])
1%|▋ | 46/3333 [00:05<05:33, 9.86it/s]tensor([[-2.3410, -2.4354, -2.4856, -2.0133, -2.4479, -2.4983, -2.6035, -2.3312,
-2.4901, -1.7340],
[-2.2617, -2.4473, -2.4258, -2.0788, -2.4447, -2.3873, -2.5824, -2.3233,
-2.5020, -1.8219],
[-2.2896, -2.4200, -2.4209, -2.0665, -2.4225, -2.4179, -2.5725, -2.3327,
-2.4952, -1.8286]], grad_fn=<LogSoftmaxBackward>)
tensor([7, 5, 3])
1%|▋ | 47/3333 [00:05<05:44, 9.55it/s]tensor([[-2.2920, -2.4299, -2.4500, -2.0620, -2.4479, -2.4303, -2.5874, -2.3195,
-2.5057, -1.7852],
[-2.3059, -2.4386, -2.4168, -2.0344, -2.4006, -2.4390, -2.5768, -2.3672,
-2.5016, -1.8113],
[-2.3174, -2.4141, -2.4592, -2.0284, -2.4346, -2.4739, -2.5879, -2.3242,
-2.4973, -1.7853]], grad_fn=<LogSoftmaxBackward>)
tensor([1, 8, 1])
1%|▋ | 48/3333 [00:05<05:39, 9.68it/s]tensor([[-2.3217, -2.4487, -2.4809, -2.0146, -2.4458, -2.4818, -2.6382, -2.3392,
-2.5159, -1.7175],
[-2.3108, -2.4528, -2.4586, -2.0427, -2.4300, -2.4679, -2.6167, -2.3603,
-2.5087, -1.7263],
[-2.3443, -2.4744, -2.5279, -2.0176, -2.4586, -2.4859, -2.6572, -2.3687,
-2.4921, -1.6514]], grad_fn=<LogSoftmaxBackward>)
tensor([7, 3, 5])
tensor([[-2.3364, -2.4279, -2.4982, -2.0058, -2.4475, -2.5179, -2.6179, -2.3375,
-2.5085, -1.7134],
[-2.2649, -2.4602, -2.4742, -2.0795, -2.4687, -2.4180, -2.6215, -2.3346,
-2.5272, -1.7243],
[-2.3129, -2.4374, -2.4619, -2.0397, -2.4263, -2.4674, -2.6099, -2.3558,
-2.5064, -1.7419]], grad_fn=<LogSoftmaxBackward>)
tensor([3, 8, 1])
2%|▊ | 50/3333 [00:05<05:35, 9.77it/s]tensor([[-2.2549, -2.4156, -2.4481, -2.0850, -2.4507, -2.3995, -2.5904, -2.3162,
-2.5129, -1.8118],
[-2.2836, -2.4151, -2.4677, -2.0680, -2.4580, -2.4425, -2.6101, -2.3129,
-2.5137, -1.7633],
[-2.2631, -2.4276, -2.4585, -2.0763, -2.4512, -2.3978, -2.6021, -2.3266,
-2.5101, -1.7918]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 2])
2%|▊ | 51/3333 [00:05<05:33, 9.84it/s]tensor([[-2.3640, -2.4467, -2.5526, -1.9956, -2.4743, -2.5504, -2.6538, -2.3535,
-2.5135, -1.6251],
[-2.3429, -2.4072, -2.5186, -2.0011, -2.4543, -2.5468, -2.6211, -2.3286,
-2.5016, -1.7050],
[-2.3138, -2.4230, -2.5021, -2.0327, -2.4696, -2.4853, -2.6349, -2.3216,
-2.5194, -1.7077]], grad_fn=<LogSoftmaxBackward>)
tensor([1, 9, 1])
tensor([[-2.3304, -2.4357, -2.5053, -2.0240, -2.4491, -2.5146, -2.6526, -2.3635,
-2.5161, -1.6669],
[-2.3111, -2.4158, -2.4665, -2.0463, -2.4262, -2.4626, -2.6118, -2.3604,
-2.5052, -1.7462],
[-2.3348, -2.4346, -2.5180, -2.0182, -2.4637, -2.4957, -2.6339, -2.3481,
-2.5103, -1.6827]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 5, 6])
2%|▊ | 53/3333 [00:05<05:31, 9.89it/s]tensor([[-2.3605, -2.4211, -2.5263, -1.9878, -2.4426, -2.5504, -2.6344, -2.3729,
-2.5070, -1.6693],
[-2.3225, -2.4013, -2.5061, -2.0390, -2.4710, -2.4916, -2.6252, -2.3278,
-2.5165, -1.7055],
[-2.3540, -2.4459, -2.5712, -2.0160, -2.5056, -2.5547, -2.6872, -2.3488,
-2.5307, -1.5787]], grad_fn=<LogSoftmaxBackward>)
tensor([0, 9, 9])
tensor([[-2.2792, -2.3949, -2.4899, -2.0906, -2.4870, -2.4422, -2.6184, -2.3110,
-2.5238, -1.7285],
[-2.3742, -2.4223, -2.5583, -1.9861, -2.4614, -2.5969, -2.6587, -2.3775,
-2.5080, -1.6121],
[-2.3286, -2.3824, -2.5165, -2.0243, -2.4749, -2.5137, -2.6286, -2.3205,
-2.5143, -1.7095]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 7, 9])
2%|▊ | 55/3333 [00:06<05:35, 9.76it/s]tensor([[-2.2444, -2.4150, -2.4537, -2.0959, -2.4790, -2.3749, -2.6013, -2.3084,
-2.5116, -1.8072],
[-2.3573, -2.4411, -2.5813, -2.0151, -2.5211, -2.5704, -2.7044, -2.3498,
-2.5440, -1.5533],
[-2.3249, -2.4656, -2.5575, -2.0341, -2.5141, -2.4952, -2.7048, -2.3504,
-2.5381, -1.5882]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 9, 2])
2%|▊ | 56/3333 [00:06<05:55, 9.23it/s]tensor([[-2.2963, -2.4706, -2.5473, -2.0726, -2.5316, -2.4626, -2.6903, -2.3530,
-2.5398, -1.5890],
[-2.2605, -2.3736, -2.4605, -2.1005, -2.4770, -2.4160, -2.5944, -2.3089,
-2.5197, -1.7901],
[-2.3786, -2.4433, -2.5826, -1.9862, -2.5031, -2.6136, -2.7098, -2.3753,
-2.5411, -1.5407]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 5, 9])
2%|▊ | 57/3333 [00:06<06:08, 8.89it/s]tensor([[-2.3172, -2.3770, -2.5108, -2.0645, -2.4970, -2.4976, -2.6242, -2.3165,
-2.5213, -1.6904],
[-2.3005, -2.3843, -2.4802, -2.0677, -2.4772, -2.4652, -2.6233, -2.3274,
-2.5247, -1.7245],
[-2.3558, -2.4179, -2.5340, -2.0178, -2.4770, -2.5681, -2.6626, -2.3776,
-2.5254, -1.6067]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 0, 0])
2%|▉ | 58/3333 [00:06<06:17, 8.67it/s]tensor([[-2.2985, -2.3756, -2.4931, -2.0709, -2.4924, -2.4611, -2.6147, -2.3099,
-2.5118, -1.7357],
[-2.2778, -2.3913, -2.4976, -2.1083, -2.5165, -2.4545, -2.6270, -2.3130,
-2.5327, -1.6880],
[-2.2611, -2.3952, -2.4794, -2.1135, -2.5065, -2.4251, -2.6144, -2.3149,
-2.5259, -1.7259]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 6, 4])
2%|▉ | 59/3333 [00:06<06:24, 8.52it/s]tensor([[-2.3457, -2.4646, -2.5807, -2.0556, -2.5438, -2.5287, -2.7415, -2.3915,
-2.5300, -1.5074],
[-2.2683, -2.3827, -2.4765, -2.1078, -2.5036, -2.4356, -2.6161, -2.3134,
-2.5256, -1.7299],
[-2.3695, -2.4242, -2.5793, -2.0206, -2.5335, -2.6020, -2.6878, -2.3627,
-2.5384, -1.5379]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 4, 9])
2%|▉ | 60/3333 [00:06<06:29, 8.41it/s]tensor([[-2.3842, -2.4542, -2.6009, -2.0180, -2.5486, -2.6474, -2.7232, -2.3900,
-2.5510, -1.4676],
[-2.3729, -2.4441, -2.5869, -2.0234, -2.5320, -2.6198, -2.7090, -2.3858,
-2.5413, -1.5013],
[-2.2787, -2.3894, -2.4645, -2.0945, -2.4827, -2.4480, -2.6098, -2.3406,
-2.5252, -1.7267]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
2%|▉ | 61/3333 [00:06<06:32, 8.34it/s]tensor([[-2.3799, -2.4402, -2.6095, -2.0388, -2.5459, -2.6446, -2.7169, -2.3967,
-2.5197, -1.4717],
[-2.3215, -2.4407, -2.5614, -2.0939, -2.5531, -2.5344, -2.6898, -2.3635,
-2.5460, -1.5286],
[-2.3096, -2.3752, -2.5082, -2.0763, -2.5089, -2.5154, -2.6286, -2.3256,
-2.5225, -1.6684]], grad_fn=<LogSoftmaxBackward>)
tensor([3, 9, 0])
2%|▉ | 62/3333 [00:06<06:34, 8.30it/s]tensor([[-2.3461, -2.4392, -2.5792, -2.0690, -2.5522, -2.5815, -2.6978, -2.3746,
-2.5418, -1.5042],
[-2.3075, -2.4121, -2.4686, -2.0778, -2.4550, -2.4935, -2.6227, -2.3971,
-2.5177, -1.6701],
[-2.2561, -2.4252, -2.4809, -2.1209, -2.5134, -2.4152, -2.6321, -2.3338,
-2.5129, -1.6985]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 3, 9])
2%|▉ | 63/3333 [00:07<06:24, 8.49it/s]tensor([[-2.3663, -2.4473, -2.5793, -2.0407, -2.5281, -2.6162, -2.6942, -2.4112,
-2.5395, -1.4927],
[-2.3210, -2.4621, -2.5462, -2.0849, -2.5428, -2.5383, -2.6962, -2.3937,
-2.5592, -1.5137],
[-2.3019, -2.4403, -2.5233, -2.1033, -2.5333, -2.5200, -2.6794, -2.3746,
-2.5502, -1.5560]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 2, 7])
tensor([[-2.3817, -2.4637, -2.6069, -2.0340, -2.5551, -2.6650, -2.7228, -2.4188,
-2.5471, -1.4371],
[-2.3334, -2.4273, -2.5267, -2.0555, -2.5047, -2.5926, -2.6714, -2.3980,
-2.5300, -1.5580],
[-2.2930, -2.4320, -2.5336, -2.1177, -2.5570, -2.5242, -2.6858, -2.3503,
-2.5485, -1.5505]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 2, 9])
2%|▉ | 65/3333 [00:07<06:07, 8.90it/s]tensor([[-2.3214, -2.4840, -2.5838, -2.1162, -2.5970, -2.5768, -2.7298, -2.3831,
-2.5564, -1.4401],
[-2.3130, -2.4602, -2.5357, -2.1007, -2.5527, -2.5419, -2.6930, -2.3836,
-2.5494, -1.5167],
[-2.3326, -2.4942, -2.5973, -2.1029, -2.5973, -2.5881, -2.7465, -2.3880,
-2.5440, -1.4285]], grad_fn=<LogSoftmaxBackward>)
tensor([5, 8, 9])
2%|█ | 66/3333 [00:07<05:55, 9.20it/s]tensor([[-2.2865, -2.4138, -2.5047, -2.1230, -2.5434, -2.5163, -2.6637, -2.3459,
-2.5307, -1.5940],
[-2.3179, -2.4412, -2.5159, -2.0943, -2.5253, -2.5574, -2.6833, -2.3961,
-2.5281, -1.5430],
[-2.3036, -2.4738, -2.5399, -2.1147, -2.5618, -2.5221, -2.6996, -2.3896,
-2.5375, -1.5102]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 9, 8])
tensor([[-2.3064, -2.5133, -2.5707, -2.1424, -2.5957, -2.5224, -2.7460, -2.4092,
-2.5321, -1.4392],
[-2.3168, -2.4775, -2.5657, -2.1311, -2.6002, -2.5688, -2.7224, -2.3841,
-2.5494, -1.4483],
[-2.3607, -2.4710, -2.5676, -2.0652, -2.5551, -2.6773, -2.7271, -2.4298,
-2.5284, -1.4357]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 2, 0])
2%|█ | 68/3333 [00:07<05:46, 9.43it/s]tensor([[-2.3638, -2.5032, -2.5969, -2.0946, -2.6084, -2.6587, -2.7651, -2.4258,
-2.5400, -1.3762],
[-2.3533, -2.5096, -2.6064, -2.1141, -2.6290, -2.6608, -2.7805, -2.4173,
-2.5474, -1.3563],
[-2.2815, -2.4469, -2.5022, -2.1465, -2.5579, -2.5154, -2.6767, -2.3726,
-2.5268, -1.5499]], grad_fn=<LogSoftmaxBackward>)
tensor([3, 2, 9])
2%|█ | 69/3333 [00:07<05:40, 9.59it/s]tensor([[-2.3399, -2.5824, -2.6122, -2.1381, -2.6441, -2.6037, -2.8404, -2.4772,
-2.5246, -1.3120],
[-2.3366, -2.4803, -2.5481, -2.1032, -2.5893, -2.6064, -2.7263, -2.4117,
-2.5288, -1.4457],
[-2.2588, -2.4715, -2.4798, -2.1519, -2.5587, -2.4542, -2.6724, -2.3702,
-2.4994, -1.5945]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 6, 9])
2%|█ | 70/3333 [00:07<05:36, 9.71it/s]tensor([[-2.3027, -2.4447, -2.4873, -2.1148, -2.5572, -2.5674, -2.6824, -2.3898,
-2.5137, -1.5409],
[-2.2756, -2.5262, -2.5158, -2.1664, -2.6139, -2.5274, -2.7154, -2.4091,
-2.5314, -1.4560],
[-2.3322, -2.4985, -2.5371, -2.1128, -2.5879, -2.5806, -2.7185, -2.4287,
-2.5135, -1.4494]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 0, 8])
2%|█ | 71/3333 [00:07<05:33, 9.79it/s]tensor([[-2.3262, -2.5320, -2.5443, -2.1319, -2.6129, -2.5777, -2.7615, -2.4478,
-2.5129, -1.4036],
[-2.3917, -2.5528, -2.5981, -2.0942, -2.6501, -2.7433, -2.7921, -2.4871,
-2.5363, -1.2929],
[-2.2699, -2.4529, -2.4502, -2.1473, -2.5534, -2.4978, -2.6511, -2.3773,
-2.5042, -1.5977]], grad_fn=<LogSoftmaxBackward>)
tensor([3, 6, 8])
tensor([[-2.3463, -2.5159, -2.5360, -2.1015, -2.6229, -2.6916, -2.7469, -2.4525,
-2.5172, -1.3832],
[-2.2671, -2.4592, -2.4640, -2.1641, -2.5779, -2.5201, -2.6658, -2.3830,
-2.5062, -1.5552],
[-2.3508, -2.5403, -2.5498, -2.1191, -2.6391, -2.6554, -2.7485, -2.4595,
-2.5212, -1.3623]], grad_fn=<LogSoftmaxBackward>)
tensor([4, 3, 4])
2%|█ | 73/3333 [00:08<05:30, 9.85it/s]tensor([[-2.3376, -2.5161, -2.5195, -2.1086, -2.6171, -2.6921, -2.7368, -2.4522,
-2.5095, -1.3951],
[-2.3400, -2.5211, -2.5044, -2.0994, -2.5878, -2.6879, -2.7245, -2.4781,
-2.5019, -1.4089],
[-2.2785, -2.4678, -2.4613, -2.1521, -2.5748, -2.5166, -2.6590, -2.3909,
-2.4876, -1.5623]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 8, 3])
tensor([[-2.3096, -2.5028, -2.5017, -2.1259, -2.6028, -2.5993, -2.7189, -2.4298,
-2.4814, -1.4629],
[-2.3486, -2.5456, -2.5453, -2.1305, -2.6545, -2.7073, -2.7596, -2.4617,
-2.5062, -1.3409],
[-2.2702, -2.4809, -2.4460, -2.1580, -2.5757, -2.5300, -2.6583, -2.3987,
-2.4927, -1.5536]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 7, 6])
2%|█▏ | 75/3333 [00:08<05:29, 9.90it/s]tensor([[-2.2434, -2.4624, -2.4254, -2.1698, -2.5562, -2.4891, -2.6308, -2.3887,
-2.4833, -1.6185],
[-2.3562, -2.5674, -2.5461, -2.1246, -2.6595, -2.7036, -2.7703, -2.4806,
-2.5064, -1.3251],
[-2.3210, -2.5039, -2.4780, -2.1076, -2.5905, -2.6425, -2.6821, -2.4379,
-2.4815, -1.4737]], grad_fn=<LogSoftmaxBackward>)
tensor([2, 9, 9])
2%|█▏ | 76/3333 [00:08<05:38, 9.61it/s]tensor([[-2.3616, -2.5699, -2.5275, -2.1154, -2.6439, -2.7113, -2.7430, -2.4891,
-2.4933, -1.3424],
[-2.3738, -2.5885, -2.5355, -2.1125, -2.6478, -2.7438, -2.7631, -2.5159,
-2.4985, -1.3080],
[-2.3187, -2.5299, -2.4626, -2.1253, -2.5832, -2.6307, -2.6948, -2.4687,
-2.4823, -1.4527]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 5, 9])
2%|█▏ | 77/3333 [00:08<05:46, 9.39it/s]tensor([[-2.3121, -2.5422, -2.4675, -2.1336, -2.6109, -2.6139, -2.7092, -2.4455,
-2.4813, -1.4464],
[-2.3835, -2.5707, -2.5357, -2.0954, -2.6490, -2.7819, -2.7475, -2.5070,
-2.4699, -1.3234],
[-2.3309, -2.5379, -2.4504, -2.1010, -2.5644, -2.6556, -2.6850, -2.4931,
-2.4711, -1.4585]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 7, 7])
tensor([[-2.3068, -2.5187, -2.4589, -2.1469, -2.6125, -2.6035, -2.6743, -2.4181,
-2.4741, -1.4791],
[-2.3657, -2.6106, -2.5376, -2.1568, -2.6771, -2.7030, -2.7780, -2.5077,
-2.4831, -1.2910],
[-2.3414, -2.6261, -2.5283, -2.1694, -2.6732, -2.6434, -2.7716, -2.4987,
-2.4652, -1.3186]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 9, 9])
2%|█▏ | 79/3333 [00:08<05:40, 9.56it/s]tensor([[-2.3008, -2.5602, -2.4683, -2.1812, -2.6392, -2.6023, -2.7032, -2.4324,
-2.4882, -1.4209],
[-2.3485, -2.6416, -2.5301, -2.1764, -2.6827, -2.6536, -2.7781, -2.5025,
-2.4625, -1.3015],
[-2.3472, -2.5404, -2.4656, -2.1026, -2.5936, -2.6836, -2.6835, -2.4679,
-2.4641, -1.4389]], grad_fn=<LogSoftmaxBackward>)
tensor([0, 9, 9])
2%|█▏ | 80/3333 [00:08<05:35, 9.69it/s]tensor([[-2.3804, -2.6282, -2.5123, -2.1237, -2.6535, -2.7411, -2.7479, -2.5134,
-2.4716, -1.3086],
[-2.3206, -2.5436, -2.4596, -2.1499, -2.6180, -2.6332, -2.6837, -2.4317,
-2.4622, -1.4482],
[-2.3088, -2.5476, -2.4277, -2.1381, -2.5796, -2.5827, -2.6635, -2.4484,
-2.4620, -1.4985]], grad_fn=<LogSoftmaxBackward>)
tensor([6, 6, 9])
tensor([[-2.3109, -2.5660, -2.4439, -2.1595, -2.6116, -2.6187, -2.7052, -2.4485,
-2.4729, -1.4361],
[-2.3843, -2.6124, -2.5129, -2.1182, -2.6570, -2.8022, -2.7404, -2.5026,
-2.4678, -1.3049],
[-2.3230, -2.6245, -2.4829, -2.1756, -2.6594, -2.6239, -2.7221, -2.4620,
-2.4609, -1.3718]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 0, 7])
2%|█▎ | 82/3333 [00:09<05:43, 9.47it/s]tensor([[-2.3748, -2.6983, -2.5576, -2.2175, -2.7519, -2.7620, -2.7936, -2.5131,
-2.5001, -1.1973],
[-2.3850, -2.6367, -2.5196, -2.1327, -2.6685, -2.7426, -2.7283, -2.5051,
-2.4546, -1.3070],
[-2.4253, -2.7282, -2.5942, -2.1764, -2.7548, -2.8471, -2.8511, -2.5717,
-2.4693, -1.1470]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 4, 9])
2%|█▎ | 83/3333 [00:09<05:59, 9.05it/s]tensor([[-2.4353, -2.7428, -2.6093, -2.1978, -2.7779, -2.8913, -2.8469, -2.5673,
-2.4767, -1.1179],
[-2.3964, -2.6894, -2.5657, -2.1875, -2.7429, -2.8266, -2.7948, -2.5058,
-2.4822, -1.1970],
[-2.3223, -2.5508, -2.4562, -2.1592, -2.6123, -2.6475, -2.6558, -2.4307,
-2.4501, -1.4522]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 4, 2])
3%|█▎ | 84/3333 [00:09<06:10, 8.77it/s]tensor([[-2.3584, -2.6184, -2.4728, -2.1430, -2.6022, -2.7513, -2.6864, -2.5065,
-2.4390, -1.3627],
[-2.3049, -2.6914, -2.4967, -2.2496, -2.7027, -2.6500, -2.7258, -2.4777,
-2.4798, -1.2954],
[-2.3348, -2.5994, -2.4782, -2.1777, -2.6443, -2.6998, -2.7021, -2.4536,
-2.4589, -1.3672]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 5, 3])
3%|█▎ | 85/3333 [00:09<06:18, 8.59it/s]tensor([[-2.3289, -2.6237, -2.4386, -2.1772, -2.5866, -2.6540, -2.6767, -2.4986,
-2.4527, -1.3995],
[-2.4184, -2.7931, -2.6031, -2.2342, -2.7671, -2.8258, -2.8758, -2.5893,
-2.4588, -1.1102],
[-2.3213, -2.5920, -2.4459, -2.1766, -2.6065, -2.6478, -2.6731, -2.4553,
-2.4529, -1.4213]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 1, 2])
3%|█▎ | 86/3333 [00:09<06:23, 8.46it/s]tensor([[-2.3014, -2.6185, -2.4643, -2.2339, -2.6543, -2.6346, -2.6938, -2.4378,
-2.4686, -1.3744],
[-2.3322, -2.6303, -2.4378, -2.1835, -2.5881, -2.6590, -2.6789, -2.5024,
-2.4506, -1.3906],
[-2.3133, -2.6787, -2.4946, -2.2512, -2.6977, -2.6648, -2.7385, -2.4693,
-2.4861, -1.2906]], grad_fn=<LogSoftmaxBackward>)
tensor([1, 9, 7])
3%|█▎ | 87/3333 [00:09<06:27, 8.38it/s]tensor([[-2.3468, -2.6792, -2.5061, -2.2423, -2.6977, -2.6996, -2.7364, -2.4778,
-2.4686, -1.2734],
[-2.4423, -2.7821, -2.6084, -2.2458, -2.7850, -2.8928, -2.8566, -2.5677,
-2.4825, -1.0883],
[-2.4107, -2.7326, -2.5661, -2.2299, -2.7361, -2.8027, -2.7950, -2.5340,
-2.4699, -1.1702]], grad_fn=<LogSoftmaxBackward>)
tensor([7, 0, 9])
3%|█▎ | 88/3333 [00:09<06:30, 8.32it/s]tensor([[-2.3660, -2.6584, -2.5105, -2.2142, -2.6929, -2.7753, -2.7469, -2.4713,
-2.4643, -1.2661],
[-2.3601, -2.6830, -2.5049, -2.2252, -2.6921, -2.7188, -2.7402, -2.4808,
-2.4675, -1.2702],
[-2.3560, -2.6609, -2.4686, -2.2019, -2.6342, -2.7384, -2.7198, -2.5082,
-2.4517, -1.3089]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 4, 9])
3%|█▎ | 89/3333 [00:09<06:31, 8.28it/s]tensor([[-2.2974, -2.6173, -2.4351, -2.2304, -2.6192, -2.6020, -2.6634, -2.4391,
-2.4595, -1.4188],
[-2.3701, -2.6294, -2.5132, -2.2043, -2.6690, -2.7725, -2.7238, -2.4593,
-2.4456, -1.2966],
[-2.4078, -2.7623, -2.5597, -2.2571, -2.7460, -2.8186, -2.8154, -2.5411,
-2.4912, -1.1409]], grad_fn=<LogSoftmaxBackward>)
tensor([9, 4, 9])
3%|█▍ | 90/3333 [00:09<06:22, 8.48it/s]tensor([[-2.2968, -2.6156, -2.4479, -2.2457, -2.6375, -2.6136, -2.6841, -2.4171,
-2.4614, -1.4009],
[-2.4039, -2.7562, -2.5761, -2.2876, -2.7593, -2.8236, -2.8156, -2.5230,
-2.4924, -1.1301],
[-2.4842, -2.8470, -2.6563, -2.2925, -2.8194, -2.9860, -2.9215, -2.6054,
-2.4969, -1.0019]], grad_fn=<LogSoftmaxBackward>)
tensor([8, 2, 1])

View file

@ -0,0 +1 @@
{"system.cpu": 39.15, "system.memory": 47.64, "system.disk": 8.1, "system.proc.memory.availableMB": 4034.14, "system.proc.memory.rssMB": 146.94, "system.proc.memory.percent": 1.91, "system.proc.cpu.threads": 4.2, "system.network.sent": 15067, "system.network.recv": 64594, "_wandb": true, "_timestamp": 1580219012, "_runtime": 18}

View file

@ -0,0 +1,92 @@
{"epoch": 0, "_runtime": 9.942976951599121, "_timestamp": 1580219002.7513394, "_step": 0}
{"loss": 2.307335615158081, "_runtime": 10.22628402709961, "_timestamp": 1580219003.0346465, "_step": 1}
{"loss": 2.3311939239501953, "_runtime": 10.366636991500854, "_timestamp": 1580219003.1749995, "_step": 2}
{"loss": 2.3145415782928467, "_runtime": 10.487778663635254, "_timestamp": 1580219003.2961411, "_step": 3}
{"loss": 2.270946741104126, "_runtime": 10.610984563827515, "_timestamp": 1580219003.419347, "_step": 4}
{"loss": 2.289417028427124, "_runtime": 10.730233669281006, "_timestamp": 1580219003.5385962, "_step": 5}
{"loss": 2.224419355392456, "_runtime": 10.855008840560913, "_timestamp": 1580219003.6633713, "_step": 6}
{"loss": 2.313235282897949, "_runtime": 10.978448867797852, "_timestamp": 1580219003.7868114, "_step": 7}
{"loss": 2.374844789505005, "_runtime": 11.098386526107788, "_timestamp": 1580219003.906749, "_step": 8}
{"loss": 2.1285240650177, "_runtime": 11.208126544952393, "_timestamp": 1580219004.016489, "_step": 9}
{"loss": 2.193406343460083, "_runtime": 11.308474063873291, "_timestamp": 1580219004.1168365, "_step": 10}
{"loss": 2.235577344894409, "_runtime": 11.407753229141235, "_timestamp": 1580219004.2161157, "_step": 11}
{"loss": 2.268996477127075, "_runtime": 11.508171796798706, "_timestamp": 1580219004.3165343, "_step": 12}
{"loss": 2.3845322132110596, "_runtime": 11.61876368522644, "_timestamp": 1580219004.4271262, "_step": 13}
{"loss": 2.273387908935547, "_runtime": 11.720426797866821, "_timestamp": 1580219004.5287893, "_step": 14}
{"loss": 2.262080192565918, "_runtime": 11.821348428726196, "_timestamp": 1580219004.629711, "_step": 15}
{"loss": 2.309389591217041, "_runtime": 11.917869567871094, "_timestamp": 1580219004.726232, "_step": 16}
{"loss": 2.168505907058716, "_runtime": 12.019653558731079, "_timestamp": 1580219004.828016, "_step": 17}
{"loss": 2.1565160751342773, "_runtime": 12.119675636291504, "_timestamp": 1580219004.9280381, "_step": 18}
{"loss": 2.345561981201172, "_runtime": 12.217541217803955, "_timestamp": 1580219005.0259037, "_step": 19}
{"loss": 2.1500587463378906, "_runtime": 12.332592964172363, "_timestamp": 1580219005.1409554, "_step": 20}
{"loss": 2.205068349838257, "_runtime": 12.430001020431519, "_timestamp": 1580219005.2383635, "_step": 21}
{"loss": 2.3686747550964355, "_runtime": 12.53206181526184, "_timestamp": 1580219005.3404243, "_step": 22}
{"loss": 2.136617422103882, "_runtime": 12.632176876068115, "_timestamp": 1580219005.4405394, "_step": 23}
{"loss": 2.3696722984313965, "_runtime": 12.732112646102905, "_timestamp": 1580219005.5404751, "_step": 24}
{"loss": 2.160412549972534, "_runtime": 12.832097053527832, "_timestamp": 1580219005.6404595, "_step": 25}
{"loss": 2.3374061584472656, "_runtime": 12.933876752853394, "_timestamp": 1580219005.7422392, "_step": 26}
{"loss": 2.1028594970703125, "_runtime": 13.033409595489502, "_timestamp": 1580219005.841772, "_step": 27}
{"loss": 2.1568334102630615, "_runtime": 13.14561915397644, "_timestamp": 1580219005.9539816, "_step": 28}
{"loss": 2.2629356384277344, "_runtime": 13.267441749572754, "_timestamp": 1580219006.0758042, "_step": 29}
{"loss": 2.343139410018921, "_runtime": 13.387838363647461, "_timestamp": 1580219006.1962008, "_step": 30}
{"loss": 2.2338523864746094, "_runtime": 13.511196374893188, "_timestamp": 1580219006.3195589, "_step": 31}
{"loss": 2.151728391647339, "_runtime": 13.63111662864685, "_timestamp": 1580219006.439479, "_step": 32}
{"loss": 1.9545854330062866, "_runtime": 13.754342317581177, "_timestamp": 1580219006.5627048, "_step": 33}
{"loss": 2.329495668411255, "_runtime": 13.876088619232178, "_timestamp": 1580219006.684451, "_step": 34}
{"loss": 2.142441749572754, "_runtime": 13.999814748764038, "_timestamp": 1580219006.8081772, "_step": 35}
{"loss": 2.1400437355041504, "_runtime": 14.110645055770874, "_timestamp": 1580219006.9190075, "_step": 36}
{"loss": 1.9626344442367554, "_runtime": 14.211005449295044, "_timestamp": 1580219007.019368, "_step": 37}
{"loss": 2.1885862350463867, "_runtime": 14.309916019439697, "_timestamp": 1580219007.1182785, "_step": 38}
{"loss": 2.2512786388397217, "_runtime": 14.409563064575195, "_timestamp": 1580219007.2179255, "_step": 39}
{"loss": 2.2545621395111084, "_runtime": 14.508445501327515, "_timestamp": 1580219007.316808, "_step": 40}
{"loss": 2.0090219974517822, "_runtime": 14.611153602600098, "_timestamp": 1580219007.419516, "_step": 41}
{"loss": 1.863661766052246, "_runtime": 14.7111496925354, "_timestamp": 1580219007.5195122, "_step": 42}
{"loss": 1.774759292602539, "_runtime": 14.811184883117676, "_timestamp": 1580219007.6195474, "_step": 43}
{"loss": 2.3502585887908936, "_runtime": 14.908552646636963, "_timestamp": 1580219007.7169151, "_step": 44}
{"loss": 2.3554484844207764, "_runtime": 15.011493682861328, "_timestamp": 1580219007.8198562, "_step": 45}
{"loss": 2.1526503562927246, "_runtime": 15.10797643661499, "_timestamp": 1580219007.916339, "_step": 46}
{"loss": 2.2616689205169678, "_runtime": 15.208127498626709, "_timestamp": 1580219008.01649, "_step": 47}
{"loss": 2.448517084121704, "_runtime": 15.32047438621521, "_timestamp": 1580219008.1288369, "_step": 48}
{"loss": 2.2892744541168213, "_runtime": 15.420961141586304, "_timestamp": 1580219008.2293236, "_step": 49}
{"loss": 2.323457956314087, "_runtime": 15.523219585418701, "_timestamp": 1580219008.331582, "_step": 50}
{"loss": 2.011204719543457, "_runtime": 15.621654510498047, "_timestamp": 1580219008.430017, "_step": 51}
{"loss": 2.191546678543091, "_runtime": 15.721709489822388, "_timestamp": 1580219008.530072, "_step": 52}
{"loss": 2.5339272022247314, "_runtime": 15.821038007736206, "_timestamp": 1580219008.6294005, "_step": 53}
{"loss": 1.8815513849258423, "_runtime": 15.923426389694214, "_timestamp": 1580219008.7317889, "_step": 54}
{"loss": 2.2351624965667725, "_runtime": 16.02270269393921, "_timestamp": 1580219008.8310652, "_step": 55}
{"loss": 2.207448720932007, "_runtime": 16.135317087173462, "_timestamp": 1580219008.9436796, "_step": 56}
{"loss": 1.8485578298568726, "_runtime": 16.256279468536377, "_timestamp": 1580219009.064642, "_step": 57}
{"loss": 2.3890020847320557, "_runtime": 16.37800097465515, "_timestamp": 1580219009.1863635, "_step": 58}
{"loss": 2.548426866531372, "_runtime": 16.498754739761353, "_timestamp": 1580219009.3071172, "_step": 59}
{"loss": 1.8496440649032593, "_runtime": 16.62080430984497, "_timestamp": 1580219009.4291668, "_step": 60}
{"loss": 1.5652227401733398, "_runtime": 16.744091510772705, "_timestamp": 1580219009.552454, "_step": 61}
{"loss": 1.9590145349502563, "_runtime": 16.867318391799927, "_timestamp": 1580219009.6756809, "_step": 62}
{"loss": 1.76015043258667, "_runtime": 16.987404823303223, "_timestamp": 1580219009.7957673, "_step": 63}
{"loss": 2.4867560863494873, "_runtime": 17.099939823150635, "_timestamp": 1580219009.9083023, "_step": 64}
{"loss": 1.8381038904190063, "_runtime": 17.198379516601562, "_timestamp": 1580219010.006742, "_step": 65}
{"loss": 2.184875249862671, "_runtime": 17.298781633377075, "_timestamp": 1580219010.107144, "_step": 66}
{"loss": 2.2480363845825195, "_runtime": 17.39970374107361, "_timestamp": 1580219010.2080662, "_step": 67}
{"loss": 2.4989969730377197, "_runtime": 17.500070571899414, "_timestamp": 1580219010.308433, "_step": 68}
{"loss": 2.0835931301116943, "_runtime": 17.59912419319153, "_timestamp": 1580219010.4074867, "_step": 69}
{"loss": 1.8775931596755981, "_runtime": 17.69990587234497, "_timestamp": 1580219010.5082684, "_step": 70}
{"loss": 2.4254353046417236, "_runtime": 17.799625635147095, "_timestamp": 1580219010.607988, "_step": 71}
{"loss": 2.476066827774048, "_runtime": 17.899600505828857, "_timestamp": 1580219010.707963, "_step": 72}
{"loss": 2.475334882736206, "_runtime": 18.00012469291687, "_timestamp": 1580219010.8084872, "_step": 73}
{"loss": 2.391148805618286, "_runtime": 18.099634647369385, "_timestamp": 1580219010.9079971, "_step": 74}
{"loss": 2.1943094730377197, "_runtime": 18.199618577957153, "_timestamp": 1580219011.007981, "_step": 75}
{"loss": 1.7414013147354126, "_runtime": 18.29889965057373, "_timestamp": 1580219011.1072621, "_step": 76}
{"loss": 2.2299211025238037, "_runtime": 18.409764766693115, "_timestamp": 1580219011.2181273, "_step": 77}
{"loss": 2.148847818374634, "_runtime": 18.52345585823059, "_timestamp": 1580219011.3318183, "_step": 78}
{"loss": 1.3628978729248047, "_runtime": 18.623080730438232, "_timestamp": 1580219011.4314432, "_step": 79}
{"loss": 1.6803736686706543, "_runtime": 18.723894119262695, "_timestamp": 1580219011.5322566, "_step": 80}
{"loss": 2.3100497722625732, "_runtime": 18.82351851463318, "_timestamp": 1580219011.631881, "_step": 81}
{"loss": 2.439739227294922, "_runtime": 18.920521020889282, "_timestamp": 1580219011.7288835, "_step": 82}
{"loss": 1.6709295511245728, "_runtime": 19.04705786705017, "_timestamp": 1580219011.8554204, "_step": 83}
{"loss": 2.105680465698242, "_runtime": 19.167895078659058, "_timestamp": 1580219011.9762576, "_step": 84}
{"loss": 2.422238826751709, "_runtime": 19.287487745285034, "_timestamp": 1580219012.0958502, "_step": 85}
{"loss": 2.212815046310425, "_runtime": 19.411945343017578, "_timestamp": 1580219012.2203078, "_step": 86}
{"loss": 2.159456968307495, "_runtime": 19.533960819244385, "_timestamp": 1580219012.3423233, "_step": 87}
{"loss": 2.0301051139831543, "_runtime": 19.655529260635376, "_timestamp": 1580219012.4638917, "_step": 88}
{"loss": 1.7556778192520142, "_runtime": 19.77916431427002, "_timestamp": 1580219012.5875268, "_step": 89}
{"loss": 1.7429243326187134, "_runtime": 19.902369260787964, "_timestamp": 1580219012.7107317, "_step": 90}
{"loss": 2.62817120552063, "_runtime": 20.012404918670654, "_timestamp": 1580219012.8207674, "_step": 91}

View file

@ -0,0 +1,23 @@
{
"root": "/home/clemens/repositorys/pytorch-ai",
"program": "pytorch_ai.py",
"git": {
"remote": "git@github.com:Clemens-Dautermann/pytorch-ai.git",
"commit": "55cff9b18f8558ae7a9170e56a3d5c6f6665d9ab"
},
"email": "clemens.dautermann@gmail.com",
"startedAt": "2020-01-28T13:43:14.042432",
"host": "ubuntu-laptop",
"username": "clemens",
"executable": "/usr/bin/python3",
"os": "Linux-5.3.0-26-generic-x86_64-with-Ubuntu-19.10-eoan",
"python": "3.7.5",
"cpu_count": 2,
"args": [],
"state": "killed",
"jobType": null,
"mode": "dryrun",
"project": "tictactoe",
"heartbeatAt": "2020-01-28T13:43:33.198554",
"exitcode": 255
}

View file

@ -0,0 +1 @@
{"_runtime": 19.902369260787964, "epoch": 0, "_timestamp": 1580219012.7107317, "_step": 90, "graph_0": {"_type": "graph", "format": "torch", "nodes": [{"name": "fc1", "id": 140306656036880, "class_name": "Linear(in_features=9, out_features=9, bias=True)", "parameters": [["weight", [9, 9]], ["bias", [9]]], "output_shape": [[3, 9]], "num_parameters": [81, 9]}, {"name": "fc2", "id": 140306656037072, "class_name": "Linear(in_features=9, out_features=20, bias=True)", "parameters": [["weight", [20, 9]], ["bias", [20]]], "output_shape": [[3, 20]], "num_parameters": [180, 20]}, {"name": "fc3", "id": 140306656037008, "class_name": "Linear(in_features=20, out_features=50, bias=True)", "parameters": [["weight", [50, 20]], ["bias", [50]]], "output_shape": [[3, 50]], "num_parameters": [1000, 50]}, {"name": "fc4", "id": 140306656036688, "class_name": "Linear(in_features=50, out_features=10, bias=True)", "parameters": [["weight", [10, 50]], ["bias", [10]]], "output_shape": [[3, 10]], "num_parameters": [500, 10]}], "edges": []}, "loss": 1.7429243326187134}