Minor Fixes for GPU training

This commit is contained in:
Clemens-Dautermann 2020-01-28 15:23:25 +01:00
parent 56ee2635b5
commit 459caccfe2
47 changed files with 9862 additions and 26 deletions

View file

@ -0,0 +1,9 @@
wandb_version: 1
_wandb:
desc: null
value:
cli_version: 0.8.22
framework: torch
is_jupyter_run: false
python_version: 3.6.9

View file

@ -0,0 +1,97 @@
diff --git a/TicTacToe_AI/Net/pytorch_ai.py b/TicTacToe_AI/Net/pytorch_ai.py
index c10ea0e..3bbba77 100644
--- a/TicTacToe_AI/Net/pytorch_ai.py
+++ b/TicTacToe_AI/Net/pytorch_ai.py
@@ -8,7 +8,7 @@ import wandb
wandb.init(project="tictactoe")
-BATCH_SIZE = 3
+BATCH_SIZE = 200
def to_set(raw_list):
@@ -85,7 +85,7 @@ def buildsets():
testset = to_batched_set(alllines[0:10000])
print('Generating trainset...')
- trainset = to_batched_set(alllines[10001:20000])
+ trainset = to_batched_set(alllines[10001:])
return trainset, testset
@@ -134,18 +134,20 @@ loss_function = nn.CrossEntropyLoss()
trainset, testset = buildsets()
-for epoch in range(100):
+for epoch in range(300):
print('Epoch: ' + str(epoch))
wandb.log({'epoch': epoch})
for X, label in tqdm(trainset):
net.zero_grad()
- X.to(device)
+ X = X.to(device)
output = net(X)
- output.cpu()
+ output = output.cpu()
loss = loss_function(output.view(-1, 10), label)
loss.backward()
optimizer.step()
wandb.log({'loss': loss})
+ net = net.cpu()
torch.save(net, './nets/gpunets/net_' + str(epoch) + '.pt')
+ net = net.to(device)
testnet(net, testset)
diff --git a/TicTacToe_AI/Net/wandb/debug.log b/TicTacToe_AI/Net/wandb/debug.log
index 51ac5d0..0af4662 100644
--- a/TicTacToe_AI/Net/wandb/debug.log
+++ b/TicTacToe_AI/Net/wandb/debug.log
@@ -1,18 +1,18 @@
-2020-01-28 14:43:14,846 DEBUG MainThread:32731 [wandb_config.py:_load_defaults():111] no defaults not found in config-defaults.yaml
-2020-01-28 14:43:14,864 DEBUG MainThread:32731 [cmd.py:execute():728] Popen(['git', 'cat-file', '--batch-check'], cwd=/home/clemens/repositorys/pytorch-ai, universal_newlines=False, shell=None, istream=<valid stream>)
-2020-01-28 14:43:14,877 DEBUG MainThread:32731 [cmd.py:execute():728] Popen(['git', 'rev-parse', '--show-toplevel'], cwd=/home/clemens/repositorys/pytorch-ai, universal_newlines=False, shell=None, istream=None)
-2020-01-28 14:43:14,887 DEBUG MainThread:32731 [cmd.py:execute():728] Popen(['git', 'status', '--porcelain', '--untracked-files'], cwd=/home/clemens/repositorys/pytorch-ai, universal_newlines=False, shell=None, istream=None)
-2020-01-28 14:43:14,906 DEBUG MainThread:32731 [run_manager.py:__init__():535] Initialized sync for tictactoe/mvx4evw0
-2020-01-28 14:43:14,912 INFO MainThread:32731 [run_manager.py:wrap_existing_process():1133] wrapping existing process 32725
-2020-01-28 14:43:14,913 WARNING MainThread:32731 [io_wrap.py:register():104] SIGWINCH handler was not None: <Handlers.SIG_DFL: 0>
-2020-01-28 14:43:14,919 DEBUG MainThread:32731 [connectionpool.py:_new_conn():815] Starting new HTTPS connection (1): pypi.org:443
-2020-01-28 14:43:15,060 DEBUG MainThread:32731 [connectionpool.py:_make_request():393] https://pypi.org:443 "GET /pypi/wandb/json HTTP/1.1" 200 39767
-2020-01-28 14:43:15,179 INFO MainThread:32731 [run_manager.py:init_run():918] system metrics and metadata threads started
-2020-01-28 14:43:15,181 INFO MainThread:32731 [run_manager.py:wrap_existing_process():1150] informing user process we are ready to proceed
-2020-01-28 14:43:15,183 INFO MainThread:32731 [run_manager.py:_sync_etc():1257] entering loop for messages from user process
-2020-01-28 14:43:15,862 INFO Thread-3 :32731 [run_manager.py:_on_file_modified():682] file/dir modified: /home/clemens/repositorys/pytorch-ai/TicTacToe_AI/Net/wandb/dryrun-20200128_134313-mvx4evw0/config.yaml
-2020-01-28 14:43:32,850 INFO MainThread:32731 [run_manager.py:_sync_etc():1313] process received interrupt signal, shutting down
-2020-01-28 14:43:32,850 INFO MainThread:32731 [run_manager.py:_sync_etc():1366] closing log streams and sending exitcode to W&B
-2020-01-28 14:43:32,851 INFO MainThread:32731 [run_manager.py:shutdown():1057] shutting down system stats and metadata service
- MainThread:32731 [mvx4evw0:run_manager.py:_sync_etc():1366] closing log streams and sending exitcode to W&B
-2020-01-28 14:43:32,851 INFO MainThread:32731 [mvx4evw0:run_manager.py:shutdown():1057] shutting down system stats and metadata service
+2020-01-28 15:11:43,421 DEBUG MainThread:25378 [wandb_config.py:_load_defaults():111] no defaults not found in config-defaults.yaml
+2020-01-28 15:11:43,433 DEBUG MainThread:25378 [cmd.py:execute():728] Popen(['git', 'cat-file', '--batch-check'], cwd=/home/clemens/Dokumente/repos/pytorch-ai, universal_newlines=False, shell=None, istream=<valid stream>)
+2020-01-28 15:11:43,439 DEBUG MainThread:25378 [cmd.py:execute():728] Popen(['git', 'rev-parse', '--show-toplevel'], cwd=/home/clemens/Dokumente/repos/pytorch-ai, universal_newlines=False, shell=None, istream=None)
+2020-01-28 15:11:43,445 DEBUG MainThread:25378 [cmd.py:execute():728] Popen(['git', 'status', '--porcelain', '--untracked-files'], cwd=/home/clemens/Dokumente/repos/pytorch-ai, universal_newlines=False, shell=None, istream=None)
+2020-01-28 15:11:43,463 DEBUG MainThread:25378 [run_manager.py:__init__():535] Initialized sync for tictactoe/3mybeyuz
+2020-01-28 15:11:43,467 INFO MainThread:25378 [run_manager.py:wrap_existing_process():1133] wrapping existing process 25368
+2020-01-28 15:11:43,468 WARNING MainThread:25378 [io_wrap.py:register():104] SIGWINCH handler was not None: <Handlers.SIG_DFL: 0>
+2020-01-28 15:11:43,473 DEBUG MainThread:25378 [connectionpool.py:_new_conn():824] Starting new HTTPS connection (1): pypi.org
+2020-01-28 15:11:43,571 DEBUG MainThread:25378 [connectionpool.py:_make_request():396] https://pypi.org:443 "GET /pypi/wandb/json HTTP/1.1" 200 39767
+2020-01-28 15:11:43,600 INFO MainThread:25378 [run_manager.py:init_run():918] system metrics and metadata threads started
+2020-01-28 15:11:43,600 INFO MainThread:25378 [run_manager.py:init_run():952] upserting run before process can begin, waiting at most 10 seconds
+2020-01-28 15:11:43,615 DEBUG Thread-14 :25378 [connectionpool.py:_new_conn():824] Starting new HTTPS connection (1): api.wandb.ai
+2020-01-28 15:11:43,896 DEBUG Thread-14 :25378 [connectionpool.py:_make_request():396] https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 543
+2020-01-28 15:11:43,909 INFO Thread-14 :25378 [run_manager.py:_upsert_run():1037] saving patches
+2020-01-28 15:11:43,910 DEBUG Thread-14 :25378 [cmd.py:execute():728] Popen(['git', 'rev-parse', '--show-toplevel'], cwd=/home/clemens/Dokumente/repos/pytorch-ai, universal_newlines=False, shell=None, istream=None)
+2020-01-28 15:11:43,916 DEBUG Thread-14 :25378 [cmd.py:execute():728] Popen(['git', 'diff', '--cached', '--abbrev=40', '--full-index', '--raw'], cwd=/home/clemens/Dokumente/repos/pytorch-ai, universal_newlines=False, shell=None, istream=None)
+2020-01-28 15:11:43,923 DEBUG Thread-14 :25378 [cmd.py:execute():728] Popen(['git', 'diff', '--abbrev=40', '--full-index', '--raw'], cwd=/home/clemens/Dokumente/repos/pytorch-ai, universal_newlines=False, shell=None, istream=None)
+2020-01-28 15:11:43,931 DEBUG Thread-14 :25378 [cmd.py:execute():728] Popen(['git', 'version'], cwd=/home/clemens/Dokumente/repos/pytorch-ai, universal_newlines=False, shell=None, istream=None)
diff --git a/TicTacToe_AI/Net/wandb/settings b/TicTacToe_AI/Net/wandb/settings
index 26efadd..a700e2e 100644
--- a/TicTacToe_AI/Net/wandb/settings
+++ b/TicTacToe_AI/Net/wandb/settings
@@ -1,5 +1,4 @@
[default]
project = tictactoe
entity = cdautermann
-disabled = true

View file

@ -0,0 +1,5 @@
running on cuda:0
Loading file...
986410
Generating testset...
0%| | 0/10000 [00:00<?, ?it/s] 6%|██▋ | 565/10000 [00:00<00:01, 5649.12it/s] 12%|█████▍ | 1172/10000 [00:00<00:01, 5858.63it/s] 18%|████████▏ | 1777/10000 [00:00<00:01, 5921.87it/s] 24%|██████████▉ | 2385/10000 [00:00<00:01, 5960.03it/s] 30%|█████████████▊ | 2995/10000 [00:00<00:01, 5986.67it/s] 36%|████████████████▌ | 3604/10000 [00:00<00:01, 6002.68it/s] 42%|███████████████████▍ | 4213/10000 [00:00<00:00, 6014.75it/s] 48%|██████████████████████▏ | 4820/10000 [00:00<00:00, 6020.54it/s] 54%|████████████████████████▉ | 5425/10000 [00:00<00:00, 6023.06it/s] 60%|███████████████████████████▊ | 6037/10000 [00:01<00:00, 6032.24it/s] 66%|██████████████████████████████▌ | 6644/10000 [00:01<00:00, 6035.51it/s] 73%|█████████████████████████████████▎ | 7252/10000 [00:01<00:00, 6039.02it/s] 79%|████████████████████████████████████▏ | 7855/10000 [00:01<00:00, 6037.69it/s] 85%|██████████████████████████████████████▉ | 8459/10000 [00:01<00:00, 6037.69it/s] 91%|█████████████████████████████████████████▋ | 9063/10000 [00:01<00:00, 6037.35it/s] 97%|████████████████████████████████████████████▍ | 9668/10000 [00:01<00:00, 6038.05it/s]

View file

@ -0,0 +1,323 @@
absl-py==0.7.1
adal==1.2.1
advancedhtmlparser==8.1.6
aenum==2.1.2
altgraph==0.16.1
amqp==1.4.9
anyjson==0.3.3
apturl==0.5.2
asn1crypto==0.24.0
astor==0.8.0
atomicwrites==1.3.0
attrs==19.1.0
autopep8==1.3.3
azure-applicationinsights==0.1.0
azure-batch==4.1.3
azure-common==1.1.18
azure-cosmosdb-nspkg==2.0.2
azure-cosmosdb-table==1.0.5
azure-datalake-store==0.0.41
azure-eventgrid==1.2.0
azure-graphrbac==0.40.0
azure-keyvault==1.1.0
azure-loganalytics==0.1.0
azure-mgmt-advisor==1.0.1
azure-mgmt-applicationinsights==0.1.1
azure-mgmt-authorization==0.50.0
azure-mgmt-batch==5.0.1
azure-mgmt-batchai==2.0.0
azure-mgmt-billing==0.2.0
azure-mgmt-cdn==3.0.0
azure-mgmt-cognitiveservices==3.0.0
azure-mgmt-commerce==1.0.1
azure-mgmt-compute==4.4.0
azure-mgmt-consumption==2.0.0
azure-mgmt-containerinstance==1.4.0
azure-mgmt-containerregistry==2.7.0
azure-mgmt-containerservice==4.4.0
azure-mgmt-cosmosdb==0.4.1
azure-mgmt-datafactory==0.6.0
azure-mgmt-datalake-analytics==0.6.0
azure-mgmt-datalake-nspkg==3.0.1
azure-mgmt-datalake-store==0.5.0
azure-mgmt-datamigration==1.0.0
azure-mgmt-devspaces==0.1.0
azure-mgmt-devtestlabs==2.2.0
azure-mgmt-dns==2.1.0
azure-mgmt-eventgrid==1.0.0
azure-mgmt-eventhub==2.3.0
azure-mgmt-hanaonazure==0.1.1
azure-mgmt-iotcentral==0.1.0
azure-mgmt-iothub==0.5.0
azure-mgmt-iothubprovisioningservices==0.2.0
azure-mgmt-keyvault==1.1.0
azure-mgmt-loganalytics==0.2.0
azure-mgmt-logic==3.0.0
azure-mgmt-machinelearningcompute==0.4.1
azure-mgmt-managementgroups==0.1.0
azure-mgmt-managementpartner==0.1.0
azure-mgmt-maps==0.1.0
azure-mgmt-marketplaceordering==0.1.0
azure-mgmt-media==1.0.0
azure-mgmt-monitor==0.5.2
azure-mgmt-msi==0.2.0
azure-mgmt-network==2.5.1
azure-mgmt-notificationhubs==2.0.0
azure-mgmt-nspkg==3.0.2
azure-mgmt-policyinsights==0.1.0
azure-mgmt-powerbiembedded==2.0.0
azure-mgmt-rdbms==1.5.0
azure-mgmt-recoveryservices==0.3.0
azure-mgmt-recoveryservicesbackup==0.3.0
azure-mgmt-redis==5.0.0
azure-mgmt-relay==0.1.0
azure-mgmt-reservations==0.2.1
azure-mgmt-resource==2.1.0
azure-mgmt-scheduler==2.0.0
azure-mgmt-search==2.0.0
azure-mgmt-servicebus==0.5.3
azure-mgmt-servicefabric==0.2.0
azure-mgmt-signalr==0.1.1
azure-mgmt-sql==0.9.1
azure-mgmt-storage==2.0.0
azure-mgmt-subscription==0.2.0
azure-mgmt-trafficmanager==0.50.0
azure-mgmt-web==0.35.0
azure-mgmt==4.0.0
azure-nspkg==3.0.2
azure-servicebus==0.21.1
azure-servicefabric==6.3.0.0
azure-servicemanagement-legacy==0.20.6
azure-storage-blob==1.5.0
azure-storage-common==1.4.0
azure-storage-file==1.4.0
azure-storage-queue==1.4.0
azure==4.0.0
backcall==0.1.0
bcrypt==3.1.4
beautifulsoup4==4.6.0
billiard==3.3.0.23
binwalk==2.1.1
bleach==1.5.0
blinker==1.4
brlapi==0.6.6
browser-cookie3==0.6.4
celery==3.1.26.post2
certifi==2017.11.5
cffi==1.11.5
chardet==3.0.4
click==7.0
cloudpickle==1.2.1
command-not-found==0.3
configparser==4.0.2
cryptography==2.1.4
cupshelpers==1.0
cycler==0.10.0
dataclasses==0.6
dbf==0.97.11
dbfread==2.0.7
decorator==4.4.0
defer==1.0.6
defusedxml==0.5.0
distro-info==0.18ubuntu0.18.04.1
django-celery==3.2.2
django==2.1
djongo==1.2.29
docker-pycreds==0.4.0
docopt==0.6.2
docx==0.2.4
entrypoints==0.3
enum34==1.1.6
f.lux-indicator-applet==1.1.11-pre
feedparser==5.2.1
flask==1.0.2
future==0.16.0
gast==0.2.2
gephistreamer==2.0.3
gitdb2==2.0.6
gitpython==3.0.5
gql==0.2.0
graphql-core==1.1
grpcio==1.11.1
html2markdown==0.1.7
html5lib==0.9999999
httplib2==0.9.2
idna==2.6
ifaddr==0.1.4
imageio==2.5.0
importlib-metadata==0.19
ipaddress==1.0.22
ipykernel==5.1.1
ipython-genutils==0.2.0
ipython==7.5.0
ipywidgets==7.4.2
isodate==0.6.0
itsdangerous==1.1.0
jedi==0.13.3
jinja2==2.10.1
jsonschema==3.0.1
jupyter-client==5.2.4
jupyter-console==6.0.0
jupyter-core==4.4.0
jupyter==1.0.0
keyring==10.6.0
keyrings.alt==3.0
kiwisolver==1.0.1
kombu==3.0.37
language-selector==0.1
launchpadlib==1.10.6
lazr.restfulclient==0.13.5
lazr.uri==1.0.3
llvmlite==0.29.0
louis==3.5.0
lxml==4.2.1
macaroonbakery==1.1.3
macholib==1.11
mako==1.0.7
markdown==3.1.1
markupsafe==1.0
matplotlib==2.2.0
mechanize==0.2.5
mistune==0.8.4
mlagents-envs==0.9.1
mlagents==0.9.1
more-itertools==7.2.0
msrest==0.6.4
msrestazure==0.6.0
nbconvert==5.5.0
nbformat==4.4.0
netifaces==0.10.4
networkx==2.3
notebook==5.7.8
numba==0.44.1
numpy==1.14.2
nvidia-ml-py3==7.352.0
oauth==1.0.1
oauthlib==3.0.1
olefile==0.45.1
pandocfilters==1.4.2
paramiko==2.4.1
parso==0.4.0
pathtools==0.1.2
pbkdf2==1.3
pefile==2018.8.8
pexpect==4.2.1
pickleshare==0.7.5
pillow==5.2.0
pip==19.0.1
plotly==3.9.0
pluggy==0.12.0
pocketsphinx==0.1.3
prometheus-client==0.6.0
promise==2.3
prompt-toolkit==2.0.9
protobuf==3.6.1
psutil==5.6.7
psycopg2==2.7.5
ptyprocess==0.6.0
py==1.8.0
pyaes==1.6.1
pyasn1==0.4.2
pyaudio==0.2.11
pycairo==1.16.2
pycodestyle==2.3.1
pycparser==2.18
pycrypto==2.6.1
pycups==1.9.73
pydeepl==0.9
pyglet==1.4.1
pygments==2.3.1
pygobject==3.26.1
pyinstaller==3.4
pyjwt==1.5.3
pymacaroons==0.13.0
pymongo==3.7.1
pynacl==1.1.2
pyopengl==3.1.0
pyparsing==2.2.0
pypdf2==1.26.0
pyqtgraph==0.10.0
pyrfc3339==1.0
pyrsistent==0.15.2
pytest==3.10.1
python-apt==1.6.5+ubuntu0.2
python-dateutil==2.7.3
python-debian==0.1.32
python-docx==0.8.6
python-gitlab==1.3.0
pytz==2018.4
pywavelets==1.0.3
pyxdg==0.25
pyyaml==3.12
pyzmq==18.0.1
qtconsole==4.4.4
queryablelist==3.1.0
reportlab==3.4.0
requests-oauthlib==1.2.0
requests-unixsocket==0.1.5
requests==2.21.0
retrying==1.3.3
scikit-image==0.15.0
scipy==1.3.0
scour==0.36
screen-resolution-extra==0.0.0
secretstorage==2.3.1
selenium==3.7.0
send2trash==1.5.0
sentry-sdk==0.14.1
setuptools==38.4.0
shortuuid==0.5.0
simplejson==3.13.2
six==1.11.0
smmap2==2.0.5
speechrecognition==3.8.1
sqlparse==0.2.4
ssh-import-id==5.7
subprocess32==3.5.4
system-service==0.3
systemd-python==234
tensorboard==1.7.0
tensorflow==1.7.1
termcolor==1.1.0
terminado==0.8.2
testpath==0.4.2
torch==1.4.0
torchfile==0.1.0
torchvision==0.5.0
tornado==6.0.1
tqdm==4.23.4
traitlets==4.3.2
ubuntu-drivers-common==0.0.0
ufw==0.36
unattended-upgrades==0.1
unity-scope-calculator==0.1
unity-scope-chromiumbookmarks==0.1
unity-scope-colourlovers==0.1
unity-scope-devhelp==0.1
unity-scope-firefoxbookmarks==0.1
unity-scope-manpages==0.1
unity-scope-openclipart==0.1
unity-scope-texdoc==0.1
unity-scope-tomboy==0.1
unity-scope-virtualbox==0.1
unity-scope-yelp==0.1
unity-scope-zotero==0.1
urllib3==1.22
usb-creator==0.3.3
vine==1.1.4
visdom==0.1.8.8
wadllib==1.3.2
wakeonlan==1.1.6
wandb==0.8.22
watchdog==0.10.0
wcwidth==0.1.7
webencodings==0.5.1
websocket-client==0.55.0
werkzeug==0.15.2
wheel==0.30.0
widgetsnbextension==3.4.2
ws4py==0.5.1
xkit==0.0.0
zeroconf==0.21.3
zipp==0.5.2
zope.interface==4.3.2

View file

@ -0,0 +1 @@
{"system.gpu.0.gpu": 0.8, "system.gpu.0.memory": 0.8, "system.gpu.0.memoryAllocated": 8.6, "system.gpu.0.temp": 53.8, "system.gpu.0.powerWatts": 23.58, "system.gpu.0.powerPercent": 13.1, "system.cpu": 19.46, "system.memory": 23.18, "system.disk": 4.8, "system.proc.memory.availableMB": 6140.65, "system.proc.memory.rssMB": 1221.76, "system.proc.memory.percent": 15.29, "system.proc.cpu.threads": 3.8, "system.network.sent": 38056, "system.network.recv": 85146, "_wandb": true, "_timestamp": 1580220709, "_runtime": 6}

View file

@ -0,0 +1,25 @@
{
"root": "/home/clemens/Dokumente/repos/pytorch-ai",
"program": "pytorch_ai.py",
"git": {
"remote": "git@github.com:Clemens-Dautermann/pytorch-ai.git",
"commit": "56ee2635b5fec0a3976a4e7ddc55a89d4dea93bc"
},
"email": "clemens.dautermann@t-online.de",
"startedAt": "2020-01-28T14:11:42.792037",
"host": "clemens-ubuntu",
"username": "clemens",
"executable": "/usr/bin/python3",
"os": "Linux-4.15.0-58-generic-x86_64-with-Ubuntu-18.04-bionic",
"python": "3.6.9",
"gpu": "GeForce GTX 960",
"gpu_count": 1,
"cpu_count": 4,
"args": [],
"state": "killed",
"jobType": null,
"mode": "run",
"project": "tictactoe",
"heartbeatAt": "2020-01-28T14:11:50.607712",
"exitcode": 255
}