Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
probml
GitHub Repository: probml/pyprobml
Path: blob/master/notebooks/book2/35/supplementary/rl_demos_tf.ipynb
1193 views
Kernel: Python 3

Open In Colab

GitHub

Colab authors: Kevin P. Murphy ([email protected]) and Mahmoud Soliman ([email protected])

# Attribution
# This notebook is based on the following:
# https://github.com/mjsML/VizDoom-Keras-RL
# https://colab.research.google.com/github/keras-team/keras-io/blob/master/examples/rl/ipynb/actor_critic_cartpole.ipynb
# Imports from tensorflow.python.client import device_lib from psutil import virtual_memory import cv2 from google.colab.patches import cv2_imshow %tensorflow_version 2.x import tensorflow as tf import os from sklearn.neighbors import KNeighborsClassifier as KNN from sklearn.model_selection import cross_val_score from sklearn.datasets.samples_generator import make_blobs from IPython import display from matplotlib import pyplot as plt import numpy as np import pathlib import shutil import tempfile from tqdm import tqdm
/usr/local/lib/python3.6/dist-packages/sklearn/utils/deprecation.py:144: FutureWarning: The sklearn.datasets.samples_generator module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.datasets. Anything that cannot be imported from sklearn.datasets is now part of the private API. warnings.warn(message, FutureWarning)
# title Hardware check
def find_accelerator():
    """Describe the attached accelerator and the machine's physical RAM.

    Returns:
        A ``(device, ram)`` pair. ``device`` is a one-element list naming the
        TPU workers when the TPU cluster resolver succeeds; otherwise it is
        the list of ``physical_device_desc`` strings of the local GPU
        devices, or ``None`` when that list is empty. ``ram`` is a formatted
        string with the total physical memory in GB.
    """
    memory = virtual_memory()
    local_devices = device_lib.list_local_devices()
    bytes_per_gb = 1024 ** 3
    ram_summary = "Physical RAM: {:.2f} GB".format(memory.total / bytes_per_gb)

    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        workers = resolver.cluster_spec().as_dict()["worker"]
        accelerators = ["TPU at " + str(workers)]
    except ValueError:
        # A ValueError from the TPU lookup is treated as "no TPU": fall back
        # to whatever GPUs were enumerated above.
        accelerators = []
        for dev in local_devices:
            if dev.device_type == "GPU":
                accelerators.append(dev.physical_device_desc)

    # An empty list is reported as None so the printed message is explicit.
    return (accelerators or None), ram_summary


a, r = find_accelerator()
print("Please make sure that the statement below says Accelerator found")
print("Accelerator found:", a, r)
Please make sure that the statement below says Accelerator found Accelerator found: ['device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5'] Physical RAM: 12.72 GB
#title Install the extra required packages if any # Installation of libs as per # https://stackoverflow.com/questions/50667565/how-to-install-vizdoom-using-google-colab %%bash # Install deps from # https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#-linux apt-get install build-essential zlib1g-dev libsdl2-dev libjpeg-dev \ nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \ libopenal-dev timidity libwildmidi-dev unzip apt-get install libboost-all-dev apt-get install liblua5.1-dev
Reading package lists... Building dependency tree... Reading state information... build-essential is already the newest version (12.4ubuntu1). libjpeg-dev is already the newest version (8c-2ubuntu8). libjpeg-dev set to manually installed. zlib1g-dev is already the newest version (1:1.2.11.dfsg-0ubuntu2). zlib1g-dev set to manually installed. cmake is already the newest version (3.10.2-1ubuntu2.18.04.1). git is already the newest version (1:2.17.1-1ubuntu0.7). libbz2-dev is already the newest version (1.0.6-8.1ubuntu0.2). libbz2-dev set to manually installed. tar is already the newest version (1.29b-2ubuntu0.1). unzip is already the newest version (6.0-21ubuntu1.1). The following additional packages will be installed: autoconf automake autopoint autotools-dev debhelper dh-autoreconf dh-strip-nondeterminism file freepats gettext gettext-base gir1.2-atk-1.0 gir1.2-freedesktop gir1.2-gdkpixbuf-2.0 gir1.2-gtk-2.0 gir1.2-ibus-1.0 gir1.2-pango-1.0 intltool-debian libarchive-cpio-perl libarchive-zip-perl libatk1.0-dev libaudio2 libcairo-script-interpreter2 libcairo2-dev libcapnp-0.6.1 libdbus-1-dev libfile-stripnondeterminism-perl libfluidsynth1 libgail-common libgail18 libgdk-pixbuf2.0-dev libgtk2.0-0 libgtk2.0-bin libgtk2.0-common libibus-1.0-5 libibus-1.0-dev libmagic-mgc libmagic1 libmail-sendmail-perl libmirclient-dev libmirclient9 libmircommon-dev libmircommon7 libmircookie-dev libmircookie2 libmircore-dev libmircore1 libmirprotobuf3 libpango1.0-dev libpangoxft-1.0-0 libpixman-1-dev libprotobuf-dev libprotobuf-lite10 libpulse-dev libpulse-mainloop-glib0 libsigsegv2 libsndio-dev libsys-hostname-long-perl libtimedate-perl libtool libudev-dev libwildmidi-config libwildmidi2 libxcb-shm0-dev libxcomposite-dev libxcursor-dev libxinerama-dev libxkbcommon-dev libxml2-utils libxrandr-dev libxv-dev m4 po-debconf timidity-daemon x11proto-composite-dev x11proto-randr-dev x11proto-xinerama-dev Suggested packages: autoconf-archive gnu-standards autoconf-doc dh-make dwz gettext-doc 
libasprintf-dev libgettextpo-dev nas libcairo2-doc fluidr3mono-gm-soundfont | timgm6mb-soundfont | fluid-soundfont-gm gvfs libgtk2.0-doc imagemagick libpango1.0-doc libtool-doc gcj-jdk m4-doc libmail-box-perl fluid-soundfont-gm fluid-soundfont-gs pmidi The following NEW packages will be installed: autoconf automake autopoint autotools-dev debhelper dh-autoreconf dh-strip-nondeterminism file freepats gettext gettext-base gir1.2-atk-1.0 gir1.2-freedesktop gir1.2-gdkpixbuf-2.0 gir1.2-gtk-2.0 gir1.2-ibus-1.0 gir1.2-pango-1.0 intltool-debian libarchive-cpio-perl libarchive-zip-perl libatk1.0-dev libaudio2 libcairo-script-interpreter2 libcairo2-dev libcapnp-0.6.1 libdbus-1-dev libfile-stripnondeterminism-perl libfluidsynth-dev libfluidsynth1 libgail-common libgail18 libgdk-pixbuf2.0-dev libgme-dev libgtk2.0-0 libgtk2.0-bin libgtk2.0-common libgtk2.0-dev libibus-1.0-5 libibus-1.0-dev libmagic-mgc libmagic1 libmail-sendmail-perl libmirclient-dev libmirclient9 libmircommon-dev libmircommon7 libmircookie-dev libmircookie2 libmircore-dev libmircore1 libmirprotobuf3 libopenal-dev libpango1.0-dev libpangoxft-1.0-0 libpixman-1-dev libprotobuf-dev libprotobuf-lite10 libpulse-dev libpulse-mainloop-glib0 libsdl2-dev libsigsegv2 libsndio-dev libsys-hostname-long-perl libtimedate-perl libtool libudev-dev libwildmidi-config libwildmidi-dev libwildmidi2 libxcb-shm0-dev libxcomposite-dev libxcursor-dev libxinerama-dev libxkbcommon-dev libxml2-utils libxrandr-dev libxv-dev m4 nasm po-debconf timidity timidity-daemon x11proto-composite-dev x11proto-randr-dev x11proto-xinerama-dev 0 upgraded, 85 newly installed, 0 to remove and 15 not upgraded. Need to get 45.2 MB of archives. After this operation, 121 MB of additional disk space will be used. 
Get:1 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libmagic-mgc amd64 1:5.32-2ubuntu0.4 [184 kB] Get:2 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libmagic1 amd64 1:5.32-2ubuntu0.4 [68.6 kB] Get:3 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 file amd64 1:5.32-2ubuntu0.4 [22.1 kB] Get:4 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 gettext-base amd64 0.19.8.1-6ubuntu0.3 [113 kB] Get:5 http://archive.ubuntu.com/ubuntu bionic/main amd64 libsigsegv2 amd64 2.12-1 [14.7 kB] Get:6 http://archive.ubuntu.com/ubuntu bionic/main amd64 m4 amd64 1.4.18-1 [197 kB] Get:7 http://archive.ubuntu.com/ubuntu bionic/main amd64 autoconf all 2.69-11 [322 kB] Get:8 http://archive.ubuntu.com/ubuntu bionic/main amd64 autotools-dev all 20180224.1 [39.6 kB] Get:9 http://archive.ubuntu.com/ubuntu bionic/main amd64 automake all 1:1.15.1-3ubuntu2 [509 kB] Get:10 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 autopoint all 0.19.8.1-6ubuntu0.3 [426 kB] Get:11 http://archive.ubuntu.com/ubuntu bionic/main amd64 libtool all 2.4.6-2 [194 kB] Get:12 http://archive.ubuntu.com/ubuntu bionic/main amd64 dh-autoreconf all 17 [15.8 kB] Get:13 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libarchive-zip-perl all 1.60-1ubuntu0.1 [84.6 kB] Get:14 http://archive.ubuntu.com/ubuntu bionic/main amd64 libfile-stripnondeterminism-perl all 0.040-1.1~build1 [13.8 kB] Get:15 http://archive.ubuntu.com/ubuntu bionic/main amd64 libtimedate-perl all 2.3000-2 [37.5 kB] Get:16 http://archive.ubuntu.com/ubuntu bionic/main amd64 dh-strip-nondeterminism all 0.040-1.1~build1 [5,208 B] Get:17 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 gettext amd64 0.19.8.1-6ubuntu0.3 [1,293 kB] Get:18 http://archive.ubuntu.com/ubuntu bionic/main amd64 intltool-debian all 0.35.0+20060710.4 [24.9 kB] Get:19 http://archive.ubuntu.com/ubuntu bionic/main amd64 po-debconf all 1.0.20 [232 kB] Get:20 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 
debhelper all 11.1.6ubuntu2 [902 kB] Get:21 http://archive.ubuntu.com/ubuntu bionic/universe amd64 freepats all 20060219-1 [29.0 MB] Get:22 http://archive.ubuntu.com/ubuntu bionic/main amd64 gir1.2-atk-1.0 amd64 2.28.1-1 [17.8 kB] Get:23 http://archive.ubuntu.com/ubuntu bionic/main amd64 gir1.2-freedesktop amd64 1.56.1-1 [9,080 B] Get:24 http://archive.ubuntu.com/ubuntu bionic/main amd64 gir1.2-gdkpixbuf-2.0 amd64 2.36.11-2 [7,748 B] Get:25 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgtk2.0-common all 2.24.32-1ubuntu1 [125 kB] Get:26 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpangoxft-1.0-0 amd64 1.40.14-1ubuntu0.1 [15.0 kB] Get:27 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 gir1.2-pango-1.0 amd64 1.40.14-1ubuntu0.1 [21.6 kB] Get:28 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgtk2.0-0 amd64 2.24.32-1ubuntu1 [1,769 kB] Get:29 http://archive.ubuntu.com/ubuntu bionic/main amd64 gir1.2-gtk-2.0 amd64 2.24.32-1ubuntu1 [172 kB] Get:30 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libibus-1.0-5 amd64 1.5.17-3ubuntu5.3 [133 kB] Get:31 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 gir1.2-ibus-1.0 amd64 1.5.17-3ubuntu5.3 [66.5 kB] Get:32 http://archive.ubuntu.com/ubuntu bionic/main amd64 libarchive-cpio-perl all 0.10-1 [9,644 B] Get:33 http://archive.ubuntu.com/ubuntu bionic/main amd64 libatk1.0-dev amd64 2.28.1-1 [79.9 kB] Get:34 http://archive.ubuntu.com/ubuntu bionic/main amd64 libaudio2 amd64 1.9.4-6 [50.3 kB] Get:35 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libcairo-script-interpreter2 amd64 1.15.10-2ubuntu0.1 [53.5 kB] Get:36 http://archive.ubuntu.com/ubuntu bionic/main amd64 libpixman-1-dev amd64 0.34.0-2 [244 kB] Get:37 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libxcb-shm0-dev amd64 1.13-2~ubuntu18.04 [6,684 B] Get:38 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libcairo2-dev amd64 1.15.10-2ubuntu0.1 [626 kB] Get:39 
http://archive.ubuntu.com/ubuntu bionic/main amd64 libcapnp-0.6.1 amd64 0.6.1-1ubuntu1 [658 kB] Get:40 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libdbus-1-dev amd64 1.12.2-1ubuntu1.2 [165 kB] Get:41 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libfluidsynth1 amd64 1.1.9-1 [137 kB] Get:42 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libfluidsynth-dev amd64 1.1.9-1 [19.7 kB] Get:43 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgail18 amd64 2.24.32-1ubuntu1 [14.2 kB] Get:44 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgail-common amd64 2.24.32-1ubuntu1 [112 kB] Get:45 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgdk-pixbuf2.0-dev amd64 2.36.11-2 [46.8 kB] Get:46 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgme-dev amd64 0.6.2-1 [5,796 B] Get:47 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgtk2.0-bin amd64 2.24.32-1ubuntu1 [7,536 B] Get:48 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpango1.0-dev amd64 1.40.14-1ubuntu0.1 [288 kB] Get:49 http://archive.ubuntu.com/ubuntu bionic/main amd64 x11proto-xinerama-dev all 2018.4-4 [2,628 B] Get:50 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxinerama-dev amd64 2:1.1.3-1 [8,404 B] Get:51 http://archive.ubuntu.com/ubuntu bionic/main amd64 x11proto-randr-dev all 2018.4-4 [2,620 B] Get:52 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxrandr-dev amd64 2:1.5.1-1 [24.0 kB] Get:53 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcursor-dev amd64 1:1.1.15-1 [26.5 kB] Get:54 http://archive.ubuntu.com/ubuntu bionic/main amd64 x11proto-composite-dev all 1:2018.4-4 [2,620 B] Get:55 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxcomposite-dev amd64 1:0.4.4-2 [9,136 B] Get:56 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libxml2-utils amd64 2.9.4+dfsg1-6.1ubuntu1.3 [35.9 kB] Get:57 http://archive.ubuntu.com/ubuntu bionic/main amd64 libgtk2.0-dev amd64 2.24.32-1ubuntu1 [2,652 kB] Get:58 
http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libibus-1.0-dev amd64 1.5.17-3ubuntu5.3 [145 kB] Get:59 http://archive.ubuntu.com/ubuntu bionic/main amd64 libsys-hostname-long-perl all 1.5-1 [11.7 kB] Get:60 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmail-sendmail-perl all 0.80-1 [22.6 kB] Get:61 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmircore1 amd64 0.31.1-0ubuntu1 [26.5 kB] Get:62 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmircommon7 amd64 0.31.1-0ubuntu1 [73.9 kB] Get:63 http://archive.ubuntu.com/ubuntu bionic/main amd64 libprotobuf-lite10 amd64 3.0.0-9.1ubuntu1 [97.7 kB] Get:64 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmirprotobuf3 amd64 0.31.1-0ubuntu1 [127 kB] Get:65 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmirclient9 amd64 0.31.1-0ubuntu1 [199 kB] Get:66 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmircore-dev amd64 0.31.1-0ubuntu1 [21.7 kB] Get:67 http://archive.ubuntu.com/ubuntu bionic/main amd64 libprotobuf-dev amd64 3.0.0-9.1ubuntu1 [959 kB] Get:68 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libxkbcommon-dev amd64 0.8.2-1~ubuntu18.04.1 [150 kB] Get:69 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmircommon-dev amd64 0.31.1-0ubuntu1 [13.9 kB] Get:70 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmircookie2 amd64 0.31.1-0ubuntu1 [19.7 kB] Get:71 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmircookie-dev amd64 0.31.1-0ubuntu1 [4,392 B] Get:72 http://archive.ubuntu.com/ubuntu bionic/main amd64 libmirclient-dev amd64 0.31.1-0ubuntu1 [47.8 kB] Get:73 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopenal-dev amd64 1:1.18.2-2 [20.9 kB] Get:74 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpulse-mainloop-glib0 amd64 1:11.1-1ubuntu7.11 [22.1 kB] Get:75 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libpulse-dev amd64 1:11.1-1ubuntu7.11 [81.5 kB] Get:76 http://archive.ubuntu.com/ubuntu bionic/universe amd64 
libsndio-dev amd64 1.1.0-3 [13.3 kB] Get:77 http://archive.ubuntu.com/ubuntu bionic-updates/main amd64 libudev-dev amd64 237-3ubuntu10.43 [19.1 kB] Get:78 http://archive.ubuntu.com/ubuntu bionic/main amd64 libxv-dev amd64 2:1.0.11-1 [32.5 kB] Get:79 http://archive.ubuntu.com/ubuntu bionic-updates/universe amd64 libsdl2-dev amd64 2.0.8+dfsg1-1ubuntu1.18.04.4 [683 kB] Get:80 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libwildmidi-config all 0.4.2-1 [7,212 B] Get:81 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libwildmidi2 amd64 0.4.2-1 [55.8 kB] Get:82 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libwildmidi-dev amd64 0.4.2-1 [86.4 kB] Get:83 http://archive.ubuntu.com/ubuntu bionic/universe amd64 nasm amd64 2.13.02-0.1 [359 kB] Get:84 http://archive.ubuntu.com/ubuntu bionic/universe amd64 timidity amd64 2.13.2-41 [585 kB] Get:85 http://archive.ubuntu.com/ubuntu bionic/universe amd64 timidity-daemon all 2.13.2-41 [5,984 B] Fetched 45.2 MB in 3s (17.4 MB/s) Selecting previously unselected package libmagic-mgc. (Reading database ... 145480 files and directories currently installed.) Preparing to unpack .../00-libmagic-mgc_1%3a5.32-2ubuntu0.4_amd64.deb ... Unpacking libmagic-mgc (1:5.32-2ubuntu0.4) ... Selecting previously unselected package libmagic1:amd64. Preparing to unpack .../01-libmagic1_1%3a5.32-2ubuntu0.4_amd64.deb ... Unpacking libmagic1:amd64 (1:5.32-2ubuntu0.4) ... Selecting previously unselected package file. Preparing to unpack .../02-file_1%3a5.32-2ubuntu0.4_amd64.deb ... Unpacking file (1:5.32-2ubuntu0.4) ... Selecting previously unselected package gettext-base. Preparing to unpack .../03-gettext-base_0.19.8.1-6ubuntu0.3_amd64.deb ... Unpacking gettext-base (0.19.8.1-6ubuntu0.3) ... Selecting previously unselected package libsigsegv2:amd64. Preparing to unpack .../04-libsigsegv2_2.12-1_amd64.deb ... Unpacking libsigsegv2:amd64 (2.12-1) ... Selecting previously unselected package m4. 
Preparing to unpack .../05-m4_1.4.18-1_amd64.deb ... Unpacking m4 (1.4.18-1) ... Selecting previously unselected package autoconf. Preparing to unpack .../06-autoconf_2.69-11_all.deb ... Unpacking autoconf (2.69-11) ... Selecting previously unselected package autotools-dev. Preparing to unpack .../07-autotools-dev_20180224.1_all.deb ... Unpacking autotools-dev (20180224.1) ... Selecting previously unselected package automake. Preparing to unpack .../08-automake_1%3a1.15.1-3ubuntu2_all.deb ... Unpacking automake (1:1.15.1-3ubuntu2) ... Selecting previously unselected package autopoint. Preparing to unpack .../09-autopoint_0.19.8.1-6ubuntu0.3_all.deb ... Unpacking autopoint (0.19.8.1-6ubuntu0.3) ... Selecting previously unselected package libtool. Preparing to unpack .../10-libtool_2.4.6-2_all.deb ... Unpacking libtool (2.4.6-2) ... Selecting previously unselected package dh-autoreconf. Preparing to unpack .../11-dh-autoreconf_17_all.deb ... Unpacking dh-autoreconf (17) ... Selecting previously unselected package libarchive-zip-perl. Preparing to unpack .../12-libarchive-zip-perl_1.60-1ubuntu0.1_all.deb ... Unpacking libarchive-zip-perl (1.60-1ubuntu0.1) ... Selecting previously unselected package libfile-stripnondeterminism-perl. Preparing to unpack .../13-libfile-stripnondeterminism-perl_0.040-1.1~build1_all.deb ... Unpacking libfile-stripnondeterminism-perl (0.040-1.1~build1) ... Selecting previously unselected package libtimedate-perl. Preparing to unpack .../14-libtimedate-perl_2.3000-2_all.deb ... Unpacking libtimedate-perl (2.3000-2) ... Selecting previously unselected package dh-strip-nondeterminism. Preparing to unpack .../15-dh-strip-nondeterminism_0.040-1.1~build1_all.deb ... Unpacking dh-strip-nondeterminism (0.040-1.1~build1) ... Selecting previously unselected package gettext. Preparing to unpack .../16-gettext_0.19.8.1-6ubuntu0.3_amd64.deb ... Unpacking gettext (0.19.8.1-6ubuntu0.3) ... Selecting previously unselected package intltool-debian. 
Preparing to unpack .../17-intltool-debian_0.35.0+20060710.4_all.deb ... Unpacking intltool-debian (0.35.0+20060710.4) ... Selecting previously unselected package po-debconf. Preparing to unpack .../18-po-debconf_1.0.20_all.deb ... Unpacking po-debconf (1.0.20) ... Selecting previously unselected package debhelper. Preparing to unpack .../19-debhelper_11.1.6ubuntu2_all.deb ... Unpacking debhelper (11.1.6ubuntu2) ... Selecting previously unselected package freepats. Preparing to unpack .../20-freepats_20060219-1_all.deb ... Unpacking freepats (20060219-1) ... Selecting previously unselected package gir1.2-atk-1.0:amd64. Preparing to unpack .../21-gir1.2-atk-1.0_2.28.1-1_amd64.deb ... Unpacking gir1.2-atk-1.0:amd64 (2.28.1-1) ... Selecting previously unselected package gir1.2-freedesktop:amd64. Preparing to unpack .../22-gir1.2-freedesktop_1.56.1-1_amd64.deb ... Unpacking gir1.2-freedesktop:amd64 (1.56.1-1) ... Selecting previously unselected package gir1.2-gdkpixbuf-2.0:amd64. Preparing to unpack .../23-gir1.2-gdkpixbuf-2.0_2.36.11-2_amd64.deb ... Unpacking gir1.2-gdkpixbuf-2.0:amd64 (2.36.11-2) ... Selecting previously unselected package libgtk2.0-common. Preparing to unpack .../24-libgtk2.0-common_2.24.32-1ubuntu1_all.deb ... Unpacking libgtk2.0-common (2.24.32-1ubuntu1) ... Selecting previously unselected package libpangoxft-1.0-0:amd64. Preparing to unpack .../25-libpangoxft-1.0-0_1.40.14-1ubuntu0.1_amd64.deb ... Unpacking libpangoxft-1.0-0:amd64 (1.40.14-1ubuntu0.1) ... Selecting previously unselected package gir1.2-pango-1.0:amd64. Preparing to unpack .../26-gir1.2-pango-1.0_1.40.14-1ubuntu0.1_amd64.deb ... Unpacking gir1.2-pango-1.0:amd64 (1.40.14-1ubuntu0.1) ... Selecting previously unselected package libgtk2.0-0:amd64. Preparing to unpack .../27-libgtk2.0-0_2.24.32-1ubuntu1_amd64.deb ... Unpacking libgtk2.0-0:amd64 (2.24.32-1ubuntu1) ... Selecting previously unselected package gir1.2-gtk-2.0. 
Preparing to unpack .../28-gir1.2-gtk-2.0_2.24.32-1ubuntu1_amd64.deb ... Unpacking gir1.2-gtk-2.0 (2.24.32-1ubuntu1) ... Selecting previously unselected package libibus-1.0-5:amd64. Preparing to unpack .../29-libibus-1.0-5_1.5.17-3ubuntu5.3_amd64.deb ... Unpacking libibus-1.0-5:amd64 (1.5.17-3ubuntu5.3) ... Selecting previously unselected package gir1.2-ibus-1.0:amd64. Preparing to unpack .../30-gir1.2-ibus-1.0_1.5.17-3ubuntu5.3_amd64.deb ... Unpacking gir1.2-ibus-1.0:amd64 (1.5.17-3ubuntu5.3) ... Selecting previously unselected package libarchive-cpio-perl. Preparing to unpack .../31-libarchive-cpio-perl_0.10-1_all.deb ... Unpacking libarchive-cpio-perl (0.10-1) ... Selecting previously unselected package libatk1.0-dev:amd64. Preparing to unpack .../32-libatk1.0-dev_2.28.1-1_amd64.deb ... Unpacking libatk1.0-dev:amd64 (2.28.1-1) ... Selecting previously unselected package libaudio2:amd64. Preparing to unpack .../33-libaudio2_1.9.4-6_amd64.deb ... Unpacking libaudio2:amd64 (1.9.4-6) ... Selecting previously unselected package libcairo-script-interpreter2:amd64. Preparing to unpack .../34-libcairo-script-interpreter2_1.15.10-2ubuntu0.1_amd64.deb ... Unpacking libcairo-script-interpreter2:amd64 (1.15.10-2ubuntu0.1) ... Selecting previously unselected package libpixman-1-dev:amd64. Preparing to unpack .../35-libpixman-1-dev_0.34.0-2_amd64.deb ... Unpacking libpixman-1-dev:amd64 (0.34.0-2) ... Selecting previously unselected package libxcb-shm0-dev:amd64. Preparing to unpack .../36-libxcb-shm0-dev_1.13-2~ubuntu18.04_amd64.deb ... Unpacking libxcb-shm0-dev:amd64 (1.13-2~ubuntu18.04) ... Selecting previously unselected package libcairo2-dev:amd64. Preparing to unpack .../37-libcairo2-dev_1.15.10-2ubuntu0.1_amd64.deb ... Unpacking libcairo2-dev:amd64 (1.15.10-2ubuntu0.1) ... Selecting previously unselected package libcapnp-0.6.1:amd64. Preparing to unpack .../38-libcapnp-0.6.1_0.6.1-1ubuntu1_amd64.deb ... Unpacking libcapnp-0.6.1:amd64 (0.6.1-1ubuntu1) ... 
Selecting previously unselected package libdbus-1-dev:amd64. Preparing to unpack .../39-libdbus-1-dev_1.12.2-1ubuntu1.2_amd64.deb ... Unpacking libdbus-1-dev:amd64 (1.12.2-1ubuntu1.2) ... Selecting previously unselected package libfluidsynth1:amd64. Preparing to unpack .../40-libfluidsynth1_1.1.9-1_amd64.deb ... Unpacking libfluidsynth1:amd64 (1.1.9-1) ... Selecting previously unselected package libfluidsynth-dev:amd64. Preparing to unpack .../41-libfluidsynth-dev_1.1.9-1_amd64.deb ... Unpacking libfluidsynth-dev:amd64 (1.1.9-1) ... Selecting previously unselected package libgail18:amd64. Preparing to unpack .../42-libgail18_2.24.32-1ubuntu1_amd64.deb ... Unpacking libgail18:amd64 (2.24.32-1ubuntu1) ... Selecting previously unselected package libgail-common:amd64. Preparing to unpack .../43-libgail-common_2.24.32-1ubuntu1_amd64.deb ... Unpacking libgail-common:amd64 (2.24.32-1ubuntu1) ... Selecting previously unselected package libgdk-pixbuf2.0-dev. Preparing to unpack .../44-libgdk-pixbuf2.0-dev_2.36.11-2_amd64.deb ... Unpacking libgdk-pixbuf2.0-dev (2.36.11-2) ... Selecting previously unselected package libgme-dev:amd64. Preparing to unpack .../45-libgme-dev_0.6.2-1_amd64.deb ... Unpacking libgme-dev:amd64 (0.6.2-1) ... Selecting previously unselected package libgtk2.0-bin. Preparing to unpack .../46-libgtk2.0-bin_2.24.32-1ubuntu1_amd64.deb ... Unpacking libgtk2.0-bin (2.24.32-1ubuntu1) ... Selecting previously unselected package libpango1.0-dev. Preparing to unpack .../47-libpango1.0-dev_1.40.14-1ubuntu0.1_amd64.deb ... Unpacking libpango1.0-dev (1.40.14-1ubuntu0.1) ... Selecting previously unselected package x11proto-xinerama-dev. Preparing to unpack .../48-x11proto-xinerama-dev_2018.4-4_all.deb ... Unpacking x11proto-xinerama-dev (2018.4-4) ... Selecting previously unselected package libxinerama-dev:amd64. Preparing to unpack .../49-libxinerama-dev_2%3a1.1.3-1_amd64.deb ... Unpacking libxinerama-dev:amd64 (2:1.1.3-1) ... 
Selecting previously unselected package x11proto-randr-dev. Preparing to unpack .../50-x11proto-randr-dev_2018.4-4_all.deb ... Unpacking x11proto-randr-dev (2018.4-4) ... Selecting previously unselected package libxrandr-dev:amd64. Preparing to unpack .../51-libxrandr-dev_2%3a1.5.1-1_amd64.deb ... Unpacking libxrandr-dev:amd64 (2:1.5.1-1) ... Selecting previously unselected package libxcursor-dev:amd64. Preparing to unpack .../52-libxcursor-dev_1%3a1.1.15-1_amd64.deb ... Unpacking libxcursor-dev:amd64 (1:1.1.15-1) ... Selecting previously unselected package x11proto-composite-dev. Preparing to unpack .../53-x11proto-composite-dev_1%3a2018.4-4_all.deb ... Unpacking x11proto-composite-dev (1:2018.4-4) ... Selecting previously unselected package libxcomposite-dev:amd64. Preparing to unpack .../54-libxcomposite-dev_1%3a0.4.4-2_amd64.deb ... Unpacking libxcomposite-dev:amd64 (1:0.4.4-2) ... Selecting previously unselected package libxml2-utils. Preparing to unpack .../55-libxml2-utils_2.9.4+dfsg1-6.1ubuntu1.3_amd64.deb ... Unpacking libxml2-utils (2.9.4+dfsg1-6.1ubuntu1.3) ... Selecting previously unselected package libgtk2.0-dev. Preparing to unpack .../56-libgtk2.0-dev_2.24.32-1ubuntu1_amd64.deb ... Unpacking libgtk2.0-dev (2.24.32-1ubuntu1) ... Selecting previously unselected package libibus-1.0-dev:amd64. Preparing to unpack .../57-libibus-1.0-dev_1.5.17-3ubuntu5.3_amd64.deb ... Unpacking libibus-1.0-dev:amd64 (1.5.17-3ubuntu5.3) ... Selecting previously unselected package libsys-hostname-long-perl. Preparing to unpack .../58-libsys-hostname-long-perl_1.5-1_all.deb ... Unpacking libsys-hostname-long-perl (1.5-1) ... Selecting previously unselected package libmail-sendmail-perl. Preparing to unpack .../59-libmail-sendmail-perl_0.80-1_all.deb ... Unpacking libmail-sendmail-perl (0.80-1) ... Selecting previously unselected package libmircore1:amd64. Preparing to unpack .../60-libmircore1_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmircore1:amd64 (0.31.1-0ubuntu1) ... 
Selecting previously unselected package libmircommon7:amd64. Preparing to unpack .../61-libmircommon7_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmircommon7:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libprotobuf-lite10:amd64. Preparing to unpack .../62-libprotobuf-lite10_3.0.0-9.1ubuntu1_amd64.deb ... Unpacking libprotobuf-lite10:amd64 (3.0.0-9.1ubuntu1) ... Selecting previously unselected package libmirprotobuf3:amd64. Preparing to unpack .../63-libmirprotobuf3_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmirprotobuf3:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libmirclient9:amd64. Preparing to unpack .../64-libmirclient9_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmirclient9:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libmircore-dev:amd64. Preparing to unpack .../65-libmircore-dev_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmircore-dev:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libprotobuf-dev:amd64. Preparing to unpack .../66-libprotobuf-dev_3.0.0-9.1ubuntu1_amd64.deb ... Unpacking libprotobuf-dev:amd64 (3.0.0-9.1ubuntu1) ... Selecting previously unselected package libxkbcommon-dev:amd64. Preparing to unpack .../67-libxkbcommon-dev_0.8.2-1~ubuntu18.04.1_amd64.deb ... Unpacking libxkbcommon-dev:amd64 (0.8.2-1~ubuntu18.04.1) ... Selecting previously unselected package libmircommon-dev:amd64. Preparing to unpack .../68-libmircommon-dev_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmircommon-dev:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libmircookie2:amd64. Preparing to unpack .../69-libmircookie2_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmircookie2:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libmircookie-dev:amd64. Preparing to unpack .../70-libmircookie-dev_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmircookie-dev:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libmirclient-dev:amd64. 
Preparing to unpack .../71-libmirclient-dev_0.31.1-0ubuntu1_amd64.deb ... Unpacking libmirclient-dev:amd64 (0.31.1-0ubuntu1) ... Selecting previously unselected package libopenal-dev:amd64. Preparing to unpack .../72-libopenal-dev_1%3a1.18.2-2_amd64.deb ... Unpacking libopenal-dev:amd64 (1:1.18.2-2) ... Selecting previously unselected package libpulse-mainloop-glib0:amd64. Preparing to unpack .../73-libpulse-mainloop-glib0_1%3a11.1-1ubuntu7.11_amd64.deb ... Unpacking libpulse-mainloop-glib0:amd64 (1:11.1-1ubuntu7.11) ... Selecting previously unselected package libpulse-dev:amd64. Preparing to unpack .../74-libpulse-dev_1%3a11.1-1ubuntu7.11_amd64.deb ... Unpacking libpulse-dev:amd64 (1:11.1-1ubuntu7.11) ... Selecting previously unselected package libsndio-dev:amd64. Preparing to unpack .../75-libsndio-dev_1.1.0-3_amd64.deb ... Unpacking libsndio-dev:amd64 (1.1.0-3) ... Selecting previously unselected package libudev-dev:amd64. Preparing to unpack .../76-libudev-dev_237-3ubuntu10.43_amd64.deb ... Unpacking libudev-dev:amd64 (237-3ubuntu10.43) ... Selecting previously unselected package libxv-dev:amd64. Preparing to unpack .../77-libxv-dev_2%3a1.0.11-1_amd64.deb ... Unpacking libxv-dev:amd64 (2:1.0.11-1) ... Selecting previously unselected package libsdl2-dev:amd64. Preparing to unpack .../78-libsdl2-dev_2.0.8+dfsg1-1ubuntu1.18.04.4_amd64.deb ... Unpacking libsdl2-dev:amd64 (2.0.8+dfsg1-1ubuntu1.18.04.4) ... Selecting previously unselected package libwildmidi-config. Preparing to unpack .../79-libwildmidi-config_0.4.2-1_all.deb ... Unpacking libwildmidi-config (0.4.2-1) ... Selecting previously unselected package libwildmidi2:amd64. Preparing to unpack .../80-libwildmidi2_0.4.2-1_amd64.deb ... Unpacking libwildmidi2:amd64 (0.4.2-1) ... Selecting previously unselected package libwildmidi-dev. Preparing to unpack .../81-libwildmidi-dev_0.4.2-1_amd64.deb ... Unpacking libwildmidi-dev (0.4.2-1) ... Selecting previously unselected package nasm. 
Preparing to unpack .../82-nasm_2.13.02-0.1_amd64.deb ... Unpacking nasm (2.13.02-0.1) ... Selecting previously unselected package timidity. Preparing to unpack .../83-timidity_2.13.2-41_amd64.deb ... Unpacking timidity (2.13.2-41) ... Selecting previously unselected package timidity-daemon. Preparing to unpack .../84-timidity-daemon_2.13.2-41_all.deb ... Unpacking timidity-daemon (2.13.2-41) ... Setting up libdbus-1-dev:amd64 (1.12.2-1ubuntu1.2) ... Setting up libxcursor-dev:amd64 (1:1.1.15-1) ... Setting up gir1.2-atk-1.0:amd64 (2.28.1-1) ... Setting up libgtk2.0-common (2.24.32-1ubuntu1) ... Setting up libxkbcommon-dev:amd64 (0.8.2-1~ubuntu18.04.1) ... Setting up libpulse-mainloop-glib0:amd64 (1:11.1-1ubuntu7.11) ... Setting up libpulse-dev:amd64 (1:11.1-1ubuntu7.11) ... Setting up libarchive-zip-perl (1.60-1ubuntu0.1) ... Setting up libmircore-dev:amd64 (0.31.1-0ubuntu1) ... Setting up libtimedate-perl (2.3000-2) ... Setting up libcairo-script-interpreter2:amd64 (1.15.10-2ubuntu0.1) ... Setting up libsigsegv2:amd64 (2.12-1) ... Setting up libgme-dev:amd64 (0.6.2-1) ... Setting up gir1.2-freedesktop:amd64 (1.56.1-1) ... Setting up libsndio-dev:amd64 (1.1.0-3) ... Setting up libxcb-shm0-dev:amd64 (1.13-2~ubuntu18.04) ... Setting up libpangoxft-1.0-0:amd64 (1.40.14-1ubuntu0.1) ... Setting up libxml2-utils (2.9.4+dfsg1-6.1ubuntu1.3) ... Setting up libarchive-cpio-perl (0.10-1) ... Setting up gir1.2-gdkpixbuf-2.0:amd64 (2.36.11-2) ... Setting up libatk1.0-dev:amd64 (2.28.1-1) ... Setting up gettext-base (0.19.8.1-6ubuntu0.3) ... Setting up m4 (1.4.18-1) ... Setting up libmagic-mgc (1:5.32-2ubuntu0.4) ... Setting up gir1.2-pango-1.0:amd64 (1.40.14-1ubuntu0.1) ... Setting up libmagic1:amd64 (1:5.32-2ubuntu0.4) ... Setting up libopenal-dev:amd64 (1:1.18.2-2) ... Setting up libsys-hostname-long-perl (1.5-1) ... Setting up libwildmidi-config (0.4.2-1) ... Setting up libmircookie2:amd64 (0.31.1-0ubuntu1) ... Setting up libgdk-pixbuf2.0-dev (2.36.11-2) ... 
Setting up libmail-sendmail-perl (0.80-1) ... Setting up x11proto-xinerama-dev (2018.4-4) ... Setting up autotools-dev (20180224.1) ... Setting up libpixman-1-dev:amd64 (0.34.0-2) ... Setting up x11proto-randr-dev (2018.4-4) ... Setting up libxinerama-dev:amd64 (2:1.1.3-1) ... Setting up libxv-dev:amd64 (2:1.0.11-1) ... Setting up nasm (2.13.02-0.1) ... Setting up libcapnp-0.6.1:amd64 (0.6.1-1ubuntu1) ... Setting up libibus-1.0-5:amd64 (1.5.17-3ubuntu5.3) ... Setting up libmircore1:amd64 (0.31.1-0ubuntu1) ... Setting up freepats (20060219-1) ... Setting up libprotobuf-lite10:amd64 (3.0.0-9.1ubuntu1) ... Setting up libudev-dev:amd64 (237-3ubuntu10.43) ... Setting up libfluidsynth1:amd64 (1.1.9-1) ... Setting up x11proto-composite-dev (1:2018.4-4) ... Setting up autopoint (0.19.8.1-6ubuntu0.3) ... Setting up libaudio2:amd64 (1.9.4-6) ... Setting up libfile-stripnondeterminism-perl (0.040-1.1~build1) ... Setting up libgtk2.0-0:amd64 (2.24.32-1ubuntu1) ... Setting up gir1.2-ibus-1.0:amd64 (1.5.17-3ubuntu5.3) ... Setting up libgail18:amd64 (2.24.32-1ubuntu1) ... Setting up libxrandr-dev:amd64 (2:1.5.1-1) ... Setting up libcairo2-dev:amd64 (1.15.10-2ubuntu0.1) ... Setting up gettext (0.19.8.1-6ubuntu0.3) ... Setting up libxcomposite-dev:amd64 (1:0.4.4-2) ... Setting up libmirprotobuf3:amd64 (0.31.1-0ubuntu1) ... Setting up libprotobuf-dev:amd64 (3.0.0-9.1ubuntu1) ... Setting up libgail-common:amd64 (2.24.32-1ubuntu1) ... Setting up autoconf (2.69-11) ... Setting up libmircookie-dev:amd64 (0.31.1-0ubuntu1) ... Setting up libwildmidi2:amd64 (0.4.2-1) ... Setting up file (1:5.32-2ubuntu0.4) ... Setting up intltool-debian (0.35.0+20060710.4) ... Setting up libibus-1.0-dev:amd64 (1.5.17-3ubuntu5.3) ... Setting up automake (1:1.15.1-3ubuntu2) ... update-alternatives: using /usr/bin/automake-1.15 to provide /usr/bin/automake (automake) in auto mode Setting up libmircommon7:amd64 (0.31.1-0ubuntu1) ... Setting up libpango1.0-dev (1.40.14-1ubuntu0.1) ... 
Setting up libfluidsynth-dev:amd64 (1.1.9-1) ... Setting up timidity (2.13.2-41) ... Setting up gir1.2-gtk-2.0 (2.24.32-1ubuntu1) ... Setting up libgtk2.0-bin (2.24.32-1ubuntu1) ... Setting up libtool (2.4.6-2) ... Setting up po-debconf (1.0.20) ... Setting up libwildmidi-dev (0.4.2-1) ... Setting up libgtk2.0-dev (2.24.32-1ubuntu1) ... Setting up libmirclient9:amd64 (0.31.1-0ubuntu1) ... Setting up libmircommon-dev:amd64 (0.31.1-0ubuntu1) ... Setting up timidity-daemon (2.13.2-41) ... Adding group timidity....done Adding system user timidity....done Adding user `timidity' to group `audio' ... Adding user timidity to group audio Done. invoke-rc.d: could not determine current runlevel invoke-rc.d: policy-rc.d denied execution of stop. invoke-rc.d: could not determine current runlevel invoke-rc.d: policy-rc.d denied execution of start. Setting up libmirclient-dev:amd64 (0.31.1-0ubuntu1) ... Setting up libsdl2-dev:amd64 (2.0.8+dfsg1-1ubuntu1.18.04.4) ... Setting up debhelper (11.1.6ubuntu2) ... Setting up dh-autoreconf (17) ... Setting up dh-strip-nondeterminism (0.040-1.1~build1) ... Processing triggers for libc-bin (2.27-3ubuntu1.2) ... /sbin/ldconfig.real: /usr/local/lib/python3.6/dist-packages/ideep4py/lib/libmkldnn.so.0 is not a symbolic link Processing triggers for systemd (237-3ubuntu10.43) ... Processing triggers for man-db (2.8.3-2ubuntu0.1) ... Reading package lists... Building dependency tree... Reading state information... libboost-all-dev is already the newest version (1.65.1.0ubuntu1). 0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded. Reading package lists... Building dependency tree... Reading state information... The following additional packages will be installed: libtool-bin The following NEW packages will be installed: liblua5.1-0-dev libtool-bin 0 upgraded, 2 newly installed, 0 to remove and 15 not upgraded. Need to get 198 kB of archives. After this operation, 1,188 kB of additional disk space will be used. 
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 liblua5.1-0-dev amd64 5.1.5-8.1build2 [119 kB] Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 libtool-bin amd64 2.4.6-2 [79.5 kB] Fetched 198 kB in 1s (384 kB/s) Selecting previously unselected package liblua5.1-0-dev:amd64. (Reading database ... 148573 files and directories currently installed.) Preparing to unpack .../liblua5.1-0-dev_5.1.5-8.1build2_amd64.deb ... Unpacking liblua5.1-0-dev:amd64 (5.1.5-8.1build2) ... Selecting previously unselected package libtool-bin. Preparing to unpack .../libtool-bin_2.4.6-2_amd64.deb ... Unpacking libtool-bin (2.4.6-2) ... Setting up libtool-bin (2.4.6-2) ... Setting up liblua5.1-0-dev:amd64 (5.1.5-8.1build2) ... Processing triggers for man-db (2.8.3-2ubuntu0.1) ...

# Partially observed Markov decision processes (POMDPs)

We will start by exploring POMDPs. In a POMDP, the states of the environment, $z_t$, are hidden from the agent. The agent only gets to see partial observations derived from the hidden state, which we denote by $s_t \in \mathcal{S}$; these are sampled from the observation model, $p(s_t \mid z_t)$.

In this example we will work with ViZDoom and a Deep Recurrent Q-Network (DRQN).

Note that this is a quick overview example; the details will be discussed later.

Deep Recurrent Q Network

# title Install ViZDoom... takes few mins !pip install vizdoom
Collecting vizdoom Downloading https://files.pythonhosted.org/packages/41/0e/e7299dc536baab77ca61e7459883f353e4607f24d2e6266cd2a5ceb754d6/vizdoom-1.1.8.tar.gz (21.9MB) |████████████████████████████████| 21.9MB 1.6MB/s Requirement already satisfied: numpy in /usr/local/lib/python3.6/dist-packages (from vizdoom) (1.19.4) Building wheels for collected packages: vizdoom Building wheel for vizdoom (setup.py) ... done Created wheel for vizdoom: filename=vizdoom-1.1.8-cp36-none-any.whl size=14461987 sha256=3b0a45f0ef895782cc8e99dfa29e2cc1766a2951aa5ec14b1605463adfc89e88 Stored in directory: /root/.cache/pip/wheels/7d/04/dd/fafbaf68bb30e82ca4e336b9e13813d667d81aecb4648227a3 Successfully built vizdoom Installing collected packages: vizdoom Successfully installed vizdoom-1.1.8
# title Clone ViZDoom-Keras-RL repo and imports # Clone VizDoom-Keras-RL !git clone https://github.com/mjsML/VizDoom-Keras-RL.git %cd /content/VizDoom-Keras-RL from __future__ import print_function import skimage as skimage from skimage import transform, color, exposure from skimage.viewer import ImageViewer import random from random import choice import numpy as np from collections import deque import time import json from keras.models import model_from_json from keras.models import Sequential, load_model, Model from keras.layers.wrappers import TimeDistributed from keras.layers.core import Dense, Dropout, Activation, Flatten, RepeatVector, Masking from keras.layers import ( Convolution2D, Dense, Flatten, MaxPooling2D, Input, AveragePooling2D, Lambda, Activation, Embedding, ) # tf.keras.layers.Concatenate(axis=1)([x, y]) from keras.layers.recurrent import LSTM, GRU # from keras.optimizers import SGD, Adam, rmsprop from keras.optimizers import SGD, Adam from keras import backend as K from vizdoom import DoomGame, ScreenResolution from vizdoom import * import itertools as it from time import sleep import tensorflow as tf from networks import Networks
Cloning into 'VizDoom-Keras-RL'... remote: Enumerating objects: 49, done. remote: Total 49 (delta 0), reused 0 (delta 0), pack-reused 49 Unpacking objects: 100% (49/49), done. /content/VizDoom-Keras-RL
--------------------------------------------------------------------------- ImportError Traceback (most recent call last) <ipython-input-7-9cedabb4297f> in <module>() 32 import tensorflow as tf 33 ---> 34 from networks import Networks /content/VizDoom-Keras-RL/networks.py in <module>() 17 from keras.layers.wrappers import TimeDistributed 18 from keras.layers import Convolution2D, Dense, Flatten, MaxPooling2D, Input, AveragePooling2D, Lambda, Activation, Embedding ---> 19 from keras.optimizers import SGD, Adam, rmsprop 20 from keras.layers.recurrent import LSTM, GRU 21 from keras.layers.normalization import BatchNormalization ImportError: cannot import name 'rmsprop' --------------------------------------------------------------------------- NOTE: If your import is failing due to a missing package, you can manually install dependencies using either !pip or !apt. To view examples of installing some common dependencies, click the "Open Examples" button below. ---------------------------------------------------------------------------
# title Setup ViZDoom with defend the center scenario # TF2 TF1 compatibility config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True sess = tf.compat.v1.Session(config=config) tf.compat.v1.keras.backend.set_session(sess) from drqn import ReplayMemory, DoubleDQNAgent, preprocessImg game = DoomGame() game.load_config("/content/VizDoom-Keras-RL/defend_the_center.cfg") game.set_sound_enabled(True) game.set_screen_resolution(ScreenResolution.RES_640X480) game.set_window_visible(False) game.init() game.new_episode() game_state = game.get_state() misc = game_state.game_variables # [KILLCOUNT, AMMO, HEALTH] prev_misc = misc action_size = game.get_available_buttons_size() img_rows, img_cols = 64, 64 img_channels = 3 # Color channel trace_length = 4 # Temporal Dimension state_size = (trace_length, img_rows, img_cols, img_channels) agent = DoubleDQNAgent(state_size, action_size, trace_length) agent.model = Networks.drqn(state_size, action_size, agent.learning_rate) agent.target_model = Networks.drqn(state_size, action_size, agent.learning_rate) s_t = game_state.screen_buffer # 480 x 640 s_t = preprocessImg(s_t, size=(img_rows, img_cols)) is_terminated = game.is_episode_finished()
# title Start training DRQN Agent
#
# Runs the DRQN training loop on the ViZDoom game created in the setup cell.
# Each iteration picks an action, advances the game by `agent.frame_per_action`
# frames, shapes the reward, appends the transition to the current episode
# buffer and, once `t > agent.observe`, trains from replay memory.
# The loop starts a new episode whenever one finishes, so it runs until the
# cell is interrupted.

# Make sure the output directories exist before the loop writes into them.
os.makedirs("./models", exist_ok=True)
os.makedirs("statistics", exist_ok=True)

epsilon = agent.initial_epsilon  # not read below; the annealing step updates agent.epsilon directly
GAME = 0
t = 0
max_life = 0  # Maximum episode life (Proxy for agent performance)
life = 0
episode_buf = []  # Save entire episode

# Buffer to compute rolling statistics
life_buffer, ammo_buffer, kills_buffer = [], [], []

while not game.is_episode_finished():
    loss = 0
    Q_max = 0
    r_t = 0
    a_t = np.zeros([action_size])

    # Epsilon Greedy
    if len(episode_buf) > agent.trace_length:
        # Stack the next-frames of the last `trace_length` transitions and add
        # a leading batch axis: 1 x trace_length x <frame shape>.
        state_series = np.array([trace[-1] for trace in episode_buf[-agent.trace_length :]])
        state_series = np.expand_dims(state_series, axis=0)
        action_idx = agent.get_action(state_series)
    else:
        # Not enough frames in this episode yet to form a trace: act randomly.
        action_idx = random.randrange(agent.action_size)
    a_t[action_idx] = 1

    a_t = a_t.astype(int)
    game.set_action(a_t.tolist())
    skiprate = agent.frame_per_action
    game.advance_action(skiprate)

    game_state = game.get_state()  # Observe again after we take the action
    is_terminated = game.is_episode_finished()

    # each frame we get reward of 0.1, so 4 frames will be 0.4
    r_t = game.get_last_reward()

    if is_terminated:
        if life > max_life:
            max_life = life
        GAME += 1
        # `misc` still holds the variables observed on the previous step.
        life_buffer.append(life)
        ammo_buffer.append(misc[1])
        kills_buffer.append(misc[0])
        print("Episode Finish ", misc)
        game.new_episode()
        game_state = game.get_state()

    # Read the frame and game variables once, from whichever state is current
    # (the first state of the new episode if the previous one just ended).
    misc = game_state.game_variables
    s_t1 = game_state.screen_buffer
    s_t1 = preprocessImg(s_t1, size=(img_rows, img_cols))

    r_t = agent.shape_reward(r_t, misc, prev_misc, t)

    if is_terminated:
        life = 0
    else:
        life += 1

    # update the cache
    prev_misc = misc

    # Update epsilon
    if agent.epsilon > agent.final_epsilon and t > agent.observe:
        agent.epsilon -= (agent.initial_epsilon - agent.final_epsilon) / agent.explore

    # Do the training
    if t > agent.observe:
        Q_max, loss = agent.train_replay()

    # save the sample <s, a, r, s'> to episode buffer
    episode_buf.append([s_t, action_idx, r_t, s_t1])

    if is_terminated:
        agent.memory.add(episode_buf)
        episode_buf = []  # Reset Episode Buf

    s_t = s_t1
    t += 1

    # save progress every 10000 iterations
    if t % 10000 == 0:
        print("Now we save model")
        agent.model.save_weights("./models/drqn.h5", overwrite=True)

    # print info
    if t <= agent.observe:
        state = "observe"
    elif t <= agent.observe + agent.explore:
        state = "explore"
    else:
        state = "train"

    if is_terminated:
        print(
            "TIME",
            t,
            "/ GAME",
            GAME,
            "/ STATE",
            state,
            "/ EPSILON",
            agent.epsilon,
            "/ ACTION",
            action_idx,
            "/ REWARD",
            r_t,
            "/ Q_MAX %e" % np.max(Q_max),
            "/ LIFE",
            max_life,
            "/ LOSS",
            loss,
        )

        # Save Agent's Performance Statistics
        if GAME % agent.stats_window_size == 0 and t > agent.observe:
            print("Update Rolling Statistics")
            agent.mavg_score.append(np.mean(np.array(life_buffer)))
            agent.var_score.append(np.var(np.array(life_buffer)))
            agent.mavg_ammo_left.append(np.mean(np.array(ammo_buffer)))
            agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))

            # Reset rolling stats buffer
            life_buffer, ammo_buffer, kills_buffer = [], [], []

            # Write Rolling Statistics to file (the file is overwritten each time)
            with open("statistics/drqn_stats.txt", "w") as stats_file:
                stats_file.write("Game: " + str(GAME) + "\n")
                stats_file.write("Max Score: " + str(max_life) + "\n")
                stats_file.write("mavg_score: " + str(agent.mavg_score) + "\n")
                stats_file.write("var_score: " + str(agent.var_score) + "\n")
                stats_file.write("mavg_ammo_left: " + str(agent.mavg_ammo_left) + "\n")
                stats_file.write("mavg_kill_counts: " + str(agent.mavg_kill_counts) + "\n")

Fully observed Markov decision processes (MDPs)

Now we explore fully observed Markov decision processes.

In a fully observable problem, the observed state is equal to the hidden state (i.e., $s_t = z_t$).

In this case, the POMDP reduces to a simpler model known as a Markov decision process, or MDP.

Actor Critic Method

As an agent takes actions and moves through an environment, it learns to map the observed state of the environment to two possible outputs:

Recommended action:

A probability value for each action in the action space. The part of the agent responsible for this output is called the actor.

Estimated rewards in the future:

Sum of all rewards it expects to receive in the future. The part of the agent responsible for this output is the critic.

The actor and the critic learn to perform their tasks such that the recommended actions from the actor maximize the rewards.

CartPole-v0

A pole is attached to a cart placed on a frictionless track. The agent has to apply force to move the cart. It is rewarded for every time step the pole remains upright. The agent, therefore, must learn to keep the pole from falling over.

# @title Imports import gym import numpy as np import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers # Configuration parameters for the whole setup seed = 42 gamma = 0.99 # Discount factor for past rewards max_steps_per_episode = 10000 env = gym.make("CartPole-v0") # Create the environment env.seed(seed) eps = np.finfo(np.float32).eps.item() # Smallest number such that 1.0 + eps != 1.0
# @title Define Model num_inputs = 4 num_actions = 2 num_hidden = 128 inputs = layers.Input(shape=(num_inputs,)) common = layers.Dense(num_hidden, activation="relu")(inputs) action = layers.Dense(num_actions, activation="softmax")(common) critic = layers.Dense(1)(common) model = keras.Model(inputs=inputs, outputs=[action, critic])
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-1-313e5ba83e2c> in <module>() 4 num_hidden = 128 5 ----> 6 inputs = layers.Input(shape=(num_inputs,)) 7 common = layers.Dense(num_hidden, activation="relu")(inputs) 8 action = layers.Dense(num_actions, activation="softmax")(common) NameError: name 'layers' is not defined
# @title Train model
#
# Trains the two-headed actor-critic `model` on `env`, one episode per
# iteration of the outer loop, until the exponentially smoothed episode
# reward exceeds 195. Each episode is rolled out under a GradientTape so that
# the actor and critic losses can be back-propagated through the recorded
# forward passes in a single update.
optimizer = keras.optimizers.Adam(learning_rate=0.01)
huber_loss = keras.losses.Huber()
action_probs_history = []
critic_value_history = []
rewards_history = []
running_reward = 0
episode_count = 0

while True:  # Run until solved
    state = env.reset()
    episode_reward = 0
    with tf.GradientTape() as tape:
        for timestep in range(1, max_steps_per_episode):
            # env.render(); Adding this line would show the attempts
            # of the agent in a pop up window.

            state = tf.convert_to_tensor(state)
            state = tf.expand_dims(state, 0)

            # Predict action probabilities and estimated future rewards
            # from environment state
            action_probs, critic_value = model(state)
            critic_value_history.append(critic_value[0, 0])

            # Sample action from action probability distribution
            action = np.random.choice(num_actions, p=np.squeeze(action_probs))
            action_probs_history.append(tf.math.log(action_probs[0, action]))

            # Apply the sampled action in our environment
            state, reward, done, _ = env.step(action)
            rewards_history.append(reward)
            episode_reward += reward

            if done:
                break

        # Update running reward to check condition for solving
        running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward

        # Calculate expected value from rewards
        # - At each timestep what was the total reward received after that timestep
        # - Rewards further in the future are discounted by multiplying them with gamma
        # - These are the labels for our critic
        # Accumulate from the last step backwards, then flip once, instead of
        # inserting at the front of the list on every step.
        returns = []
        discounted_sum = 0
        for r in reversed(rewards_history):
            discounted_sum = r + gamma * discounted_sum
            returns.append(discounted_sum)
        returns.reverse()

        # Normalize (eps guards against a zero standard deviation)
        returns = np.array(returns)
        returns = (returns - np.mean(returns)) / (np.std(returns) + eps)
        returns = returns.tolist()

        # Calculating loss values to update our network
        history = zip(action_probs_history, critic_value_history, returns)
        actor_losses = []
        critic_losses = []
        for log_prob, value, ret in history:
            # At this point in history, the critic estimated that we would get a
            # total reward = `value` in the future. We took an action with log probability
            # of `log_prob` and ended up receiving a total reward = `ret`.
            # The actor must be updated so that it predicts an action that leads to
            # high rewards (compared to critic's estimate) with high probability.
            diff = ret - value
            actor_losses.append(-log_prob * diff)  # actor loss

            # The critic must be updated so that it predicts a better estimate of
            # the future rewards.
            critic_losses.append(huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0)))

        # Backpropagation
        loss_value = sum(actor_losses) + sum(critic_losses)
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Clear the loss and reward history
        action_probs_history.clear()
        critic_value_history.clear()
        rewards_history.clear()

    # Log details
    episode_count += 1
    if episode_count % 10 == 0:
        template = "running reward: {:.2f} at episode {}"
        print(template.format(running_reward, episode_count))

    if running_reward > 195:  # Condition to consider the task solved
        print("Solved at episode {}!".format(episode_count))
        break

Visualizations

In early stages of training:

Imgur

In later stages of training:

Imgur