Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
aamini
GitHub Repository: aamini/introtodeeplearning
Path: blob/master/mitdeeplearning/lab3_old.py
547 views
1
import io
2
import base64
3
from IPython.display import HTML
4
import gym
5
import numpy as np
6
import cv2
7
8
9
def play_video(filename, width=None):
    """Return an IPython ``HTML`` object that embeds a video file inline.

    The file's bytes are base64-encoded and placed in a ``data:`` URI inside
    a ``<video controls>`` element declared as ``video/mp4``.

    Args:
        filename: Path of the video file to embed.
        width: Optional value for the ``width`` attribute of the ``<video>``
            tag. When ``None`` the attribute is omitted.

    Returns:
        The ``HTML`` display object wrapping the generated markup.
    """
    # Open read-only ("rb") so files without write permission still work, and
    # use a context manager so the handle is closed as soon as it is read.
    with io.open(filename, "rb") as video_file:
        encoded = base64.b64encode(video_file.read())

    video_width = 'width="' + str(width) + '"' if width is not None else ""
    embedded = HTML(
        data="""
<video controls {0}>
<source src="data:video/mp4;base64,{1}" type="video/mp4" />
</video>""".format(video_width, encoded.decode("ascii"))
    )

    return embedded
20
21
22
def preprocess_pong(image):
    """Reduce a Pong frame to a small binary mask of the non-background pixels.

    Pipeline: keep rows 35..194, take channel 0 at every second row/column,
    zero the two background values (144 and 109), set everything else to 1,
    dilate once with a 3x3 kernel, then downsample by 2 again and add a
    trailing channel axis.

    Args:
        image: Array indexable as ``image[row, col, channel]``. Presumably the
            raw 210x160x3 uint8 Atari frame -- TODO confirm against callers.
            The array is not modified.

    Returns:
        A float64 array with a trailing singleton axis; for a 210x160 input
        this works out to shape (40, 40, 1).
    """
    # Copy explicitly: basic slicing yields a view, so the in-place masking
    # below would otherwise overwrite pixels of the caller's frame.
    mask = image[35:195][::2, ::2, 0].copy()  # Crop, then downsample by 2

    mask[mask == 144] = 0  # Remove background type 1
    mask[mask == 109] = 0  # Remove background type 2
    mask[mask != 0] = 1  # Set remaining elements (paddles, ball, etc.) to 1

    # Thicken the remaining pixels before the second downsampling step.
    mask = cv2.dilate(mask, np.ones((3, 3), np.uint8), iterations=1)
    mask = mask[::2, ::2, np.newaxis]

    # np.float was only an alias for the builtin float and no longer exists in
    # current NumPy releases; np.float64 is the dtype it always resolved to.
    return mask.astype(np.float64)
31
32
33
def pong_change(prev, curr):
    """Return the difference between two preprocessed Pong frames.

    Both frames are passed through ``preprocess_pong`` (``prev`` first, then
    ``curr``) and the result is ``preprocessed(prev) - preprocessed(curr)``.
    No normalization is applied to the difference.

    Args:
        prev: The earlier raw frame.
        curr: The later raw frame.

    Returns:
        The element-wise difference of the two preprocessed frames.
    """
    earlier = preprocess_pong(prev)
    later = preprocess_pong(curr)
    return earlier - later
39
40
41
class Memory:
    """Buffer holding observations, actions and rewards in three parallel lists."""

    def __init__(self):
        # A new buffer starts empty; clear() creates the three lists.
        self.clear()

    def clear(self):
        """Reset the buffer by rebinding every attribute to a fresh empty list."""
        self.observations, self.actions, self.rewards = [], [], []

    def add_to_memory(self, new_observation, new_action, new_reward):
        """Append one step's observation, action and reward to their lists."""
        buffers = (self.observations, self.actions, self.rewards)
        step = (new_observation, new_action, new_reward)
        for buffer, value in zip(buffers, step):
            buffer.append(value)
56
57
58
def aggregate_memories(memories):
    """Concatenate several ``Memory`` buffers into one new ``Memory``.

    Steps are copied buffer by buffer, in order. Within each buffer the three
    lists are walked in lockstep with ``zip``, so copying stops at the
    shortest of the three.

    Args:
        memories: Iterable of objects exposing ``observations``, ``actions``
            and ``rewards`` sequences.

    Returns:
        A new ``Memory`` containing every copied step.
    """
    combined = Memory()

    for episode in memories:
        steps = zip(episode.observations, episode.actions, episode.rewards)
        for observation, action, reward in steps:
            combined.add_to_memory(observation, action, reward)

    return combined
66
67
68
def parallelized_collect_rollout(batch_size, envs, model, choose_action):
    """Step a batch of environments in lockstep until every episode is done.

    At each step the frame difference (``pong_change``) of every unfinished
    environment is computed, the differences are stacked into one array and
    passed to ``choose_action`` in a single call, and each unfinished
    environment is advanced with its chosen action. Finished environments are
    skipped entirely: no frame difference is computed for them and they are
    not stepped again.

    Args:
        batch_size: Number of environments; must equal ``len(envs)``.
        envs: Sequence of environments providing ``reset()`` (returning an
            observation) and ``step(action)`` (returning a 4-tuple
            ``(observation, reward, done, info)``).
        model: Passed through unchanged as the first argument of
            ``choose_action``.
        choose_action: Callable invoked as
            ``choose_action(model, batch, single=False)``; its result is
            indexed by position within the batch of unfinished environments.

    Returns:
        A list with one ``Memory`` per environment, each holding that
        environment's ``(frame difference, action, reward)`` steps. With
        ``batch_size == 0`` the list is empty and ``choose_action`` is never
        called.

    Raises:
        AssertionError: If ``len(envs)`` differs from ``batch_size``.
    """
    assert (
        len(envs) == batch_size
    ), "Number of parallel environments must be equal to the batch size."

    memories = [Memory() for _ in range(batch_size)]
    next_observations = [single_env.reset() for single_env in envs]
    # Before the first step there is no earlier frame, so each environment
    # starts with its initial observation as the "previous" frame too.
    previous_frames = list(next_observations)
    done = [False] * batch_size

    while not all(done):
        # Only unfinished environments take part in this step.
        active = [b for b in range(batch_size) if not done[b]]

        diff_frames = [
            pong_change(previous_frames[b], next_observations[b]) for b in active
        ]
        # One batched policy call covering every unfinished environment.
        actions = choose_action(model, np.array(diff_frames), single=False)

        for position, (b, diff_frame) in enumerate(zip(active, diff_frames)):
            # Index (rather than iterate) the policy output so that a result
            # that is too short fails loudly instead of stalling the loop.
            action = actions[position]
            current_frame = next_observations[b]
            next_observations[b], reward, done[b], _ = envs[b].step(action)
            previous_frames[b] = current_frame
            memories[b].add_to_memory(diff_frame, action, reward)

    return memories
111
112
113
def save_video_of_model(model, env_name, suffix=""):
    """Play one episode with ``model`` and save the rendered frames as a video.

    A virtual display is started, the environment named ``env_name`` is
    created, and at every step the rendered RGB frame is appended to
    ``env_name + suffix + ".mp4"``. The action is the argmax of the model's
    output for the current input: the raw observation for CartPole
    environments, or the ``pong_change`` frame difference for Pong
    environments.

    Args:
        model: Callable taking a batch of one input and returning an object
            with a ``.numpy()`` method.
        env_name: Gym environment id; must contain ``"CartPole"`` or
            ``"Pong"``.
        suffix: Text inserted between the environment name and ``".mp4"``.

    Returns:
        The name of the video file that was written.

    Raises:
        ValueError: If ``env_name`` is neither a CartPole nor a Pong
            environment. This is checked before any video file is created.
    """
    import skvideo.io
    from pyvirtualdisplay import Display

    # NOTE(review): the virtual display is left running after this function
    # returns. Stopping it here may break renderers that cache their display
    # connection across calls -- confirm before adding display.stop().
    display = Display(visible=0, size=(400, 300))
    display.start()

    env = gym.make(env_name)
    try:
        # The input type depends only on the environment name, so decide once
        # up front instead of on every frame.
        if "CartPole" in env_name:
            use_frame_diff = False
        elif "Pong" in env_name:
            use_frame_diff = True
        else:
            raise ValueError(f"Unknown env for saving: {env_name}")

        obs = env.reset()
        prev_obs = obs

        filename = env_name + suffix + ".mp4"
        output_video = skvideo.io.FFmpegWriter(filename)

        counter = 0
        try:
            done = False
            while not done:
                frame = env.render(mode="rgb_array")
                output_video.writeFrame(frame)

                input_obs = pong_change(prev_obs, obs) if use_frame_diff else obs
                action = model(np.expand_dims(input_obs, 0)).numpy().argmax()

                prev_obs = obs
                obs, _reward, done, _info = env.step(action)
                counter += 1
        finally:
            # Close the writer even if rendering or the model raised.
            output_video.close()
    finally:
        # Release the environment on every exit path.
        env.close()

    print(f"Successfully saved {counter} frames into {filename}!")
    return filename
149
150
151
def save_video_of_memory(memory, filename, size=(512, 512)):
    """Write the observations stored in ``memory`` to a video file.

    Each observation is multiplied by 255, resized to ``size`` with
    ``cv2.resize`` and appended as one frame.

    Args:
        memory: Object exposing an ``observations`` iterable of arrays.
        filename: Path of the video file to write.
        size: Target frame size passed to ``cv2.resize``.

    Returns:
        ``filename``, unchanged.
    """
    import skvideo.io

    output_video = skvideo.io.FFmpegWriter(filename)
    try:
        for observation in memory.observations:
            output_video.writeFrame(cv2.resize(255 * observation, size))
    finally:
        # Close the writer even if resizing or encoding a frame fails.
        output_video.close()

    return filename
161
162