"""Perception (Chapter 24)"""

import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import scipy.signal
from keras.datasets import mnist
from keras.layers import Dense, Activation, Flatten, InputLayer, Conv2D, MaxPooling2D
from keras.models import Sequential

from utils4e import gaussian_kernel_2D


# ____________________________________________________
# 24.3 Early Image Processing Operators
# 24.3.1 Edge Detection


def array_normalization(array, range_min, range_max):
    """Normalize an array to the range (range_min, range_max)"""
    if not isinstance(array, np.ndarray):
        array = np.asarray(array)
    array = array - np.min(array)
    # guard against division by zero for constant arrays
    if np.max(array) == 0:
        return array + range_min
    array = array * (range_max - range_min) / np.max(array) + range_min
    return array


def gradient_edge_detector(image):
    """
    Image edge detection by calculating gradients in the image
    :param image: numpy ndarray or an iterable object
    :return: numpy ndarray, representing a gray scale image
    """
    if not isinstance(image, np.ndarray):
        image = np.asarray(image)
    # finite-difference filters: x_filter responds to intensity changes along x,
    # y_filter to changes along y
    x_filter, y_filter = np.array([[1, -1]]), np.array([[1], [-1]])
    # convolve each filter with the image to get the edge responses
    y_edges = scipy.signal.convolve2d(image, x_filter, 'same')
    x_edges = scipy.signal.convolve2d(image, y_filter, 'same')
    edges = array_normalization(x_edges + y_edges, 0, 255)
    return edges


def gaussian_derivative_edge_detector(image):
    """Image edge detector using derivative of gaussian kernels"""
    if not isinstance(image, np.ndarray):
        image = np.asarray(image)
    gaussian_filter = gaussian_kernel_2D()
    # init derivative of gaussian filters in the x and y directions
    x_filter = scipy.signal.convolve2d(gaussian_filter, np.asarray([[1, -1]]), 'same')
    y_filter = scipy.signal.convolve2d(gaussian_filter, np.asarray([[1], [-1]]), 'same')
    # extract edges using convolution
    y_edges = scipy.signal.convolve2d(image, x_filter, 'same')
    x_edges = scipy.signal.convolve2d(image, y_filter, 'same')
    edges = array_normalization(x_edges + y_edges, 0, 255)
    return edges


def laplacian_edge_detector(image):
    """Extract image edges with a laplacian filter"""
    if not isinstance(image, np.ndarray):
        image = np.asarray(image)
    # init laplacian filter (negated 4-neighbor laplacian kernel)
    laplacian_kernel = np.asarray([[0, -1, 0], [-1, 4, -1], [0, -1, 0]])
    # extract edges with convolution
    edges = scipy.signal.convolve2d(image, laplacian_kernel, 'same')
    edges = array_normalization(edges, 0, 255)
    return edges


def show_edges(edges):
    """Helper function to show an edge picture"""
    plt.imshow(edges, cmap='gray', vmin=0, vmax=255)
    plt.axis('off')
    plt.show()
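

# A hedged usage sketch for the three detectors above, kept commented out so the
# module stays import-safe; gen_gray_scale_picture is defined later in this file.
# img = gen_gray_scale_picture(100, level=3)
# show_edges(gradient_edge_detector(img))
# show_edges(gaussian_derivative_edge_detector(img))
# show_edges(laplacian_edge_detector(img))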


# __________________________________________________
# 24.3.3 Optical Flow


def sum_squared_difference(pic1, pic2):
    """SSD of two frames"""
    # cast to float so the squared differences cannot wrap around for uint8 input
    pic1 = np.asarray(pic1, dtype=np.float64)
    pic2 = np.asarray(pic2, dtype=np.float64)
    assert pic1.shape == pic2.shape
    min_ssd = np.inf
    min_dxy = (np.inf, np.inf)

    # consider picture shifts from -30 to 30 in each direction
    for Dx in range(-30, 31):
        for Dy in range(-30, 31):
            # shift the image
            shifted_pic = np.roll(pic2, Dx, axis=0)
            shifted_pic = np.roll(shifted_pic, Dy, axis=1)
            # calculate the difference
            diff = np.sum((pic1 - shifted_pic) ** 2)
            if diff < min_ssd:
                min_dxy = (Dx, Dy)
                min_ssd = diff
    return min_dxy, min_ssd
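

# A hedged sketch of recovering a known shift with SSD; it assumes the synthetic
# image generator defined below and is commented out to keep import cheap.
# frame = gen_gray_scale_picture(32)
# moved = np.roll(frame, 2, axis=0)
# (dx, dy), ssd = sum_squared_difference(frame, moved)  # expect (dx, dy) == (-2, 0)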


# ____________________________________________________
# Segmentation


def gen_gray_scale_picture(size, level=3):
    """
    Generate a picture with different gray scale levels
    :param size: size of the generated picture
    :param level: the number of gray scale levels in the picture;
        the range (0, 255) is equally divided by the number of levels
    :return image in numpy ndarray type
    """
    assert level > 0
    # init an empty image
    image = np.zeros((size, size))
    if level == 1:
        return image
    # fill the image with L-shaped bands whose gray level increases
    # from the upper-left corner to the lower-right corner
    for x in range(size):
        for y in range(size):
            image[x, y] += (250 // (level - 1)) * (max(x, y) * level // size)
    return image


gray_scale_image = gen_gray_scale_picture(3)
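
# Worked example: gen_gray_scale_picture(3) uses 3 levels on a 3x3 grid,
# giving 125 * max(x, y) at each pixel:
# [[  0, 125, 250],
#  [125, 125, 250],
#  [250, 250, 250]]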


def probability_contour_detection(image, discs, threshold=0):
    """
    Detect edges/contours by applying a set of discs to an image
    :param image: an image in type of numpy ndarray
    :param discs: a list of discs/filters to apply to pixels of the image,
        arranged in pairs of opposing half discs
    :param threshold: threshold to tell whether the pixel at (x, y) is on an edge
    :return image showing edges in numpy ndarray type
    """
    # init an empty output image
    res = np.zeros(image.shape)
    step = discs[0].shape[0]
    for x_i in range(0, image.shape[0] - step + 1, 1):
        for y_i in range(0, image.shape[1] - step + 1, 1):
            diff = []
            # apply each pair of discs and calculate the difference
            for d in range(0, len(discs), 2):
                disc1, disc2 = discs[d], discs[d + 1]
                # crop the region of interest
                region = image[x_i: x_i + step, y_i: y_i + step]
                diff.append(np.sum(np.multiply(region, disc1)) - np.sum(np.multiply(region, disc2)))
            if max(diff) > threshold:
                # change the color of the center of the region
                res[x_i + step // 2, y_i + step // 2] = 255
    return res
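

# A hedged sketch combining gen_discs (defined below) with the detector;
# commented out since it is illustrative only.
# discs = gen_discs(10)[0]  # the first (smallest) scale of half-disc pairs
# contours = probability_contour_detection(gen_gray_scale_picture(100), discs)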


def group_contour_detection(image, cluster_num=2):
    """
    Detecting contours in an image with k-means clustering
    :param image: an image in numpy ndarray type
    :param cluster_num: number of clusters in k-means
    """
    img = image
    # reshape to a column of samples so k-means clusters pixel intensities
    Z = np.float32(img.reshape(-1, 1))
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    K = cluster_num
    # use kmeans in opencv-python
    ret, label, center = cv2.kmeans(Z, K, None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)
    center = np.uint8(center)
    res = center[label.flatten()]
    res2 = res.reshape(img.shape)
    # show the image
    # cv2.imshow('res2', res2)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()
    return res2
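

# Example usage (hedged; commented out to keep the module import-safe):
# res = group_contour_detection(gen_gray_scale_picture(100), cluster_num=3)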


def image_to_graph(image):
    """
    Convert an image to a graph in adjacency-list form: each pixel maps to
    its neighbors below and to the right (or None past the border)
    """
    graph_dict = {}
    for x in range(image.shape[0]):
        for y in range(image.shape[1]):
            graph_dict[(x, y)] = [(x + 1, y) if x + 1 < image.shape[0] else None,
                                  (x, y + 1) if y + 1 < image.shape[1] else None]
    return graph_dict
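

# Worked example for a 2x2 image:
# image_to_graph(np.zeros((2, 2))) ==
# {(0, 0): [(1, 0), (0, 1)], (0, 1): [(1, 1), None],
#  (1, 0): [None, (1, 1)], (1, 1): [None, None]}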


def generate_edge_weight(image, v1, v2):
    """
    Find the edge weight between two vertices in an image
    :param image: image in numpy ndarray type
    :param v1, v2: vertices in the image in the form of (x index, y index)
    """
    # cast to int so the difference cannot wrap around for uint8 images
    diff = abs(int(image[v1[0], v1[1]]) - int(image[v2[0], v2[1]]))
    return 255 - diff


class Graph:
    """Graph in adjacency-list form to represent an image"""

    def __init__(self, image):
        """image: ndarray"""
        self.graph = image_to_graph(image)
        # number of rows and columns
        self.ROW = len(self.graph)
        self.COL = 2
        self.image = image
        # dictionary to save the maximum flow of each edge
        self.flow = {}
        # initialize the flow with the edge weights
        for s in self.graph:
            self.flow[s] = {}
            for t in self.graph[s]:
                if t:
                    self.flow[s][t] = generate_edge_weight(image, s, t)

    def bfs(self, s, t, parent):
        """Breadth first search to tell whether there is an augmenting path
        from source s to sink t; parent saves the traversed edges"""
        # queue holding the current search frontier
        queue = [s]
        visited = []

        while queue:
            u = queue.pop(0)
            for node in self.graph[u]:
                # only follow edges with positive residual flow
                if node and node not in visited and self.flow[u][node] > 0:
                    queue.append(node)
                    visited.append(node)
                    parent.append((u, node))
        return t in visited

    def min_cut(self, source, sink):
        """Find the minimum cut of the graph between source and sink"""
        parent = []
        max_flow = 0

        while self.bfs(source, sink, parent):
            path_flow = np.inf
            # find the minimum flow along the found edges
            for s, t in parent:
                path_flow = min(path_flow, self.flow[s][t])

            max_flow += path_flow

            # update all edges between source and sink
            for s in self.flow:
                for t in self.flow[s]:
                    if t[0] <= sink[0] and t[1] <= sink[1]:
                        self.flow[s][t] -= path_flow
            parent = []
        # edges whose flow has been saturated to zero form the cut
        res = []
        for i in self.flow:
            for j in self.flow[i]:
                if self.flow[i][j] == 0 and generate_edge_weight(self.image, i, j) > 0:
                    res.append((i, j))
        return res
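

# A hedged sketch of using Graph.min_cut on a small synthetic image;
# commented out since it is illustrative only.
# g = Graph(gen_gray_scale_picture(4, level=2))
# cut_edges = g.min_cut((0, 0), (3, 3))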


def gen_discs(init_scale, scales=1):
    """
    Generate a collection of disc pairs by splitting round discs at different angles
    :param init_scale: the initial size of each half disc
    :param scales: the number of scales to generate; the disc size grows by
        init_scale with each additional scale
    :return: the collection of generated discs: [discs of scale1, discs of scale2, ...]
    """
    discs = []
    for m in range(scales):
        scale = init_scale * (m + 1)
        disc = []
        # make the full round disc
        white = np.zeros((scale, scale))
        center = (scale - 1) / 2
        for i in range(scale):
            for j in range(scale):
                if (i - center) ** 2 + (j - center) ** 2 <= (center ** 2):
                    white[i, j] = 255
        # generate lower half and upper half
        lower_half = np.copy(white)
        lower_half[:(scale - 1) // 2, :] = 0
        upper_half = lower_half[::-1, ::-1]
        # generate left half and right half
        disc += [lower_half, upper_half, np.transpose(lower_half), np.transpose(upper_half)]
        # generate upper-left, lower-right, upper-right, lower-left half discs
        disc += [np.tril(white, 0), np.triu(white, 0), np.flip(np.tril(white, 0), axis=0),
                 np.flip(np.triu(white, 0), axis=0)]
        discs.append(disc)
    return discs


# __________________________________________________
# 24.4 Classifying Images


def load_MNIST(train_size, val_size, test_size):
    """Load the MNIST dataset from keras"""
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    total_size = len(x_train)
    if train_size + val_size > total_size:
        train_size = total_size - val_size
    x_train = x_train.reshape(x_train.shape[0], 1, 28, 28)
    x_test = x_test.reshape(x_test.shape[0], 1, 28, 28)
    # scale pixel values to [0, 1]
    x_train = x_train.astype('float32')
    x_train /= 255
    x_test = x_test.astype('float32')
    x_test /= 255
    # one-hot encode the labels
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)
    return ((x_train[:train_size], y_train[:train_size]),
            (x_train[train_size:train_size + val_size], y_train[train_size:train_size + val_size]),
            (x_test[:test_size], y_test[:test_size]))


def simple_convnet(size=3, num_classes=10):
    """
    Simple convolutional network for digit recognition
    :param size: number of convolution layers
    :param num_classes: number of output classes
    :return a convolutional network in keras model type
    """
    model = Sequential()
    # add input layer for channels-first images of size (28, 28)
    model.add(InputLayer(input_shape=(1, 28, 28)))
    # add convolution layers and max pooling layers;
    # data_format='channels_first' matches the (1, 28, 28) input shape
    for _ in range(size):
        model.add(Conv2D(32, (2, 2), padding='same', data_format='channels_first',
                         kernel_initializer='random_uniform'))
        model.add(MaxPooling2D(padding='same', data_format='channels_first'))

    # add flatten layer and output layers
    model.add(Flatten())
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # compile the model; keras requires an optimizer, 'adam' is used here
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    print(model.summary())
    return model


def train_model(model):
    """Train the simple convolutional network"""
    # load the dataset
    (train_x, train_y), (val_x, val_y), (test_x, test_y) = load_MNIST(1000, 100, 100)
    model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=5, verbose=2, batch_size=32)
    scores = model.evaluate(test_x, test_y, verbose=1)
    print(scores)
    return model
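

# Example usage (hedged; commented out since it downloads MNIST and trains):
# model = train_model(simple_convnet())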


# _____________________________________________________
# 24.5 Detecting Objects


def selective_search(image):
    """
    Selective search for object detection
    :param image: str, the path of the image, or an image in ndarray type with 3 channels
    :return list of region proposals, each in the form of (x, y, w, h)
    """
    if image is None:
        im = cv2.imread("./images/stapler1-test.png")
    elif isinstance(image, str):
        im = cv2.imread(image)
    else:
        # stack a single-channel image into 3 identical channels
        im = np.stack([image] * 3, axis=-1)

    # use opencv-python to extract bounding boxes with selective search
    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    ss.setBaseImage(im)
    ss.switchToSelectiveSearchQuality()
    rects = ss.process()

    # show bounding boxes on the input image
    image_out = im.copy()
    for rect in rects[:100]:
        print(rect)
        x, y, w, h = rect
        cv2.rectangle(image_out, (x, y), (x + w, y + h), (0, 255, 0), 1, cv2.LINE_AA)
    cv2.imshow("Output", image_out)
    cv2.waitKey(0)
    return rects
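

# Example usage (hedged; opens an OpenCV window, so it is commented out):
# rects = selective_search("./images/stapler1-test.png")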


# Faster R-CNN
def pool_rois(feature_map, rois, pooled_height, pooled_width):
    """
    Applies ROI pooling for a single image and various ROIs
    :param feature_map: ndarray, in shape of (height, width, channel)
    :param rois: list of ROIs
    :param pooled_height: height of the pooled area
    :param pooled_width: width of the pooled area
    :return list of pooled features
    """

    def curried_pool_roi(roi):
        return pool_roi(feature_map, roi, pooled_height, pooled_width)

    pooled_areas = list(map(curried_pool_roi, rois))
    return pooled_areas


def pool_roi(feature_map, roi, pooled_height, pooled_width):
    """
    Applies a single ROI pooling to a single image
    :param feature_map: ndarray, in shape of (height, width, channel)
    :param roi: region of interest, in the form of
        [h_start_ratio, w_start_ratio, h_end_ratio, w_end_ratio]
    :return feature of the pooling output, in shape of (pooled_height, pooled_width)
    """

    # compute the region of interest from the ratios
    feature_map_height = int(feature_map.shape[0])
    feature_map_width = int(feature_map.shape[1])

    h_start = int(feature_map_height * roi[0])
    w_start = int(feature_map_width * roi[1])
    h_end = int(feature_map_height * roi[2])
    w_end = int(feature_map_width * roi[3])

    region = feature_map[h_start:h_end, w_start:w_end, :]

    # divide the region into non-overlapping areas
    region_height = h_end - h_start
    region_width = w_end - w_start
    h_step = region_height // pooled_height
    w_step = region_width // pooled_width

    areas = [[(
        i * h_step,
        j * w_step,
        (i + 1) * h_step if i + 1 < pooled_height else region_height,
        (j + 1) * w_step if j + 1 < pooled_width else region_width)
        for j in range(pooled_width)]
        for i in range(pooled_height)]

    # take the maximum of each area and stack the results
    def pool_area(x):
        return np.max(region[x[0]:x[2], x[1]:x[3], :])

    pooled_features = np.stack([[pool_area(x) for x in row] for row in areas])
    return pooled_features
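

# Worked example (hedged): pooling an 8x8 single-channel feature map into 2x2
# takes the maximum of each 4x4 quadrant.
# fmap = np.arange(64, dtype=np.float32).reshape(8, 8, 1)
# pool_roi(fmap, [0.0, 0.0, 1.0, 1.0], 2, 2)  # -> [[27., 31.], [59., 63.]]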


# The Faster R-CNN demo can be installed and run in a jupyter notebook
# def faster_rcnn_demo(directory):
#     """
#     Show the demo of Faster R-CNN; the model is from
#     @inproceedings{renNIPS15fasterrcnn,
#         Author = {Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun},
#         Title = {Faster {R-CNN}: Towards Real-Time Object Detection
#                  with Region Proposal Networks},
#         Booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
#         Year = {2015}}
#     :param directory: the directory where the Faster R-CNN model is installed
#     """
#     os.chdir(directory + '/lib')
#     # make file
#     os.system("make clean")
#     os.system("make")
#     # run demo
#     os.chdir(directory)
#     os.system("./tools/demo.py")
#     return 0