"""Perception (Chapter 24)"""
import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import scipy.signal
from keras.datasets import mnist
from keras.layers import Dense, Activation, Flatten, InputLayer, Conv2D, MaxPooling2D
from keras.models import Sequential
from utils4e import gaussian_kernel_2D
def array_normalization(array, range_min, range_max):
    """
    Linearly rescale an array into the range (range_min, range_max).

    :param array: numpy ndarray or any array-like
    :param range_min: lower bound of the target range
    :param range_max: upper bound of the target range
    :return: rescaled numpy ndarray; a constant input maps to range_min
    """
    array = np.asarray(array)
    shifted = array - np.min(array)
    peak = np.max(shifted)
    # Guard the all-constant case: the original expression divided by zero
    # here (np.max of the shifted array is 0 when every element is equal).
    if peak == 0:
        return shifted + range_min
    return shifted * (range_max - range_min) / peak + range_min
def gradient_edge_detector(image):
    """
    Detect edges by convolving the image with simple finite-difference kernels.

    :param image: numpy ndarray or an iterable object
    :return: numpy ndarray gray-scale image, normalized to (0, 255)
    """
    img = np.asarray(image)
    horizontal_kernel = np.array([[1, -1]])
    vertical_kernel = np.array([[1], [-1]])
    # A horizontal difference responds to vertical edges and vice versa.
    vertical_response = scipy.signal.convolve2d(img, horizontal_kernel, 'same')
    horizontal_response = scipy.signal.convolve2d(img, vertical_kernel, 'same')
    return array_normalization(horizontal_response + vertical_response, 0, 255)
def gaussian_derivative_edge_detector(image):
    """Detect image edges with derivative-of-Gaussian kernels.

    The Gaussian kernel is differentiated first, so smoothing and
    differencing cost a single convolution with the image per axis.
    """
    img = np.asarray(image)
    smooth = gaussian_kernel_2D()
    dx_kernel = scipy.signal.convolve2d(smooth, np.asarray([[1, -1]]), 'same')
    dy_kernel = scipy.signal.convolve2d(smooth, np.asarray([[1], [-1]]), 'same')
    response = (scipy.signal.convolve2d(img, dx_kernel, 'same')
                + scipy.signal.convolve2d(img, dy_kernel, 'same'))
    return array_normalization(response, 0, 255)
def laplacian_edge_detector(image):
    """Extract image edges with a 3x3 (negative) Laplacian filter."""
    img = np.asarray(image)
    kernel = np.asarray([[0, -1, 0],
                         [-1, 4, -1],
                         [0, -1, 0]])
    response = scipy.signal.convolve2d(img, kernel, 'same')
    return array_normalization(response, 0, 255)
def show_edges(edges):
    """Helper to display an edge image via matplotlib.

    :param edges: 2-D array of intensities; rendered in gray scale with the
        color range pinned to 0..255 (values outside are clipped by imshow).
    """
    plt.imshow(edges, cmap='gray', vmin=0, vmax=255)
    plt.axis('off')  # hide the tick marks/frame: it's a picture, not a plot
    plt.show()
def sum_squared_difference(pic1, pic2, max_shift=30):
    """
    Find the integer translation of pic2 that minimizes the sum of squared
    differences (SSD) against pic1.

    :param pic1: first frame, array-like
    :param pic2: second frame, array-like, same shape as pic1
    :param max_shift: shifts in [-max_shift, max_shift] are searched on both axes
        (default 30, matching the original hard-coded range)
    :return: ((dx, dy), ssd) for the best circular shift of pic2
    """
    pic1 = np.asarray(pic1)
    pic2 = np.asarray(pic2)
    assert pic1.shape == pic2.shape
    # Promote integer frames to int64: unsigned dtypes such as uint8 wrap
    # around on subtraction and would silently corrupt the SSD.
    if np.issubdtype(pic1.dtype, np.integer):
        pic1 = pic1.astype(np.int64)
    if np.issubdtype(pic2.dtype, np.integer):
        pic2 = pic2.astype(np.int64)
    min_ssd = np.inf
    min_dxy = (np.inf, np.inf)
    for dx in range(-max_shift, max_shift + 1):
        for dy in range(-max_shift, max_shift + 1):
            # np.roll is a circular shift, so content wraps around the borders
            shifted = np.roll(np.roll(pic2, dx, axis=0), dy, axis=1)
            ssd = np.sum((pic1 - shifted) ** 2)
            if ssd < min_ssd:
                min_dxy = (dx, dy)
                min_ssd = ssd
    return min_dxy, min_ssd
def gen_gray_scale_picture(size, level=3):
    """
    Generate a square picture with `level` distinct gray-scale bands.

    The intensity of pixel (x, y) grows with max(x, y), stepping by
    250 // (level - 1) per band, so bands form nested L-shapes.

    :param size: width and height of the generated picture
    :param level: number of gray-scale levels (must be > 0)
    :return: image as a numpy ndarray
    """
    assert level > 0
    canvas = np.zeros((size, size))
    if level == 1:
        return canvas
    step = 250 // (level - 1)
    for row in range(size):
        for col in range(size):
            canvas[row, col] = step * (max(row, col) * level // size)
    return canvas
# Module-level sample: a 3x3 image with three gray bands (0, 125, 250),
# handy as a quick demo input for the contour-detection functions below.
gray_scale_image = gen_gray_scale_picture(3)
def probability_contour_detection(image, discs, threshold=0):
    """
    Detect edges/contours by applying pairs of half-disc filters to an image.

    :param image: numpy ndarray
    :param discs: sequence of filters consumed in consecutive pairs
    :param threshold: a window's center is marked as an edge when any pair's
        response difference exceeds this value
    :return: ndarray of the same shape; detected edge centers are set to 255
    """
    res = np.zeros(image.shape)
    window = discs[0].shape[0]
    for row in range(image.shape[0] - window + 1):
        for col in range(image.shape[1] - window + 1):
            patch = image[row:row + window, col:col + window]
            responses = [np.sum(patch * discs[d]) - np.sum(patch * discs[d + 1])
                         for d in range(0, len(discs), 2)]
            if max(responses) > threshold:
                # mark the center pixel of the sliding window
                res[row + window // 2, col + window // 2] = 255
    return res
def group_contour_detection(image, cluster_num=2):
    """
    Detect contours by quantizing the image with OpenCV's k-means.

    :param image: numpy ndarray
    :param cluster_num: number of clusters in k-means
    :return: image with each pixel replaced by its uint8 cluster center
    """
    samples = np.float32(image)
    # stop after 10 iterations or when centers move by less than 1.0
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(samples, cluster_num, None, criteria, 10,
                                    cv2.KMEANS_RANDOM_CENTERS)
    centers = np.uint8(centers)
    quantized = centers[labels.flatten()]
    return quantized.reshape(image.shape)
def image_to_graph(image):
    """
    Convert an image to an adjacency dict: each pixel (x, y) maps to its
    down and right neighbours, with None where a neighbour would fall
    outside the image.
    """
    height, width = image.shape[0], image.shape[1]
    graph = {}
    for x in range(height):
        for y in range(width):
            down = (x + 1, y) if x + 1 < height else None
            right = (x, y + 1) if y + 1 < width else None
            graph[(x, y)] = [down, right]
    return graph
def generate_edge_weight(image, v1, v2):
    """
    Edge weight between two pixel vertices: 255 minus their intensity
    difference, so similar pixels get heavy edges and dissimilar ones light.

    :param image: image in numpy ndarray type
    :param v1, v2: vertices in the image in form of (row index, column index)
    :return: 255 - |image[v1] - image[v2]|
    """
    # Cast through float so unsigned dtypes (e.g. uint8) cannot wrap around
    # when the second pixel is brighter than the first.
    diff = abs(float(image[v1[0], v1[1]]) - float(image[v2[0], v2[1]]))
    return 255 - diff
class Graph:
    """Graph built from an image, used for min-cut based segmentation.

    Each pixel (x, y) is a vertex; edges run to the down and right
    neighbours with capacity 255 - |intensity difference| (see
    generate_edge_weight), so a cut prefers to separate dissimilar pixels.
    """

    def __init__(self, image):
        """image: ndarray; builds adjacency (self.graph) and capacities (self.flow)."""
        self.graph = image_to_graph(image)
        # ROW = number of vertices; COL = out-degree (down + right neighbour).
        self.ROW = len(self.graph)
        self.COL = 2
        self.image = image
        self.flow = {}
        # residual capacity for every in-bounds (non-None) neighbour edge
        for s in self.graph:
            self.flow[s] = {}
            for t in self.graph[s]:
                if t:
                    self.flow[s][t] = generate_edge_weight(image, s, t)

    def bfs(self, s, t, parent):
        """Breadth first search to tell whether there is an edge between source and sink.

        parent: a list to save the path between s and t.

        NOTE(review): every traversed edge (u, node) is appended to `parent`,
        not only edges on the s->t path, and `s` itself is never added to
        `visited` — confirm this matches the intended max-flow bookkeeping.
        """
        queue = [s]
        visited = []
        while queue:
            u = queue.pop(0)  # FIFO pop; O(n) on a list but fine for small images
            for node in self.graph[u]:
                # skip out-of-bounds (None) neighbours and saturated edges
                if node not in visited and node and self.flow[u][node] > 0:
                    queue.append(node)
                    visited.append(node)
                    parent.append((u, node))
        return True if t in visited else False

    def min_cut(self, source, sink):
        """Find the minimum cut of the graph between source and sink.

        Returns the list of edges whose residual capacity reached 0 while
        their original weight was positive — these form the cut.
        """
        parent = []
        max_flow = 0
        while self.bfs(source, sink, parent):
            # bottleneck capacity over all edges collected by bfs
            path_flow = np.inf
            for s, t in parent:
                path_flow = min(path_flow, self.flow[s][t])
            max_flow += path_flow
            # NOTE(review): the bottleneck is subtracted from EVERY edge whose
            # target lies inside the sink's bounding box, not only the
            # augmenting path — verify against standard Ford-Fulkerson.
            for s in self.flow:
                for t in self.flow[s]:
                    if t[0] <= sink[0] and t[1] <= sink[1]:
                        self.flow[s][t] -= path_flow
            parent = []
        res = []
        for i in self.flow:
            for j in self.flow[i]:
                if self.flow[i][j] == 0 and generate_edge_weight(self.image, i, j) > 0:
                    res.append((i, j))
        return res
def gen_discs(init_scale, scales=1):
    """
    Generate collections of half-disc filter pairs at increasing sizes.

    :param init_scale: disc diameter for the first scale
    :param scales: number of scales; scale m uses diameter init_scale * (m + 1)
    :return: list with one collection of 8 half-disc filters per scale
    """
    collections = []
    for m in range(scales):
        diameter = init_scale * (m + 1)
        center = (diameter - 1) / 2
        # full disc: 255 inside the inscribed circle, 0 elsewhere
        full = np.zeros((diameter, diameter))
        for i in range(diameter):
            for j in range(diameter):
                if (i - center) ** 2 + (j - center) ** 2 <= center ** 2:
                    full[i, j] = 255
        # horizontal split, its 180-degree rotation, and the vertical splits
        bottom = np.copy(full)
        bottom[:(diameter - 1) // 2, :] = 0
        top = bottom[::-1, ::-1]
        halves = [bottom, top, np.transpose(bottom), np.transpose(top)]
        # diagonal splits of the full disc
        halves += [np.tril(full, 0), np.triu(full, 0),
                   np.flip(np.tril(full, 0), axis=0),
                   np.flip(np.triu(full, 0), axis=0)]
        collections.append(halves)
    return collections
def load_MINST(train_size, val_size, test_size):
    """
    Load the MNIST digit dataset via keras and return normalized splits.

    :param train_size: number of training samples
    :param val_size: number of validation samples (taken from the tail of the
        training data, directly after the training slice)
    :param test_size: number of test samples
    :return: ((train_x, train_y), (val_x, val_y), (test_x, test_y)) with
        images shaped (n, 1, 28, 28) scaled to [0, 1] and one-hot labels
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    total_size = len(x_train)
    # shrink the training split if train + validation would overrun the data
    if train_size + val_size > total_size:
        train_size = total_size - val_size
    x_train = x_train.reshape(x_train.shape[0], 1, 28, 28).astype('float32') / 255
    # BUG FIX: normalize the returned test images too — previously a scaled
    # copy (test_x) was computed but the raw uint8 x_test was returned.
    x_test = x_test.reshape(x_test.shape[0], 1, 28, 28).astype('float32') / 255
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)
    return ((x_train[:train_size], y_train[:train_size]),
            (x_train[train_size:train_size + val_size], y_train[train_size:train_size + val_size]),
            (x_test[:test_size], y_test[:test_size]))
def simple_convnet(size=3, num_classes=10):
    """
    Build a small convolutional network for digit recognition.

    :param size: number of Conv2D + MaxPooling blocks
    :param num_classes: number of output classes
    :return: a compiled keras Sequential model
    """
    model = Sequential()
    # input is one 28x28 single-channel image per sample
    model.add(InputLayer(input_shape=(1, 28, 28)))
    for _ in range(size):
        model.add(Conv2D(32, (2, 2), padding='same', kernel_initializer='random_uniform'))
        model.add(MaxPooling2D(padding='same'))
    model.add(Flatten())
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    # NOTE(review): compile() passes no optimizer; some Keras versions
    # require one explicitly — confirm against the Keras version in use.
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
    print(model.summary())
    return model
def train_model(model):
    """Train and evaluate the simple convnet on a small MNIST subset.

    :param model: a compiled keras model accepting (1, 28, 28) inputs
    :return: the trained model
    """
    (train_x, train_y), (val_x, val_y), (test_x, test_y) = load_MINST(1000, 100, 100)
    model.fit(train_x, train_y, validation_data=(val_x, val_y),
              epochs=5, verbose=2, batch_size=32)
    # report test-set loss/accuracy
    print(model.evaluate(test_x, test_y, verbose=1))
    return model
def selective_search(image):
    """
    Selective search for object detection (OpenCV ximgproc implementation).

    :param image: None or "" (use the bundled demo image), a path string,
        or a single-channel image in ndarray type
    :return: list of region proposals, each in form of [x, y, width, height]
    """
    if image is None or (isinstance(image, str) and not image):
        # BUG FIX: `if not image:` raised ValueError for ndarray inputs
        # (truth value of a multi-element array is ambiguous).
        im = cv2.imread("./images/stapler1-test.png")
    elif isinstance(image, str):
        im = cv2.imread(image)
    else:
        # BUG FIX: replicate the gray-scale image into 3 channels;
        # `np.stack(image * 3, ...)` scaled the pixel values instead.
        im = np.stack([np.asarray(image)] * 3, axis=-1)
    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    ss.setBaseImage(im)
    ss.switchToSelectiveSearchQuality()
    rects = ss.process()
    # draw the first 100 proposals for visual inspection
    image_out = im.copy()
    for rect in rects[:100]:
        print(rect)
        x, y, w, h = rect
        cv2.rectangle(image_out, (x, y), (x + w, y + h), (0, 255, 0), 1, cv2.LINE_AA)
    cv2.imshow("Output", image_out)
    cv2.waitKey(0)
    return rects
def pool_rois(feature_map, rois, pooled_height, pooled_width):
    """
    Apply ROI pooling to one feature map for several regions of interest.

    :param feature_map: ndarray of shape (height, width, channels) — axis 0
        is treated as height by pool_roi
    :param rois: list of ROIs, each as ratios [h_min, w_min, h_max, w_max]
    :param pooled_height: height of each pooled output
    :param pooled_width: width of each pooled output
    :return: list of pooled features, one per ROI
    """
    return [pool_roi(feature_map, roi, pooled_height, pooled_width)
            for roi in rois]
def pool_roi(feature_map, roi, pooled_height, pooled_width):
    """
    Apply ROI max-pooling for a single region of one feature map.

    :param feature_map: ndarray; axis 0 = height, axis 1 = width, axis 2 = channels
    :param roi: region of interest as ratios [h_min, w_min, h_max, w_max] in [0, 1]
    :param pooled_height: number of output rows
    :param pooled_width: number of output columns
    :return: ndarray of shape (pooled_height, pooled_width) of per-cell maxima
        (maximum is taken over all channels of each cell)
    """
    map_h = int(feature_map.shape[0])
    map_w = int(feature_map.shape[1])
    h_lo, h_hi = int(map_h * roi[0]), int(map_h * roi[2])
    w_lo, w_hi = int(map_w * roi[1]), int(map_w * roi[3])
    region = feature_map[h_lo:h_hi, w_lo:w_hi, :]
    region_h = h_hi - h_lo
    region_w = w_hi - w_lo
    h_step = region_h // pooled_height
    w_step = region_w // pooled_width
    rows = []
    for i in range(pooled_height):
        top = i * h_step
        # the last row/column absorbs the remainder when the region
        # does not divide evenly into the pooled grid
        bottom = (i + 1) * h_step if i + 1 < pooled_height else region_h
        row = []
        for j in range(pooled_width):
            left = j * w_step
            right = (j + 1) * w_step if j + 1 < pooled_width else region_w
            row.append(np.max(region[top:bottom, left:right, :]))
        rows.append(row)
    return np.stack(rows)