"""Perception (Chapter 24)"""
import cv2
import keras
import matplotlib.pyplot as plt
import numpy as np
import scipy.signal
from keras.datasets import mnist
from keras.layers import Dense, Activation, Flatten, InputLayer, Conv2D, MaxPooling2D
from keras.models import Sequential
from utils4e import gaussian_kernel_2D
def array_normalization(array, range_min, range_max):
    """
    Linearly rescale an array into the range (range_min, range_max).

    :param array: numpy ndarray or any array-like
    :param range_min: lower bound of the target range
    :param range_max: upper bound of the target range
    :return: rescaled numpy ndarray; a constant input maps to range_min
    """
    array = np.asarray(array)
    shifted = array - np.min(array)
    peak = np.max(shifted)
    # Guard the all-constant case: the original expression divided by zero
    # here (np.max of the shifted array is 0 when every element is equal).
    if peak == 0:
        return shifted + range_min
    return shifted * (range_max - range_min) / peak + range_min
def gradient_edge_detector(image):
    """
    Detect edges by convolving the image with simple finite-difference kernels.

    :param image: numpy ndarray or an iterable object
    :return: numpy ndarray gray-scale image, normalized to (0, 255)
    """
    img = np.asarray(image)
    horizontal_kernel = np.array([[1, -1]])
    vertical_kernel = np.array([[1], [-1]])
    # A horizontal difference responds to vertical edges and vice versa.
    vertical_response = scipy.signal.convolve2d(img, horizontal_kernel, 'same')
    horizontal_response = scipy.signal.convolve2d(img, vertical_kernel, 'same')
    return array_normalization(horizontal_response + vertical_response, 0, 255)
def gaussian_derivative_edge_detector(image):
    """Detect image edges with derivative-of-Gaussian kernels.

    The Gaussian kernel is differentiated first, so smoothing and
    differencing cost a single convolution with the image per axis.
    """
    img = np.asarray(image)
    smooth = gaussian_kernel_2D()
    dx_kernel = scipy.signal.convolve2d(smooth, np.asarray([[1, -1]]), 'same')
    dy_kernel = scipy.signal.convolve2d(smooth, np.asarray([[1], [-1]]), 'same')
    response = (scipy.signal.convolve2d(img, dx_kernel, 'same')
                + scipy.signal.convolve2d(img, dy_kernel, 'same'))
    return array_normalization(response, 0, 255)
def laplacian_edge_detector(image):
    """Extract image edges with a 3x3 (negative) Laplacian filter."""
    img = np.asarray(image)
    kernel = np.asarray([[0, -1, 0],
                         [-1, 4, -1],
                         [0, -1, 0]])
    response = scipy.signal.convolve2d(img, kernel, 'same')
    return array_normalization(response, 0, 255)
def show_edges(edges):
    """Helper to display an edge image via matplotlib.

    :param edges: 2-D array of intensities; rendered in gray scale with the
        color range pinned to 0..255 (values outside are clipped by imshow).
    """
    plt.imshow(edges, cmap='gray', vmin=0, vmax=255)
    plt.axis('off')  # hide the tick marks/frame: it's a picture, not a plot
    plt.show()
def sum_squared_difference(pic1, pic2, max_shift=30):
    """
    Find the integer translation of pic2 that minimizes the sum of squared
    differences (SSD) against pic1.

    :param pic1: first frame, array-like
    :param pic2: second frame, array-like, same shape as pic1
    :param max_shift: shifts in [-max_shift, max_shift] are searched on both axes
        (default 30, matching the original hard-coded range)
    :return: ((dx, dy), ssd) for the best circular shift of pic2
    """
    pic1 = np.asarray(pic1)
    pic2 = np.asarray(pic2)
    assert pic1.shape == pic2.shape
    # Promote integer frames to int64: unsigned dtypes such as uint8 wrap
    # around on subtraction and would silently corrupt the SSD.
    if np.issubdtype(pic1.dtype, np.integer):
        pic1 = pic1.astype(np.int64)
    if np.issubdtype(pic2.dtype, np.integer):
        pic2 = pic2.astype(np.int64)
    min_ssd = np.inf
    min_dxy = (np.inf, np.inf)
    for dx in range(-max_shift, max_shift + 1):
        for dy in range(-max_shift, max_shift + 1):
            # np.roll is a circular shift, so content wraps around the borders
            shifted = np.roll(np.roll(pic2, dx, axis=0), dy, axis=1)
            ssd = np.sum((pic1 - shifted) ** 2)
            if ssd < min_ssd:
                min_dxy = (dx, dy)
                min_ssd = ssd
    return min_dxy, min_ssd
def gen_gray_scale_picture(size, level=3):
    """
    Generate a square picture with `level` distinct gray-scale bands.

    The intensity of pixel (x, y) grows with max(x, y), stepping by
    250 // (level - 1) per band, so bands form nested L-shapes.

    :param size: width and height of the generated picture
    :param level: number of gray-scale levels (must be > 0)
    :return: image as a numpy ndarray
    """
    assert level > 0
    canvas = np.zeros((size, size))
    if level == 1:
        return canvas
    step = 250 // (level - 1)
    for row in range(size):
        for col in range(size):
            canvas[row, col] = step * (max(row, col) * level // size)
    return canvas
# Module-level sample: a 3x3 image with three gray bands (0, 125, 250),
# handy as a quick demo input for the contour-detection functions below.
gray_scale_image = gen_gray_scale_picture(3)
def probability_contour_detection(image, discs, threshold=0):
    """
    Detect edges/contours by applying pairs of half-disc filters to an image.

    :param image: numpy ndarray
    :param discs: sequence of filters consumed in consecutive pairs
    :param threshold: a window's center is marked as an edge when any pair's
        response difference exceeds this value
    :return: ndarray of the same shape; detected edge centers are set to 255
    """
    res = np.zeros(image.shape)
    window = discs[0].shape[0]
    for row in range(image.shape[0] - window + 1):
        for col in range(image.shape[1] - window + 1):
            patch = image[row:row + window, col:col + window]
            responses = [np.sum(patch * discs[d]) - np.sum(patch * discs[d + 1])
                         for d in range(0, len(discs), 2)]
            if max(responses) > threshold:
                # mark the center pixel of the sliding window
                res[row + window // 2, col + window // 2] = 255
    return res
def group_contour_detection(image, cluster_num=2):
    """
    Detect contours by quantizing the image with OpenCV's k-means.

    :param image: numpy ndarray
    :param cluster_num: number of clusters in k-means
    :return: image with each pixel replaced by its uint8 cluster center
    """
    samples = np.float32(image)
    # stop after 10 iterations or when centers move by less than 1.0
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
    _, labels, centers = cv2.kmeans(samples, cluster_num, None, criteria, 10,
                                    cv2.KMEANS_RANDOM_CENTERS)
    centers = np.uint8(centers)
    quantized = centers[labels.flatten()]
    return quantized.reshape(image.shape)
def image_to_graph(image):
    """
    Convert an image to an adjacency dict: each pixel (x, y) maps to its
    down and right neighbours, with None where a neighbour would fall
    outside the image.
    """
    height, width = image.shape[0], image.shape[1]
    graph = {}
    for x in range(height):
        for y in range(width):
            down = (x + 1, y) if x + 1 < height else None
            right = (x, y + 1) if y + 1 < width else None
            graph[(x, y)] = [down, right]
    return graph
def generate_edge_weight(image, v1, v2):
    """
    Edge weight between two pixel vertices: 255 minus their intensity
    difference, so similar pixels get heavy edges and dissimilar ones light.

    :param image: image in numpy ndarray type
    :param v1, v2: vertices in the image in form of (row index, column index)
    :return: 255 - |image[v1] - image[v2]|
    """
    # Cast through float so unsigned dtypes (e.g. uint8) cannot wrap around
    # when the second pixel is brighter than the first.
    diff = abs(float(image[v1[0], v1[1]]) - float(image[v2[0], v2[1]]))
    return 255 - diff
class Graph:
    """Graph built from an image, used for min-cut based segmentation.

    Each pixel (x, y) is a vertex; edges run to the down and right
    neighbours with capacity 255 - |intensity difference| (see
    generate_edge_weight), so a cut prefers to separate dissimilar pixels.
    """

    def __init__(self, image):
        """image: ndarray; builds adjacency (self.graph) and capacities (self.flow)."""
        self.graph = image_to_graph(image)
        # ROW = number of vertices; COL = out-degree (down + right neighbour).
        self.ROW = len(self.graph)
        self.COL = 2
        self.image = image
        self.flow = {}
        # residual capacity for every in-bounds (non-None) neighbour edge
        for s in self.graph:
            self.flow[s] = {}
            for t in self.graph[s]:
                if t:
                    self.flow[s][t] = generate_edge_weight(image, s, t)

    def bfs(self, s, t, parent):
        """Breadth first search to tell whether there is an edge between source and sink.

        parent: a list to save the path between s and t.

        NOTE(review): every traversed edge (u, node) is appended to `parent`,
        not only edges on the s->t path, and `s` itself is never added to
        `visited` — confirm this matches the intended max-flow bookkeeping.
        """
        queue = [s]
        visited = []
        while queue:
            u = queue.pop(0)  # FIFO pop; O(n) on a list but fine for small images
            for node in self.graph[u]:
                # skip out-of-bounds (None) neighbours and saturated edges
                if node not in visited and node and self.flow[u][node] > 0:
                    queue.append(node)
                    visited.append(node)
                    parent.append((u, node))
        return True if t in visited else False

    def min_cut(self, source, sink):
        """Find the minimum cut of the graph between source and sink.

        Returns the list of edges whose residual capacity reached 0 while
        their original weight was positive — these form the cut.
        """
        parent = []
        max_flow = 0
        while self.bfs(source, sink, parent):
            # bottleneck capacity over all edges collected by bfs
            path_flow = np.inf
            for s, t in parent:
                path_flow = min(path_flow, self.flow[s][t])
            max_flow += path_flow
            # NOTE(review): the bottleneck is subtracted from EVERY edge whose
            # target lies inside the sink's bounding box, not only the
            # augmenting path — verify against standard Ford-Fulkerson.
            for s in self.flow:
                for t in self.flow[s]:
                    if t[0] <= sink[0] and t[1] <= sink[1]:
                        self.flow[s][t] -= path_flow
            parent = []
        res = []
        for i in self.flow:
            for j in self.flow[i]:
                if self.flow[i][j] == 0 and generate_edge_weight(self.image, i, j) > 0:
                    res.append((i, j))
        return res
def gen_discs(init_scale, scales=1):
    """
    Generate collections of half-disc filter pairs at increasing sizes.

    :param init_scale: disc diameter for the first scale
    :param scales: number of scales; scale m uses diameter init_scale * (m + 1)
    :return: list with one collection of 8 half-disc filters per scale
    """
    collections = []
    for m in range(scales):
        diameter = init_scale * (m + 1)
        center = (diameter - 1) / 2
        # full disc: 255 inside the inscribed circle, 0 elsewhere
        full = np.zeros((diameter, diameter))
        for i in range(diameter):
            for j in range(diameter):
                if (i - center) ** 2 + (j - center) ** 2 <= center ** 2:
                    full[i, j] = 255
        # horizontal split, its 180-degree rotation, and the vertical splits
        bottom = np.copy(full)
        bottom[:(diameter - 1) // 2, :] = 0
        top = bottom[::-1, ::-1]
        halves = [bottom, top, np.transpose(bottom), np.transpose(top)]
        # diagonal splits of the full disc
        halves += [np.tril(full, 0), np.triu(full, 0),
                   np.flip(np.tril(full, 0), axis=0),
                   np.flip(np.triu(full, 0), axis=0)]
        collections.append(halves)
    return collections
def load_MINST(train_size, val_size, test_size):
    """
    Load the MNIST digit dataset via keras and return normalized splits.

    :param train_size: number of training samples
    :param val_size: number of validation samples (taken from the tail of the
        training data, directly after the training slice)
    :param test_size: number of test samples
    :return: ((train_x, train_y), (val_x, val_y), (test_x, test_y)) with
        images shaped (n, 1, 28, 28) scaled to [0, 1] and one-hot labels
    """
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    total_size = len(x_train)
    # shrink the training split if train + validation would overrun the data
    if train_size + val_size > total_size:
        train_size = total_size - val_size
    x_train = x_train.reshape(x_train.shape[0], 1, 28, 28).astype('float32') / 255
    # BUG FIX: normalize the returned test images too — previously a scaled
    # copy (test_x) was computed but the raw uint8 x_test was returned.
    x_test = x_test.reshape(x_test.shape[0], 1, 28, 28).astype('float32') / 255
    y_train = keras.utils.to_categorical(y_train, 10)
    y_test = keras.utils.to_categorical(y_test, 10)
    return ((x_train[:train_size], y_train[:train_size]),
            (x_train[train_size:train_size + val_size], y_train[train_size:train_size + val_size]),
            (x_test[:test_size], y_test[:test_size]))
def simple_convnet(size=3, num_classes=10):
    """
    Build a small convolutional network for digit recognition.

    :param size: number of Conv2D + MaxPooling blocks
    :param num_classes: number of output classes
    :return: a compiled keras Sequential model
    """
    model = Sequential()
    # input is one 28x28 single-channel image per sample
    model.add(InputLayer(input_shape=(1, 28, 28)))
    for _ in range(size):
        model.add(Conv2D(32, (2, 2), padding='same', kernel_initializer='random_uniform'))
        model.add(MaxPooling2D(padding='same'))
    model.add(Flatten())
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    # NOTE(review): compile() passes no optimizer; some Keras versions
    # require one explicitly — confirm against the Keras version in use.
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
    print(model.summary())
    return model
def train_model(model):
    """Train and evaluate the simple convnet on a small MNIST subset.

    :param model: a compiled keras model accepting (1, 28, 28) inputs
    :return: the trained model
    """
    (train_x, train_y), (val_x, val_y), (test_x, test_y) = load_MINST(1000, 100, 100)
    model.fit(train_x, train_y, validation_data=(val_x, val_y),
              epochs=5, verbose=2, batch_size=32)
    # report test-set loss/accuracy
    print(model.evaluate(test_x, test_y, verbose=1))
    return model
def selective_search(image):
    """
    Selective search for object detection (OpenCV ximgproc implementation).

    :param image: None or "" (use the bundled demo image), a path string,
        or a single-channel image in ndarray type
    :return: list of region proposals, each in form of [x, y, width, height]
    """
    if image is None or (isinstance(image, str) and not image):
        # BUG FIX: `if not image:` raised ValueError for ndarray inputs
        # (truth value of a multi-element array is ambiguous).
        im = cv2.imread("./images/stapler1-test.png")
    elif isinstance(image, str):
        im = cv2.imread(image)
    else:
        # BUG FIX: replicate the gray-scale image into 3 channels;
        # `np.stack(image * 3, ...)` scaled the pixel values instead.
        im = np.stack([np.asarray(image)] * 3, axis=-1)
    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    ss.setBaseImage(im)
    ss.switchToSelectiveSearchQuality()
    rects = ss.process()
    # draw the first 100 proposals for visual inspection
    image_out = im.copy()
    for rect in rects[:100]:
        print(rect)
        x, y, w, h = rect
        cv2.rectangle(image_out, (x, y), (x + w, y + h), (0, 255, 0), 1, cv2.LINE_AA)
    cv2.imshow("Output", image_out)
    cv2.waitKey(0)
    return rects
def pool_rois(feature_map, rois, pooled_height, pooled_width):
    """
    Apply ROI pooling to one feature map for several regions of interest.

    :param feature_map: ndarray of shape (height, width, channels) — axis 0
        is treated as height by pool_roi
    :param rois: list of ROIs, each as ratios [h_min, w_min, h_max, w_max]
    :param pooled_height: height of each pooled output
    :param pooled_width: width of each pooled output
    :return: list of pooled features, one per ROI
    """
    return [pool_roi(feature_map, roi, pooled_height, pooled_width)
            for roi in rois]
def pool_roi(feature_map, roi, pooled_height, pooled_width):
    """
    Apply ROI max-pooling for a single region of one feature map.

    :param feature_map: ndarray; axis 0 = height, axis 1 = width, axis 2 = channels
    :param roi: region of interest as ratios [h_min, w_min, h_max, w_max] in [0, 1]
    :param pooled_height: number of output rows
    :param pooled_width: number of output columns
    :return: ndarray of shape (pooled_height, pooled_width) of per-cell maxima
        (maximum is taken over all channels of each cell)
    """
    map_h = int(feature_map.shape[0])
    map_w = int(feature_map.shape[1])
    h_lo, h_hi = int(map_h * roi[0]), int(map_h * roi[2])
    w_lo, w_hi = int(map_w * roi[1]), int(map_w * roi[3])
    region = feature_map[h_lo:h_hi, w_lo:w_hi, :]
    region_h = h_hi - h_lo
    region_w = w_hi - w_lo
    h_step = region_h // pooled_height
    w_step = region_w // pooled_width
    rows = []
    for i in range(pooled_height):
        top = i * h_step
        # the last row/column absorbs the remainder when the region
        # does not divide evenly into the pooled grid
        bottom = (i + 1) * h_step if i + 1 < pooled_height else region_h
        row = []
        for j in range(pooled_width):
            left = j * w_step
            right = (j + 1) * w_step if j + 1 < pooled_width else region_w
            row.append(np.max(region[top:bottom, left:right, :]))
        rows.append(row)
    return np.stack(rows)