Path: blob/master/site/ko/hub/tutorials/image_classification.ipynb
Copyright 2021 The TensorFlow Hub Authors.
Licensed under the Apache License, Version 2.0 (the "License");
# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
Image Classification with TensorFlow Hub
In this colab, you will try multiple image classification models from TensorFlow Hub and decide which one is best suited to your use case.
Because TF Hub encourages a consistent input convention for models that operate on images, it's easy to experiment with different architectures and find the one that best fits your needs.
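As a rough illustration of what that shared convention buys you (a minimal sketch, not part of the original notebook; the two handles are simply taken from the model list further below), two classification models can be swapped without changing any of the surrounding code, because both expect a float32 batch of images with values in [0, 1] and shape [batch_size, height, width, 3]:

import tensorflow as tf
import tensorflow_hub as hub

# Two example handles from the list below; both follow the same image input convention.
for handle in [
    "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4",
    "https://tfhub.dev/google/imagenet/resnet_v2_50/classification/4",
]:
  model = hub.load(handle)
  batch = tf.random.uniform([1, 224, 224, 3], 0, 1.0)  # same dummy input works for both models
  print(handle, model(batch).shape)                    # logits with shape [1, num_classes]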
import tensorflow as tf
import tensorflow_hub as hub

import requests
from PIL import Image
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
#@title Helper functions for loading image (hidden)

original_image_cache = {}

def preprocess_image(image):
  image = np.array(image)
  # reshape into shape [batch_size, height, width, num_channels]
  img_reshaped = tf.reshape(image, [1, image.shape[0], image.shape[1], image.shape[2]])
  # Use `convert_image_dtype` to convert to floats in the [0,1] range.
  image = tf.image.convert_image_dtype(img_reshaped, tf.float32)
  return image

def load_image_from_url(img_url):
  """Returns an image with shape [1, height, width, num_channels]."""
  user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}
  response = requests.get(img_url, headers=user_agent)
  image = Image.open(BytesIO(response.content))
  image = preprocess_image(image)
  return image

def load_image(image_url, image_size=256, dynamic_size=False, max_dynamic_size=512):
  """Loads and preprocesses images."""
  # Cache image file locally.
  if image_url in original_image_cache:
    img = original_image_cache[image_url]
  elif image_url.startswith('https://'):
    img = load_image_from_url(image_url)
  else:
    fd = tf.io.gfile.GFile(image_url, 'rb')
    img = preprocess_image(Image.open(fd))
  original_image_cache[image_url] = img
  # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].
  img_raw = img
  if tf.reduce_max(img) > 1.0:
    img = img / 255.
  if len(img.shape) == 3:
    img = tf.stack([img, img, img], axis=-1)
  if not dynamic_size:
    img = tf.image.resize_with_pad(img, image_size, image_size)
  elif img.shape[1] > max_dynamic_size or img.shape[2] > max_dynamic_size:
    img = tf.image.resize_with_pad(img, max_dynamic_size, max_dynamic_size)
  return img, img_raw

def show_image(image, title=''):
  image_size = image.shape[1]
  w = (image_size * 6) // 320
  plt.figure(figsize=(w, w))
  plt.imshow(image[0], aspect='equal')
  plt.axis('off')
  plt.title(title)
  plt.show()
Select an image classification model. This sets some internal variables and downloads the labels file, getting everything ready for use.
There are some technical differences between the models, such as input size, model size, accuracy, and inference time. Here you can change the model you're using until you find the one most suitable for your use case.
The handle (URL) of the model is printed for your convenience. Further documentation about each model is available there.
Note: all of these models were trained on the ImageNet dataset.
#@title Select an Image Classification model

image_size = 224
dynamic_size = False

model_name = "efficientnetv2-s" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']

model_handle_map = {
  "efficientnetv2-s": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2",
  "efficientnetv2-m": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/classification/2",
  "efficientnetv2-l": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/classification/2",
  "efficientnetv2-s-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/classification/2",
  "efficientnetv2-m-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/classification/2",
  "efficientnetv2-l-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/classification/2",
  "efficientnetv2-xl-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/classification/2",
  "efficientnetv2-b0-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/classification/2",
  "efficientnetv2-b1-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/classification/2",
  "efficientnetv2-b2-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/classification/2",
  "efficientnetv2-b3-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/classification/2",
  "efficientnetv2-s-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/classification/2",
  "efficientnetv2-m-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/classification/2",
  "efficientnetv2-l-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/classification/2",
  "efficientnetv2-xl-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/classification/2",
  "efficientnetv2-b0-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/classification/2",
  "efficientnetv2-b1-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/classification/2",
  "efficientnetv2-b2-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/classification/2",
  "efficientnetv2-b3-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/classification/2",
  "efficientnetv2-b0": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/classification/2",
  "efficientnetv2-b1": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/classification/2",
  "efficientnetv2-b2": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/classification/2",
  "efficientnetv2-b3": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/classification/2",
  "efficientnet_b0": "https://tfhub.dev/tensorflow/efficientnet/b0/classification/1",
  "efficientnet_b1": "https://tfhub.dev/tensorflow/efficientnet/b1/classification/1",
  "efficientnet_b2": "https://tfhub.dev/tensorflow/efficientnet/b2/classification/1",
  "efficientnet_b3": "https://tfhub.dev/tensorflow/efficientnet/b3/classification/1",
  "efficientnet_b4": "https://tfhub.dev/tensorflow/efficientnet/b4/classification/1",
  "efficientnet_b5": "https://tfhub.dev/tensorflow/efficientnet/b5/classification/1",
  "efficientnet_b6": "https://tfhub.dev/tensorflow/efficientnet/b6/classification/1",
  "efficientnet_b7": "https://tfhub.dev/tensorflow/efficientnet/b7/classification/1",
  "bit_s-r50x1": "https://tfhub.dev/google/bit/s-r50x1/ilsvrc2012_classification/1",
  "inception_v3": "https://tfhub.dev/google/imagenet/inception_v3/classification/4",
  "inception_resnet_v2": "https://tfhub.dev/google/imagenet/inception_resnet_v2/classification/4",
  "resnet_v1_50": "https://tfhub.dev/google/imagenet/resnet_v1_50/classification/4",
  "resnet_v1_101": "https://tfhub.dev/google/imagenet/resnet_v1_101/classification/4",
  "resnet_v1_152": "https://tfhub.dev/google/imagenet/resnet_v1_152/classification/4",
  "resnet_v2_50": "https://tfhub.dev/google/imagenet/resnet_v2_50/classification/4",
  "resnet_v2_101": "https://tfhub.dev/google/imagenet/resnet_v2_101/classification/4",
  "resnet_v2_152": "https://tfhub.dev/google/imagenet/resnet_v2_152/classification/4",
  "nasnet_large": "https://tfhub.dev/google/imagenet/nasnet_large/classification/4",
  "nasnet_mobile": "https://tfhub.dev/google/imagenet/nasnet_mobile/classification/4",
  "pnasnet_large": "https://tfhub.dev/google/imagenet/pnasnet_large/classification/4",
  "mobilenet_v2_100_224": "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4",
  "mobilenet_v2_130_224": "https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/4",
  "mobilenet_v2_140_224": "https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/4",
  "mobilenet_v3_small_100_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/classification/5",
  "mobilenet_v3_small_075_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/classification/5",
  "mobilenet_v3_large_100_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/classification/5",
  "mobilenet_v3_large_075_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/classification/5",
}

model_image_size_map = {
  "efficientnetv2-s": 384,
  "efficientnetv2-m": 480,
  "efficientnetv2-l": 480,
  "efficientnetv2-b0": 224,
  "efficientnetv2-b1": 240,
  "efficientnetv2-b2": 260,
  "efficientnetv2-b3": 300,
  "efficientnetv2-s-21k": 384,
  "efficientnetv2-m-21k": 480,
  "efficientnetv2-l-21k": 480,
  "efficientnetv2-xl-21k": 512,
  "efficientnetv2-b0-21k": 224,
  "efficientnetv2-b1-21k": 240,
  "efficientnetv2-b2-21k": 260,
  "efficientnetv2-b3-21k": 300,
  "efficientnetv2-s-21k-ft1k": 384,
  "efficientnetv2-m-21k-ft1k": 480,
  "efficientnetv2-l-21k-ft1k": 480,
  "efficientnetv2-xl-21k-ft1k": 512,
  "efficientnetv2-b0-21k-ft1k": 224,
  "efficientnetv2-b1-21k-ft1k": 240,
  "efficientnetv2-b2-21k-ft1k": 260,
  "efficientnetv2-b3-21k-ft1k": 300,
  "efficientnet_b0": 224,
  "efficientnet_b1": 240,
  "efficientnet_b2": 260,
  "efficientnet_b3": 300,
  "efficientnet_b4": 380,
  "efficientnet_b5": 456,
  "efficientnet_b6": 528,
  "efficientnet_b7": 600,
  "inception_v3": 299,
  "inception_resnet_v2": 299,
  "mobilenet_v2_100_224": 224,
  "mobilenet_v2_130_224": 224,
  "mobilenet_v2_140_224": 224,
  "nasnet_large": 331,
  "nasnet_mobile": 224,
  "pnasnet_large": 331,
  "resnet_v1_50": 224,
  "resnet_v1_101": 224,
  "resnet_v1_152": 224,
  "resnet_v2_50": 224,
  "resnet_v2_101": 224,
  "resnet_v2_152": 224,
  "mobilenet_v3_small_100_224": 224,
  "mobilenet_v3_small_075_224": 224,
  "mobilenet_v3_large_100_224": 224,
  "mobilenet_v3_large_075_224": 224,
}

model_handle = model_handle_map[model_name]

print(f"Selected model: {model_name} : {model_handle}")

max_dynamic_size = 512
if model_name in model_image_size_map:
  image_size = model_image_size_map[model_name]
  dynamic_size = False
  print(f"Images will be converted to {image_size}x{image_size}")
else:
  dynamic_size = True
  print(f"Images will be capped to a max size of {max_dynamic_size}x{max_dynamic_size}")

labels_file = "https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt"

# Download the labels file and build the list of class names.
downloaded_file = tf.keras.utils.get_file("labels.txt", origin=labels_file)

classes = []

with open(downloaded_file) as f:
  labels = f.readlines()
  classes = [l.strip() for l in labels]
You can select one of the images below, or use your own image. Just remember that the input size varies between models and some of them use a dynamic input size (enabling inference on the unscaled image). Given that, the load_image method will rescale the image to the expected format.
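If you would rather classify your own picture (an optional sketch; the file path below is only a placeholder, not a file from the original notebook), the load_image helper defined above also accepts a local file path:

# Hypothetical local path: replace it with an image file you actually have on disk.
my_image_path = '/path/to/your_image.jpg'
image, original_image = load_image(my_image_path, image_size, dynamic_size, max_dynamic_size)
show_image(image, 'Scaled image')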
#@title Select an Input Image

image_name = "turtle" # @param ['tiger', 'bus', 'car', 'cat', 'dog', 'apple', 'banana', 'turtle', 'flamingo', 'piano', 'honeycomb', 'teapot']

images_for_test_map = {
  "tiger": "https://upload.wikimedia.org/wikipedia/commons/b/b0/Bengal_tiger_%28Panthera_tigris_tigris%29_female_3_crop.jpg",  # by Charles James Sharp, CC BY-SA 4.0 <https://creativecommons.org/licenses/by-sa/4.0>, via Wikimedia Commons
  "bus": "https://upload.wikimedia.org/wikipedia/commons/6/63/LT_471_%28LTZ_1471%29_Arriva_London_New_Routemaster_%2819522859218%29.jpg",  # by Martin49 from London, England, CC BY 2.0 <https://creativecommons.org/licenses/by/2.0>, via Wikimedia Commons
  "car": "https://upload.wikimedia.org/wikipedia/commons/4/49/2013-2016_Toyota_Corolla_%28ZRE172R%29_SX_sedan_%282018-09-17%29_01.jpg",  # by EurovisionNim, CC BY-SA 4.0 <https://creativecommons.org/licenses/by-sa/4.0>, via Wikimedia Commons
  "cat": "https://upload.wikimedia.org/wikipedia/commons/4/4d/Cat_November_2010-1a.jpg",  # by Alvesgaspar, CC BY-SA 3.0 <https://creativecommons.org/licenses/by-sa/3.0>, via Wikimedia Commons
  "dog": "https://upload.wikimedia.org/wikipedia/commons/archive/a/a9/20090914031557%21Saluki_dog_breed.jpg",  # by Craig Pemberton, CC BY-SA 3.0 <https://creativecommons.org/licenses/by-sa/3.0>, via Wikimedia Commons
  "apple": "https://upload.wikimedia.org/wikipedia/commons/1/15/Red_Apple.jpg",  # by Abhijit Tembhekar from Mumbai, India, CC BY 2.0 <https://creativecommons.org/licenses/by/2.0>, via Wikimedia Commons
  "banana": "https://upload.wikimedia.org/wikipedia/commons/1/1c/Bananas_white_background.jpg",  # by fir0002 flagstaffotos [at] gmail.com Canon 20D + Tamron 28-75mm f/2.8, GFDL 1.2 <http://www.gnu.org/licenses/old-licenses/fdl-1.2.html>, via Wikimedia Commons
  "turtle": "https://upload.wikimedia.org/wikipedia/commons/8/80/Turtle_golfina_escobilla_oaxaca_mexico_claudio_giovenzana_2010.jpg",  # by Claudio Giovenzana, CC BY-SA 3.0 <https://creativecommons.org/licenses/by-sa/3.0>, via Wikimedia Commons
  "flamingo": "https://upload.wikimedia.org/wikipedia/commons/b/b8/James_Flamingos_MC.jpg",  # by Christian Mehlführer, User:Chmehl, CC BY 3.0 <https://creativecommons.org/licenses/by/3.0>, via Wikimedia Commons
  "piano": "https://upload.wikimedia.org/wikipedia/commons/d/da/Steinway_%26_Sons_upright_piano%2C_model_K-132%2C_manufactured_at_Steinway%27s_factory_in_Hamburg%2C_Germany.png",  # by "Photo: © Copyright Steinway & Sons", CC BY-SA 3.0 <https://creativecommons.org/licenses/by-sa/3.0>, via Wikimedia Commons
  "honeycomb": "https://upload.wikimedia.org/wikipedia/commons/f/f7/Honey_comb.jpg",  # by Merdal, CC BY-SA 3.0 <http://creativecommons.org/licenses/by-sa/3.0/>, via Wikimedia Commons
  "teapot": "https://upload.wikimedia.org/wikipedia/commons/4/44/Black_tea_pot_cropped.jpg",  # by Mendhak, CC BY-SA 2.0 <https://creativecommons.org/licenses/by-sa/2.0>, via Wikimedia Commons
}

img_url = images_for_test_map[image_name]
image, original_image = load_image(img_url, image_size, dynamic_size, max_dynamic_size)
show_image(image, 'Scaled image')
Now that the model has been chosen, loading it with TensorFlow Hub is simple.
This also calls the model once with a random input as a "warmup" run. Subsequent calls are often much faster, and you can compare this with the latency below.
Note: models that use a dynamic size might need a fresh "warmup" run for each image size.
classifier = hub.load(model_handle)

input_shape = image.shape
warmup_input = tf.random.uniform(input_shape, 0, 1.0)

%time warmup_logits = classifier(warmup_input).numpy()
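To see the warmup effect directly (an optional check, not part of the original notebook), you can time the same call a second time; post-warmup invocations skip the one-time tracing and graph-building cost:

# Second call on the same input: this should be noticeably faster than the warmup run above.
%time _ = classifier(warmup_input).numpy()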
Everything is ready for inference. Here you can see the top 5 results from the model for the selected image.
# Run model on image
%time probabilities = tf.nn.softmax(classifier(image)).numpy()

top_5 = tf.argsort(probabilities, axis=-1, direction="DESCENDING")[0][:5].numpy()
np_classes = np.array(classes)

# Some models include an additional 'background' class in the predictions, so
# we must account for this when reading the class labels.
includes_background_class = probabilities.shape[1] == 1001

for i, item in enumerate(top_5):
  class_index = item if includes_background_class else item + 1
  line = f'({i+1}) {class_index:4} - {classes[class_index]}: {probabilities[0][top_5][i]}'
  print(line)

show_image(image, '')
Learn more
If you want to learn more and try doing transfer learning with these models, follow the Transfer Learning for Image Classification tutorial.
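As a rough preview of what that tutorial covers (a minimal sketch, assuming you use the feature-vector variant of one of these models, e.g. mobilenet_v2_100_224/feature_vector/4, instead of its classification head; the dataset variables are placeholders), transfer learning typically wraps the model in a hub.KerasLayer and trains a new classification layer on top:

import tensorflow as tf
import tensorflow_hub as hub

# Feature-vector variant of one of the models above; most classification handles
# on tfhub.dev have a matching ".../feature_vector/..." counterpart.
feature_extractor = hub.KerasLayer(
    "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4",
    input_shape=(224, 224, 3),
    trainable=False)  # keep the pre-trained weights frozen

num_classes = 5  # number of classes in your own dataset (example value)
model = tf.keras.Sequential([
    feature_extractor,
    tf.keras.layers.Dense(num_classes)  # new classification head, trained from scratch
])
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])
# model.fit(train_ds, validation_data=val_ds, epochs=5)  # with your own tf.data datasets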
To explore more image models, check them out on tfhub.dev.