Copyright 2021 The TensorFlow Hub Authors.
Licensed under the Apache License, Version 2.0 (the "License");
# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
Retraining an Image Classifier
Introduction
Image classification models have millions of parameters. Training one from scratch requires a lot of labeled training data and a lot of computing power. Transfer learning greatly reduces these requirements by reusing, in a new model, a piece of a model that has already been trained on a related task.
This Colab demonstrates how to build a Keras model that classifies five species of flowers, using a pre-trained TF2 SavedModel from TensorFlow Hub for image feature extraction that was itself trained on the much larger and more general ImageNet dataset. Optionally, the feature extractor can be trained ("fine-tuned") alongside the newly added classifier.
Looking for a tool instead?
This is a TensorFlow coding tutorial. If you just want a tool that builds a TensorFlow or TF Lite model for you, take a look at the make_image_classifier command-line tool installed by the PIP package tensorflow-hub[make_image_classifier], or at this TF Lite Colab.
Setup
import itertools
import os

import matplotlib.pylab as plt
import numpy as np

import tensorflow as tf
import tensorflow_hub as hub

print("TF version:", tf.__version__)
print("Hub version:", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")
Select the TF2 SavedModel module to use
To get started, use https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4. The same URL identifies the SavedModel in code and, when visited in a browser, shows its documentation. (Note that models in TF1 Hub format won't work here.)
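As a quick check that a handle works, here is a minimal sketch (not part of the original notebook) that loads the MobileNet V2 module above as a Keras layer and runs a dummy batch through it; the 1280-dimensional output is specific to that model:

import tensorflow as tf
import tensorflow_hub as hub

# Load the feature-vector SavedModel as a frozen Keras layer.
feature_extractor = hub.KerasLayer(
    "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4",
    trainable=False)

# One dummy 224x224 RGB image with values in [0, 1].
images = tf.zeros([1, 224, 224, 3])
print(feature_extractor(images).shape)  # (1, 1280) for this MobileNet V2 variant.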
You can find more TF2 models that generate image feature vectors here.
You can try out multiple models: just select a different one in the cell below and follow the rest of the notebook.
#@title

model_name = "efficientnetv2-xl-21k" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']

model_handle_map = {
  "efficientnetv2-s": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/feature_vector/2",
  "efficientnetv2-m": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/feature_vector/2",
  "efficientnetv2-l": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/feature_vector/2",
  "efficientnetv2-s-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2",
  "efficientnetv2-m-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/feature_vector/2",
  "efficientnetv2-l-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2",
  "efficientnetv2-xl-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/feature_vector/2",
  "efficientnetv2-b0-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/feature_vector/2",
  "efficientnetv2-b1-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/feature_vector/2",
  "efficientnetv2-b2-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/feature_vector/2",
  "efficientnetv2-b3-21k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/feature_vector/2",
  "efficientnetv2-s-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2",
  "efficientnetv2-m-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2",
  "efficientnetv2-l-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2",
  "efficientnetv2-xl-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2",
  "efficientnetv2-b0-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/feature_vector/2",
  "efficientnetv2-b1-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/feature_vector/2",
  "efficientnetv2-b2-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/feature_vector/2",
  "efficientnetv2-b3-21k-ft1k": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/feature_vector/2",
  "efficientnetv2-b0": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/feature_vector/2",
  "efficientnetv2-b1": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/feature_vector/2",
  "efficientnetv2-b2": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/feature_vector/2",
  "efficientnetv2-b3": "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/feature_vector/2",
  "efficientnet_b0": "https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1",
  "efficientnet_b1": "https://tfhub.dev/tensorflow/efficientnet/b1/feature-vector/1",
  "efficientnet_b2": "https://tfhub.dev/tensorflow/efficientnet/b2/feature-vector/1",
  "efficientnet_b3": "https://tfhub.dev/tensorflow/efficientnet/b3/feature-vector/1",
  "efficientnet_b4": "https://tfhub.dev/tensorflow/efficientnet/b4/feature-vector/1",
  "efficientnet_b5": "https://tfhub.dev/tensorflow/efficientnet/b5/feature-vector/1",
  "efficientnet_b6": "https://tfhub.dev/tensorflow/efficientnet/b6/feature-vector/1",
  "efficientnet_b7": "https://tfhub.dev/tensorflow/efficientnet/b7/feature-vector/1",
  "bit_s-r50x1": "https://tfhub.dev/google/bit/s-r50x1/1",
  "inception_v3": "https://tfhub.dev/google/imagenet/inception_v3/feature-vector/4",
  "inception_resnet_v2": "https://tfhub.dev/google/imagenet/inception_resnet_v2/feature-vector/4",
  "resnet_v1_50": "https://tfhub.dev/google/imagenet/resnet_v1_50/feature-vector/4",
  "resnet_v1_101": "https://tfhub.dev/google/imagenet/resnet_v1_101/feature-vector/4",
  "resnet_v1_152": "https://tfhub.dev/google/imagenet/resnet_v1_152/feature-vector/4",
  "resnet_v2_50": "https://tfhub.dev/google/imagenet/resnet_v2_50/feature-vector/4",
  "resnet_v2_101": "https://tfhub.dev/google/imagenet/resnet_v2_101/feature-vector/4",
  "resnet_v2_152": "https://tfhub.dev/google/imagenet/resnet_v2_152/feature-vector/4",
  "nasnet_large": "https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/4",
  "nasnet_mobile": "https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4",
  "pnasnet_large": "https://tfhub.dev/google/imagenet/pnasnet_large/feature_vector/4",
  "mobilenet_v2_100_224": "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4",
  "mobilenet_v2_130_224": "https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/feature_vector/4",
  "mobilenet_v2_140_224": "https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4",
  "mobilenet_v3_small_100_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/feature_vector/5",
  "mobilenet_v3_small_075_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/feature_vector/5",
  "mobilenet_v3_large_100_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5",
  "mobilenet_v3_large_075_224": "https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/feature_vector/5",
}

model_image_size_map = {
  "efficientnetv2-s": 384,
  "efficientnetv2-m": 480,
  "efficientnetv2-l": 480,
  "efficientnetv2-b0": 224,
  "efficientnetv2-b1": 240,
  "efficientnetv2-b2": 260,
  "efficientnetv2-b3": 300,
  "efficientnetv2-s-21k": 384,
  "efficientnetv2-m-21k": 480,
  "efficientnetv2-l-21k": 480,
  "efficientnetv2-xl-21k": 512,
  "efficientnetv2-b0-21k": 224,
  "efficientnetv2-b1-21k": 240,
  "efficientnetv2-b2-21k": 260,
  "efficientnetv2-b3-21k": 300,
  "efficientnetv2-s-21k-ft1k": 384,
  "efficientnetv2-m-21k-ft1k": 480,
  "efficientnetv2-l-21k-ft1k": 480,
  "efficientnetv2-xl-21k-ft1k": 512,
  "efficientnetv2-b0-21k-ft1k": 224,
  "efficientnetv2-b1-21k-ft1k": 240,
  "efficientnetv2-b2-21k-ft1k": 260,
  "efficientnetv2-b3-21k-ft1k": 300,
  "efficientnet_b0": 224,
  "efficientnet_b1": 240,
  "efficientnet_b2": 260,
  "efficientnet_b3": 300,
  "efficientnet_b4": 380,
  "efficientnet_b5": 456,
  "efficientnet_b6": 528,
  "efficientnet_b7": 600,
  "inception_v3": 299,
  "inception_resnet_v2": 299,
  "nasnet_large": 331,
  "pnasnet_large": 331,
}

model_handle = model_handle_map.get(model_name)
pixels = model_image_size_map.get(model_name, 224)

print(f"Selected model: {model_name} : {model_handle}")

IMAGE_SIZE = (pixels, pixels)
print(f"Input size {IMAGE_SIZE}")

BATCH_SIZE = 16 #@param {type:"integer"}
Set up the Flowers dataset
Inputs are suitably resized for the selected module. Dataset augmentation (i.e., random distortions of an image each time it is read) improves training, especially when fine-tuning.
data_dir = tf.keras.utils.get_file(
    'flower_photos',
    'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True)
def build_dataset(subset):
  return tf.keras.preprocessing.image_dataset_from_directory(
      data_dir,
      validation_split=.20,
      subset=subset,
      label_mode="categorical",
      # Seed needs to be provided when using validation_split and shuffle = True.
      # A fixed seed is used so that the validation set is stable across runs.
      seed=123,
      image_size=IMAGE_SIZE,
      batch_size=1)

train_ds = build_dataset("training")
class_names = tuple(train_ds.class_names)
train_size = train_ds.cardinality().numpy()
train_ds = train_ds.unbatch().batch(BATCH_SIZE)
train_ds = train_ds.repeat()

normalization_layer = tf.keras.layers.Rescaling(1. / 255)
preprocessing_model = tf.keras.Sequential([normalization_layer])
do_data_augmentation = False #@param {type:"boolean"}
if do_data_augmentation:
  preprocessing_model.add(tf.keras.layers.RandomRotation(40))
  preprocessing_model.add(tf.keras.layers.RandomTranslation(0, 0.2))
  preprocessing_model.add(tf.keras.layers.RandomTranslation(0.2, 0))
  # Like the old tf.keras.preprocessing.image.ImageDataGenerator(),
  # image sizes are fixed when reading, and then a random zoom is applied.
  # If all training inputs are larger than image_size, one could also use
  # RandomCrop with a batch size of 1 and rebatch later.
  preprocessing_model.add(tf.keras.layers.RandomZoom(0.2, 0.2))
  preprocessing_model.add(tf.keras.layers.RandomFlip(mode="horizontal"))
train_ds = train_ds.map(lambda images, labels:
                        (preprocessing_model(images), labels))

val_ds = build_dataset("validation")
valid_size = val_ds.cardinality().numpy()
val_ds = val_ds.unbatch().batch(BATCH_SIZE)
val_ds = val_ds.map(lambda images, labels:
                    (normalization_layer(images), labels))
Defining the model
All it takes is to put a linear classifier on top of the feature_extractor_layer from the Hub module.

For speed, we start out with a non-trainable feature_extractor_layer, but you can also enable fine-tuning for greater accuracy.
do_fine_tuning = False #@param {type:"boolean"}
print("Building model with", model_handle) model = tf.keras.Sequential([ # Explicitly define the input shape so the model can be properly # loaded by the TFLiteConverter tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)), hub.KerasLayer(model_handle, trainable=do_fine_tuning), tf.keras.layers.Dropout(rate=0.2), tf.keras.layers.Dense(len(class_names), kernel_regularizer=tf.keras.regularizers.l2(0.0001)) ]) model.build((None,)+IMAGE_SIZE+(3,)) model.summary()
Training the model
model.compile(
  optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),
  metrics=['accuracy'])
steps_per_epoch = train_size // BATCH_SIZE
validation_steps = valid_size // BATCH_SIZE
hist = model.fit(
    train_ds,
    epochs=5, steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps).history
plt.figure()
plt.ylabel("Loss (training and validation)")
plt.xlabel("Training Steps")
plt.ylim([0,2])
plt.plot(hist["loss"])
plt.plot(hist["val_loss"])

plt.figure()
plt.ylabel("Accuracy (training and validation)")
plt.xlabel("Training Steps")
plt.ylim([0,1])
plt.plot(hist["accuracy"])
plt.plot(hist["val_accuracy"])
Try out the model on an image from the validation data:
x, y = next(iter(val_ds))
image = x[0, :, :, :]
true_index = np.argmax(y[0])
plt.imshow(image)
plt.axis('off')
plt.show()

# Expand the validation image to (1, 224, 224, 3) before predicting the label
prediction_scores = model.predict(np.expand_dims(image, axis=0))
predicted_index = np.argmax(prediction_scores)
print("True label: " + class_names[true_index])
print("Predicted label: " + class_names[predicted_index])
Finally, the trained model can be saved for deployment to TF Serving or TF Lite (on mobile devices) as follows.
saved_model_path = f"/tmp/saved_flowers_model_{model_name}"
tf.saved_model.save(model, saved_model_path)
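As a quick sanity check (a sketch added here, not part of the original notebook), the exported SavedModel can be reloaded and its default serving signature inspected before handing it to TF Serving or the TF Lite converter:

# Reload the export and inspect the signature created by tf.saved_model.save().
reloaded = tf.saved_model.load(saved_model_path)
infer = reloaded.signatures["serving_default"]
print("Inputs: ", infer.structured_input_signature)
print("Outputs:", infer.structured_outputs)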
Optional: Deployment to TensorFlow Lite
TensorFlow Lite lets you deploy TensorFlow models to mobile and IoT devices. The code below shows how to convert the trained model to TF Lite and apply post-training tools from the TensorFlow Model Optimization Toolkit. Finally, it runs the converted model in the TF Lite Interpreter to examine the resulting quality.
Converting without optimization produces the same results as before (up to roundoff error).
Converting with optimization but without any data quantizes the model weights to 8 bits; inference for the neural-network activations still uses floating-point computation. This reduces the model size to about a quarter and improves CPU latency on mobile devices.
On top of that, computation of the neural-network activations can be quantized to 8-bit integers as well, if a small reference dataset is provided to calibrate the quantization ranges. On a mobile device, this accelerates inference further and makes it possible to run on accelerators like the Edge TPU.
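To make the three conversion modes above concrete, here is a minimal sketch, separate from the parameterized cell below. It assumes saved_model_path and train_ds from the earlier cells; the names float_tflite, dynamic_tflite, int8_tflite, and calibration_examples are illustrative, not part of the original notebook:

# 1) Plain conversion: float weights and activations, same results up to roundoff.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
float_tflite = converter.convert()

# 2) Dynamic-range quantization: 8-bit weights, float activations (~4x smaller).
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
dynamic_tflite = converter.convert()

# 3) Full integer quantization of activations additionally needs a few
# calibration batches; each yielded element is a length-1 list with batch size 1.
def calibration_examples():
  for images, _ in train_ds.take(4):  # 4 batches of 16 = ~64 examples
    for image in images:
      yield [image[None, ...]]

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = calibration_examples
int8_tflite = converter.convert()

print(len(float_tflite), len(dynamic_tflite), len(int8_tflite))  # sizes in bytes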
#@title Optimization settings
optimize_lite_model = False  #@param {type:"boolean"}
#@markdown Setting a value greater than zero enables quantization of neural network activations. A few dozen is already a useful amount.
num_calibration_examples = 60  #@param {type:"slider", min:0, max:1000, step:1}
representative_dataset = None
if optimize_lite_model and num_calibration_examples:
  # Use a bounded number of training examples without labels for calibration.
  # TFLiteConverter expects a list of input tensors, each with batch size 1.
  representative_dataset = lambda: itertools.islice(
      ([image[None, ...]] for batch, _ in train_ds for image in batch),
      num_calibration_examples)

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
if optimize_lite_model:
  converter.optimizations = [tf.lite.Optimize.DEFAULT]
  if representative_dataset:  # This is optional, see above.
    converter.representative_dataset = representative_dataset
lite_model_content = converter.convert()

with open(f"/tmp/lite_flowers_model_{model_name}.tflite", "wb") as f:
  f.write(lite_model_content)
print("Wrote %sTFLite model of %d bytes." %
      ("optimized " if optimize_lite_model else "", len(lite_model_content)))
interpreter = tf.lite.Interpreter(model_content=lite_model_content)

# This little helper wraps the TFLite Interpreter as a numpy-to-numpy function.
def lite_model(images):
  interpreter.allocate_tensors()
  interpreter.set_tensor(interpreter.get_input_details()[0]['index'], images)
  interpreter.invoke()
  return interpreter.get_tensor(interpreter.get_output_details()[0]['index'])
#@markdown For rapid experimentation, start with a moderate number of examples.
num_eval_examples = 50  #@param {type:"slider", min:0, max:700}
eval_dataset = ((image, label)  # TFLite expects batch size 1.
                for batch in train_ds
                for (image, label) in zip(*batch))
count = 0
count_lite_tf_agree = 0
count_lite_correct = 0
for image, label in eval_dataset:
  probs_lite = lite_model(image[None, ...])[0]
  probs_tf = model(image[None, ...]).numpy()[0]
  y_lite = np.argmax(probs_lite)
  y_tf = np.argmax(probs_tf)
  y_true = np.argmax(label)
  count += 1
  if y_lite == y_tf: count_lite_tf_agree += 1
  if y_lite == y_true: count_lite_correct += 1
  if count >= num_eval_examples: break
print("TFLite model agrees with original model on %d of %d examples (%g%%)." %
      (count_lite_tf_agree, count, 100.0 * count_lite_tf_agree / count))
print("TFLite model is accurate on %d of %d examples (%g%%)." %
      (count_lite_correct, count, 100.0 * count_lite_correct / count))