Path: blob/master/extra_keras_datasets/iris.py
153 views
"""1Import the Iris dataset2Source: http://archive.ics.uci.edu/ml/datasets/Iris3Description: The data set contains 3 classes of 50 instances each, where4each class refers to a type of iris plant.56~~~ Important note ~~~7Please cite the following paper when using or referencing the dataset:8Fisher,R.A. "The use of multiple measurements in taxonomic problems"9Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions10to Mathematical Statistics" (John Wiley, NY, 1950).11"""1213from tensorflow.keras.utils import get_file14import numpy as np15import math16import logging171819def warn_citation():20"""Warns about citation requirements21# Returns22Void23"""24logging.warning(("Please cite the following paper when using or"25" referencing this Extra Keras Dataset:"))26logging.warning(27("Fisher,R.A. \"The use of multiple measurements in taxonomic "28"problems\" Annual Eugenics, 7, Part II, 179-188 (1936); also "29"in \"Contributions to Mathematical Statistics\" (John Wiley"30", NY, 1950).")31)323334def load_data(path="iris.npz", test_split=0.2):35"""Loads the Iris dataset.36# Arguments37path: path where to cache the dataset locally38(relative to ~/.keras/datasets).39test_split: percentage of data to use for testing (by default 20%)40# Returns41Tuple of Numpy arrays: `(input_train, target_train),42(input_test, target_test)`.43Input structure: (sepal length, sepal width, petal length,44petal width)45Target structure: 0 = iris setosa; 1 = iris versicolor;462 = iris virginica.47"""48# Log about loading49logging.basicConfig(level=logging.INFO)50logging.info('Loading dataset = iris')5152# Load data53path = get_file(54path,55origin=("http://archive.ics.uci.edu/ml/machine-learning-databases/"56"iris/iris.data")57)5859# Read data from file60f = open(path, "r")61lines = f.readlines()6263# Process each line into input/target structure64samples = []65for line in lines:66sample = line_to_list(line)67if sample is not None:68samples.append(sample)69f.close()7071# Randomly shuffle the data72np.random.shuffle(samples)7374# Compute test_split in length75num_test_samples = math.floor(len(samples) * test_split)7677# Split data78training_data = samples[num_test_samples:]79testing_data = samples[:num_test_samples]8081# Split into inputs and targets82input_train = np.array([i[0:4] for i in training_data])83input_test = np.array([i[0:4] for i in testing_data])84target_train = np.array([i[4] for i in training_data])85target_test = np.array([i[4] for i in testing_data])8687# Warn about citation88warn_citation()8990# Return data91return (input_train, target_train), (input_test, target_test)929394def line_to_list(line):95"""96Convert a String-based line into a list with input and target data.97"""98elements = line.split(",")99if len(elements) > 1:100target = target_string_to_int(elements[4])101full_sample = [float(i) for i in elements[0:4]]102full_sample.append(target)103return tuple(full_sample)104else:105return None106107108def target_string_to_int(target_value):109"""110Convert a String-based into an Integer-based target value.111"""112if target_value == "Iris-setosa\n":113return 0114elif target_value == "Iris-versicolor\n":115return 1116else:117return 2118119120