CoCalc -- Day 2 Synthetic Data Generation using Python.ipynb

GitHub Repository: suyashi29/python-su
Path: blob/master/Generative AI for Intelligent Data Handling/Day 2 Synthetic Data Generation using Python.ipynb
³⁰⁷⁴ views

Kernel: Python 3 (ipykernel)

Generate synthetic data using Python with the help of libraries like numpy and pandas, and then save it to a CSV file

In [1]:

import numpy as np
import pandas as pd

# Generate synthetic data
num_samples = 1000

# Example features: age, income, and number of purchases
age = np.random.randint(18, 80, size=num_samples)
income = np.random.randint(20000, 100000, size=num_samples)
num_purchases = np.random.randint(5, 100, size=num_samples)

# Combine the features into a DataFrame
data = pd.DataFrame({
    'Age': age,
    'Income': income,
    'Num_Purchases': num_purchases
})

# Save the synthetic data to a CSV file
data.to_csv('synthetic_data.csv', index=False)

print("Synthetic data has been generated and saved to synthetic_data.csv.")

Out[1]:

Synthetic data has been generated and saved to synthetic_data.csv.

pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org Faker

pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org  --upgrade pip

In [3]:

from faker import Faker
import pandas as pd

S1 = Faker()

# Generate synthetic data
samples = 1000

# Example features: name, address, email, and job title
data = {
    'Name': [S1.name() for _ in range(samples)],
    'Address': [S1.address() for _ in range(samples)],
    'Email': [S1.email() for _ in range(samples)],
    'Job_Title': [S1.job() for _ in range(samples)]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Save the synthetic data to a CSV file
df.to_csv('Emp_data.csv', index=False)

print("Synthetic data has been generated and saved to Emp_data.csv.")

Out[3]:

Synthetic data has been generated and saved to Emp_data.csv.

In [8]:

## List of functions in faker lib
function_list = dir(S1)

In [4]:

import numpy as np
import matplotlib.pyplot as plt

# Generate sample image data
image_data = np.random.rand(100, 100)  # Generating a random 100x100 image
plt.imshow(image_data, cmap='gray')
plt.axis('off')

# Save the image to a specified location
image_path = 'sample_image.png'
plt.savefig(image_path)
plt.close()

print("Sample image saved successfully.")

Out[4]:

Sample image saved successfully.

In [10]:

import matplotlib.pyplot as plt
import numpy as np

# Define a function to generate images of alphabet letters
def generate_alphabet_images():
    fig, ax = plt.subplots(5, 6, figsize=(10, 8))
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    idx = 0
    for i in range(5):
        for j in range(6):
            if idx >= 26:
                break
            ax[i, j].imshow(np.zeros((10, 10)), cmap='gray')  # Initialize with a blank image
            ax[i, j].axis('off')
            ax[i, j].text(2, 5, letters[idx], fontsize=10, color='white')
            idx += 1
    plt.tight_layout()
    plt.show()

# Generate and display alphabet images
generate_alphabet_images()

Out[10]:

In [11]:

import matplotlib.pyplot as plt
import numpy as np

# Define a function to generate images of alphabet letters with a white background
def generate_alphabet_images():
    fig, ax = plt.subplots(5, 6, figsize=(10, 8))
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    idx = 0
    for i in range(5):
        for j in range(6):
            if idx >= 26:
                break
            ax[i, j].imshow(np.ones((10, 10)), cmap='gray')  # Initialize with a white background
            ax[i, j].axis('off')
            ax[i, j].text(2, 5, letters[idx], fontsize=50, color='pink')
            idx += 1
    plt.tight_layout()
    plt.show()

# Generate and display alphabet images with a white background
generate_alphabet_images()

Out[11]:

In [14]:

import matplotlib.pyplot as plt
import numpy as np

# Define a function to generate images of alphabet letters with a black background
def generate_alphabet_images():
    fig, ax = plt.subplots(5, 6, figsize=(20, 20))  # Increase the size of the figure to fit the larger images
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    idx = 0
    for i in range(5):
        for j in range(6):
            if idx >= 26:
                break
            ax[i, j].imshow(np.zeros((28, 28)), cmap='gray')  # Adjust the size of the image to match the background
            ax[i, j].set_facecolor('black')  # Set the background color to black
            ax[i, j].axis('off')
            ax[i, j].text(7, 14, letters[idx], fontsize=130, color='blue')  # Increase the font size of the text
            idx += 1
    plt.tight_layout()
    plt.show()

# Generate and display alphabet images with a black background and larger letters
generate_alphabet_images()

Out[14]:

pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org google_images_download

In [22]:

from faker import Faker
from google_images_download import google_images_download
import os

# Instantiate Faker
fake = Faker()

# Generate 5 fake animal names
animal_names = [fake.word(ext_word_list=["animal"]) for _ in range(5)]

# Download images of those animals
response = google_images_download.googleimagesdownload()
output_directory = "animal_images"
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

for animal in animal_names:
    arguments = {"keywords": animal, "limit": 1, "format": "jpg", "output_directory": output_directory}
    response.download(arguments)

print("Images downloaded and saved in the directory:", output_directory)

Out[22]:

Item no.: 1 --> Item name = animal
Evaluating...
Starting Download...

Errors: 0


Item no.: 1 --> Item name = animal
Evaluating...
Starting Download...

Errors: 0


Item no.: 1 --> Item name = animal
Evaluating...
Starting Download...

Errors: 0


Item no.: 1 --> Item name = animal
Evaluating...
Starting Download...

Errors: 0


Item no.: 1 --> Item name = animal
Evaluating...
Starting Download...

Errors: 0

Images downloaded and saved in the directory: animal_images

pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org pillow

In [ ]:

from PIL import Image
import os

output_directory = "animal_images"

# List files in the directory
files = os.listdir(output_directory)

# Display images
for file in files:
    image_path = os.path.join(output_directory, file)
    image = Image.open(image_path)
    image.show()

In [24]:

from faker import Faker
import pandas as pd
import random

# Instantiate Faker
fake = Faker()

# Define categories and sub-categories
categories = ['Electronics', 'Clothing', 'Home', 'Sports', 'Books']
sub_categories = {
    'Electronics': ['Smartphones', 'Laptops', 'Tablets', 'Headphones'],
    'Clothing': ['T-Shirts', 'Jeans', 'Dresses', 'Sweaters'],
    'Home': ['Furniture', 'Kitchen Appliances', 'Bedding', 'Home Decor'],
    'Sports': ['Running', 'Cycling', 'Fitness', 'Team Sports'],
    'Books': ['Fiction', 'Non-fiction', 'Biographies', 'Self-Help']
}

# Generate shopping data
data = []
for _ in range(500):
    category = random.choice(categories)
    sub_category = random.choice(sub_categories[category])
    item = fake.word().capitalize() + ' ' + sub_category
    price = round(random.uniform(10, 1000), 2)
    quantity = random.randint(1, 5)
    data.append([category, sub_category, item, price, quantity])

# Create DataFrame
columns = ['Category', 'Sub-Category', 'Item', 'Price', 'Quantity']
df = pd.DataFrame(data, columns=columns)

# Save DataFrame to CSV
df.to_csv('shopping_data.csv', index=False)

Generate synthetic data using Python with the help of libraries like numpy and pandas, and then save it to a CSV file

Product

Resources

Company