Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/Generative AI for Intelligent Data Handling/Day 2 Synthetic Data Generation using Python.ipynb
3074 views
Kernel: Python 3 (ipykernel)

Generate synthetic data in Python with the help of libraries such as `numpy` and `pandas`, and then save it to a CSV file.

import numpy as np
import pandas as pd

# How many synthetic rows to draw.
num_samples = 1000

# Each feature is drawn independently as uniform random integers;
# the `high` bound of randint is exclusive.
age = np.random.randint(low=18, high=80, size=num_samples)
income = np.random.randint(low=20000, high=100000, size=num_samples)
num_purchases = np.random.randint(low=5, high=100, size=num_samples)

# Assemble the three feature arrays into one table, one column per feature.
data = pd.DataFrame(
    {
        'Age': age,
        'Income': income,
        'Num_Purchases': num_purchases,
    }
)

# Write the table to disk without the row index.
output_file = 'synthetic_data.csv'
data.to_csv(output_file, index=False)

print("Synthetic data has been generated and saved to synthetic_data.csv.")
Synthetic data has been generated and saved to synthetic_data.csv.
# Notebook install cell: installs wordcloud and Faker. The --trusted-host flags
# tell pip to treat pypi.org and files.pythonhosted.org as trusted hosts.
pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org Faker
# Same hosts, but with --upgrade: installs/upgrades wordcloud and pip itself.
pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org --upgrade pip
from faker import Faker
import pandas as pd

# Single Faker instance that produces every fake value below.
S1 = Faker()

# Number of synthetic employee records.
samples = 1000

# Column name -> Faker method that yields one value for that column.
# Columns are filled one at a time, in this order.
column_providers = {
    'Name': S1.name,
    'Address': S1.address,
    'Email': S1.email,
    'Job_Title': S1.job,
}
data = {
    column: [provider() for _ in range(samples)]
    for column, provider in column_providers.items()
}

# Tabulate the generated columns and write them out without the row index.
df = pd.DataFrame(data)
df.to_csv('Emp_data.csv', index=False)

print("Synthetic data has been generated and saved to Emp_data.csv.")
Synthetic data has been generated and saved to Emp_data.csv.
## List of functions in faker lib function_list = dir(S1)
import numpy as np
import matplotlib.pyplot as plt

# Random 100x100 array of floats in [0, 1), used as a grayscale image.
height, width = 100, 100
image_data = np.random.rand(height, width)

# Draw the array on the current pyplot figure and hide the axes.
plt.imshow(image_data, cmap='gray')
plt.axis('off')

# Write the figure to disk, then close it.
image_path = 'sample_image.png'
plt.savefig(image_path)
plt.close()

print("Sample image saved successfully.")
Sample image saved successfully.
import matplotlib.pyplot as plt
import numpy as np


# Define a function to generate images of alphabet letters
def generate_alphabet_images():
    """Display the letters A-Z as white glyphs on black tiles in a 5x6 grid.

    Each letter is written with ``Axes.text`` on top of a blank 10x10 image.
    The grid has 30 cells but there are only 26 letters, so the four trailing
    cells are hidden instead of being left as empty framed axes.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    fig, ax = plt.subplots(5, 6, figsize=(10, 8))

    # ax.flat visits the grid row by row, i.e. the same order as nested i/j loops.
    for idx, cell in enumerate(ax.flat):
        if idx < len(letters):
            cell.imshow(np.zeros((10, 10)), cmap='gray')  # blank black tile
            cell.text(2, 5, letters[idx], fontsize=10, color='white')
        # Hide frame and ticks on every cell, including the unused ones.
        cell.axis('off')

    plt.tight_layout()
    plt.show()


# Generate and display alphabet images
generate_alphabet_images()
Image in a Jupyter notebook
import matplotlib.pyplot as plt
import numpy as np


# Define a function to generate images of alphabet letters with a white background
def generate_alphabet_images():
    """Display the letters A-Z as pink glyphs on white tiles in a 5x6 grid.

    Each letter is written with ``Axes.text`` on top of an all-ones 10x10
    image. The grid has 30 cells but there are only 26 letters, so the four
    trailing cells are hidden instead of being left as empty framed axes.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    fig, ax = plt.subplots(5, 6, figsize=(10, 8))

    # ax.flat visits the grid row by row, i.e. the same order as nested i/j loops.
    for idx, cell in enumerate(ax.flat):
        if idx < len(letters):
            # vmin/vmax must be explicit: with a constant array the automatic
            # colour scaling has vmin == vmax and maps every pixel to the
            # lowest colour (black for 'gray'), so the tile would not be white.
            cell.imshow(np.ones((10, 10)), cmap='gray', vmin=0, vmax=1)
            cell.text(2, 5, letters[idx], fontsize=50, color='pink')
        # Hide frame and ticks on every cell, including the unused ones.
        cell.axis('off')

    plt.tight_layout()
    plt.show()


# Generate and display alphabet images with a white background
generate_alphabet_images()
Image in a Jupyter notebook
import matplotlib.pyplot as plt
import numpy as np


# Define a function to generate images of alphabet letters with a black background
def generate_alphabet_images():
    """Display the letters A-Z as large blue glyphs on black tiles in a 5x6 grid.

    Each letter is written with ``Axes.text`` on top of a blank 28x28 image in
    a 20x20-inch figure. The grid has 30 cells but there are only 26 letters,
    so the four trailing cells are hidden instead of being left as empty
    framed axes.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Large figure so the 130-point letters fit inside their tiles.
    fig, ax = plt.subplots(5, 6, figsize=(20, 20))

    # ax.flat visits the grid row by row, i.e. the same order as nested i/j loops.
    for idx, cell in enumerate(ax.flat):
        if idx < len(letters):
            cell.imshow(np.zeros((28, 28)), cmap='gray')  # blank black tile
            cell.set_facecolor('black')  # axes background behind the image
            cell.text(7, 14, letters[idx], fontsize=130, color='blue')
        # Hide frame and ticks on every cell, including the unused ones.
        cell.axis('off')

    plt.tight_layout()
    plt.show()


# Generate and display alphabet images with a black background and larger letters
generate_alphabet_images()
Image in a Jupyter notebook
# Notebook install cell: installs wordcloud and google_images_download, treating
# pypi.org and files.pythonhosted.org as trusted hosts.
pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org google_images_download
from faker import Faker
from google_images_download import google_images_download
import os

# Instantiate Faker
fake = Faker()

# Vocabulary to draw animal names from. It must hold real animal names: a
# one-word list such as ["animal"] makes Faker return that same word every
# time, so every image search would use the identical keyword.
animal_vocabulary = (
    "lion", "tiger", "elephant", "giraffe", "zebra",
    "panda", "kangaroo", "dolphin", "penguin", "wolf",
)

# Pick 5 distinct animal names (unique=True samples without replacement)
animal_names = list(
    fake.random_elements(elements=animal_vocabulary, length=5, unique=True)
)

# Download images of those animals
response = google_images_download.googleimagesdownload()
output_directory = "animal_images"
# exist_ok avoids the separate existence check before creating the folder
os.makedirs(output_directory, exist_ok=True)

for animal in animal_names:
    # One JPG per animal, saved under output_directory
    arguments = {
        "keywords": animal,
        "limit": 1,
        "format": "jpg",
        "output_directory": output_directory,
    }
    response.download(arguments)

print("Images downloaded and saved in the directory:", output_directory)
Item no.: 1 --> Item name = animal Evaluating... Starting Download... Errors: 0 Item no.: 1 --> Item name = animal Evaluating... Starting Download... Errors: 0 Item no.: 1 --> Item name = animal Evaluating... Starting Download... Errors: 0 Item no.: 1 --> Item name = animal Evaluating... Starting Download... Errors: 0 Item no.: 1 --> Item name = animal Evaluating... Starting Download... Errors: 0 Images downloaded and saved in the directory: animal_images
# Notebook install cell: installs wordcloud and pillow (imported as PIL), treating
# pypi.org and files.pythonhosted.org as trusted hosts.
pip install wordcloud --trusted-host pypi.org --trusted-host files.pythonhosted.org pillow
from PIL import Image
import os

output_directory = "animal_images"

# List image files in the directory, as paths relative to output_directory.
# The tree is walked recursively so that sub-directories are descended into
# rather than handed to Image.open, which cannot open a directory.
# NOTE(review): the downloader presumably stores images in per-keyword
# sub-folders - confirm against the actual layout of output_directory.
files = [
    os.path.relpath(os.path.join(root, name), output_directory)
    for root, _dirs, names in os.walk(output_directory)
    for name in names
]

# Display images
for file in files:
    image_path = os.path.join(output_directory, file)
    # The context manager closes the underlying file handle after display,
    # so handles do not accumulate across the loop.
    with Image.open(image_path) as image:
        image.show()
from faker import Faker
import pandas as pd
import random

# Instantiate Faker
fake = Faker()

# Product taxonomy: top-level categories and the sub-categories under each.
categories = ['Electronics', 'Clothing', 'Home', 'Sports', 'Books']
sub_categories = {
    'Electronics': ['Smartphones', 'Laptops', 'Tablets', 'Headphones'],
    'Clothing': ['T-Shirts', 'Jeans', 'Dresses', 'Sweaters'],
    'Home': ['Furniture', 'Kitchen Appliances', 'Bedding', 'Home Decor'],
    'Sports': ['Running', 'Cycling', 'Fitness', 'Team Sports'],
    'Books': ['Fiction', 'Non-fiction', 'Biographies', 'Self-Help'],
}

# Output column order; each generated row follows it.
columns = ['Category', 'Sub-Category', 'Item', 'Price', 'Quantity']


def _random_purchase():
    """Return one shopping row: [category, sub-category, item, price, quantity]."""
    category = random.choice(categories)
    sub_category = random.choice(sub_categories[category])
    # Item name is a capitalised fake word followed by the sub-category.
    item = f"{fake.word().capitalize()} {sub_category}"
    price = round(random.uniform(10, 1000), 2)
    quantity = random.randint(1, 5)
    return [category, sub_category, item, price, quantity]


# Generate 500 rows of shopping data
data = [_random_purchase() for _ in range(500)]

# Tabulate and save without the row index
df = pd.DataFrame(data, columns=columns)
df.to_csv('shopping_data.csv', index=False)