Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/Data Science Essentials for Data Analysts/EDA Report Technology Trends .ipynb
3074 views
Kernel: Python 3 (ipykernel)
import pandas as pd
import numpy as np

# Number of synthetic records; every column below is generated at this length.
N_ROWS = 3000

# Generate data
# Seed NumPy's global RNG so the dataset is reproducible. The random draws
# below (including the np.random.choice calls inside the DataFrame literal)
# must stay in this order: reordering them changes the generated values.
np.random.seed(0)
years = np.random.randint(2010, 2023, N_ROWS)  # upper bound is exclusive: 2010..2022
technologies = ['Python', 'JavaScript', 'Java', 'C#', 'C++', 'Ruby', 'Swift', 'Go', 'Rust', 'Kotlin']
# 'Framework' and 'Tool' are listed twice, so np.random.choice selects each of
# them twice as often as the other labels.
categories = ['Programming Language', 'Framework', 'Tool', 'Library', 'Platform', 'Database', 'Language', 'IDE', 'Framework', 'Tool']
popularity_index = np.random.uniform(0, 100, N_ROWS)
growth_rate = np.random.uniform(-10, 10, N_ROWS)
usage = np.random.randint(1000, 1000000, N_ROWS)
market_share = np.random.uniform(0, 50, N_ROWS)
regions = ['North America', 'Europe', 'Asia', 'South America', 'Africa']
industries = ['IT', 'Finance', 'Healthcare', 'Manufacturing', 'Education']
# Placeholder text: this is the string 'None', not a missing value.
# NOTE(review): recent pandas releases appear to list 'None' among read_csv's
# default NA strings, so this column may read back as all-NaN -- confirm
# against the pandas version in use.
comments = ['None'] * N_ROWS

# Create DataFrame
# Technology, Category, Region and Industry are sampled independently of each
# other and of the numeric columns.
df = pd.DataFrame({
    'Year': years,
    'Technology': np.random.choice(technologies, N_ROWS),
    'Category': np.random.choice(categories, N_ROWS),
    'Popularity_Index': popularity_index,
    'Growth_Rate': growth_rate,
    'Usage': usage,
    'Market_Share': market_share,
    'Region': np.random.choice(regions, N_ROWS),
    'Industry': np.random.choice(industries, N_ROWS),
    'Comments': comments,
})

# Save DataFrame to CSV (index=False keeps the row index out of the file)
df.to_csv('technology_trends.csv', index=False)
# Load technology_trends.csv into `df`; the bare expression on the last line
# makes the notebook render the frame as the cell's output.
csv_path = 'technology_trends.csv'
df = pd.read_csv(csv_path)
df
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Exploratory data analysis of technology_trends.csv: prints a textual
# overview of the table, then draws nine figures one after another.

# Load the dataset
df = pd.read_csv('technology_trends.csv')

# Display basic information about the dataset
print("Dataset Information:")
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
df.info()

# Summary statistics
print("\nSummary Statistics:")
print(df.describe())

# Check for missing values
# NOTE(review): recent pandas releases appear to treat the literal string
# 'None' as NA in read_csv, which would make 'Comments' show up as entirely
# missing here -- confirm against the pandas version in use.
print("\nMissing Values:")
print(df.isnull().sum())

# Visualization 1: Popularity Index Distribution (Histogram)
plt.figure(figsize=(10, 6))
sns.histplot(df['Popularity_Index'], bins=20, kde=True, color='skyblue')
plt.title('Distribution of Popularity Index')
plt.xlabel('Popularity Index')
plt.ylabel('Frequency')
plt.show()

# Visualization 2: Growth Rate by Category (Box Plot)
plt.figure(figsize=(10, 6))
sns.boxplot(x='Category', y='Growth_Rate', data=df, palette='Set2')
plt.title('Growth Rate by Category')
plt.xlabel('Category')
plt.ylabel('Growth Rate')
plt.xticks(rotation=45)
plt.show()

# Visualization 3: Average Market Share by Region (Bar Plot)
# NOTE(review): newer seaborn releases deprecate `ci` in favour of
# `errorbar=None`; `ci=None` is kept so the cell still runs on older seaborn
# -- switch once the minimum seaborn version is confirmed.
plt.figure(figsize=(10, 6))
sns.barplot(x='Region', y='Market_Share', data=df, estimator=np.mean, ci=None, palette='viridis')
plt.title('Average Market Share by Region')
plt.xlabel('Region')
plt.ylabel('Average Market Share')
plt.xticks(rotation=45)
plt.show()

# Visualization 4: Popularity Index vs. Growth Rate (Scatter Plot)
plt.figure(figsize=(10, 6))
sns.scatterplot(x='Popularity_Index', y='Growth_Rate', data=df, hue='Category', palette='Dark2')
plt.title('Popularity Index vs. Growth Rate')
plt.xlabel('Popularity Index')
plt.ylabel('Growth Rate')
plt.legend(title='Category')
plt.show()

# Visualization 5: Market Share by Technology (Pie Chart)
# Slices are each technology's summed Market_Share, largest first.
plt.figure(figsize=(10, 6))
market_share_by_tech = df.groupby('Technology')['Market_Share'].sum().sort_values(ascending=False)
plt.pie(market_share_by_tech, labels=market_share_by_tech.index, autopct='%1.1f%%', startangle=140)
plt.title('Market Share by Technology')
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.show()

# Visualization 6: Average Usage by Year (Line Plot)
# See the `ci` note on Visualization 3's bar plot call; the same applies here.
plt.figure(figsize=(10, 6))
sns.lineplot(x='Year', y='Usage', data=df, estimator=np.mean, ci=None, marker='o')
plt.title('Usage Distribution by Year')
plt.xlabel('Year')
plt.ylabel('Average Usage')
plt.xticks(rotation=45)
plt.show()

# Visualization 7: Popularity Index Distribution by Category (Stacked Histogram)
plt.figure(figsize=(12, 8))
sns.histplot(data=df, x='Popularity_Index', hue='Category', bins=20, multiple='stack', palette='Set2')
plt.title('Popularity Index Distribution by Category')
plt.xlabel('Popularity Index')
plt.ylabel('Frequency')
plt.show()

# Visualization 8: Average Market Share by Region and Industry (Heatmap)
# aggfunc is given as the string 'mean' rather than np.mean: the result is the
# same, and newer pandas warns when a NumPy callable is passed here.
plt.figure(figsize=(10, 6))
market_share_pivot = df.pivot_table(index='Region', columns='Industry', values='Market_Share', aggfunc='mean')
sns.heatmap(market_share_pivot, cmap='coolwarm', annot=True, fmt=".1f")
plt.title('Market Share by Region and Industry')
plt.xlabel('Industry')
plt.ylabel('Region')
plt.show()

# Visualization 9: Average Growth Rate Trend by Year (Line Plot)
# One point per year: the mean Growth_Rate over all rows of that year.
plt.figure(figsize=(10, 6))
growth_rate_trend = df.groupby('Year')['Growth_Rate'].mean()
sns.lineplot(x=growth_rate_trend.index, y=growth_rate_trend.values, marker='o', color='orange')
plt.title('Average Growth Rate Trend')
plt.xlabel('Year')
plt.ylabel('Average Growth Rate')
plt.xticks(rotation=45)
plt.grid(True)
plt.show()
Dataset Information: <class 'pandas.core.frame.DataFrame'> RangeIndex: 3000 entries, 0 to 2999 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 3000 non-null int64 1 Technology 3000 non-null object 2 Category 3000 non-null object 3 Popularity_Index 3000 non-null float64 4 Growth_Rate 3000 non-null float64 5 Usage 3000 non-null int64 6 Market_Share 3000 non-null float64 7 Region 3000 non-null object 8 Industry 3000 non-null object 9 Comments 3000 non-null object dtypes: float64(3), int64(2), object(5) memory usage: 234.5+ KB None Summary Statistics: Year Popularity_Index Growth_Rate Usage Market_Share count 3000.000000 3000.000000 3000.000000 3000.000000 3000.000000 mean 2015.993333 50.300724 0.130376 495705.509667 25.078948 std 3.748774 28.952742 5.727954 285066.826091 14.522436 min 2010.000000 0.018671 -9.999599 1597.000000 0.001629 25% 2013.000000 25.270949 -4.734733 255163.750000 12.507861 50% 2016.000000 50.648571 0.174553 494828.000000 24.749080 75% 2019.000000 75.997921 5.119626 741175.000000 37.873498 max 2022.000000 99.989729 9.996769 999955.000000 49.998547 Missing Values: Year 0 Technology 0 Category 0 Popularity_Index 0 Growth_Rate 0 Usage 0 Market_Share 0 Region 0 Industry 0 Comments 0 dtype: int64
Image in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebookImage in a Jupyter notebook