Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
suyashi29
GitHub Repository: suyashi29/python-su
Path: blob/master/data/Descriptive Stats.ipynb
3074 views
Kernel: Python 3
import numpy as np

# Descriptive statistics on a 1-D NumPy array.
A = [1, 2, 3, 4, 5, 6]  # plain Python list
A + A  # `+` on lists concatenates them; it does not add element-wise
D = np.array(A)  # 1-D NumPy array built from the list
D

D.sum()  # total of all elements

D.mean()  # arithmetic mean

np.std(D)  # standard deviation (NumPy default ddof=0)

np.median(D)  # median

np.var(D)  # variance (NumPy default ddof=0)

np.std(D[0:3])  # standard deviation of the first three elements only
# Indexing and statistics on a 2-D NumPy array.
a = [[1, 2, 13, 0], [4, 5, 6, 78], [-8, 8, -6, 9]]
z = np.array(a)  # 3 rows x 4 columns
z

z[1]  # second row

z[1, 1]  # element in the second row, second column

z[:, 2]  # third column

# Exercises on `z`:
#   - sum of the elements of the 3rd row
#   - mean of the elements of the 4th column
#   - standard deviation for the array
#   - median across the 2nd row
#   - mean across the 1st and 3rd rows

# Means of the 1st and 3rd rows. This must index the 2-D array `z`;
# indexing the 1-D array `D` would only pick out single elements.
row_means = (z[0].mean(), z[2].mean())
row_means

3 * 7

Analyzing Car sales Data

import numpy as np # Implemennts milti-dimensional array and matrices import pandas as pd # For data manipulation and analysis #import pandas_profiling import matplotlib.pyplot as plt # Plotting library for Python programming language and it's numerical mathematics extension NumPy import seaborn as sns # Provides a high level interface for drawing attractive and informative statistical graphics %matplotlib inline sns.set() from subprocess import check_output

Importing Data

# Build the two basic pandas containers from the same Python list.
s = [1, 2, 3]
pd.Series(s)  # one-dimensional labelled array

pd.DataFrame(s)  # the same values as a single-column table
# Load the car sales data; the data set is located in the same folder.
Car = pd.read_excel("Car_Sales.xlsx")
# Alternative that parses a named sheet from an absolute path:
#Car=pd.ExcelFile(r"C:\Users\suyashi144893\Documents\data Sets\Car_Sales.xlsx").parse("Sheet1")

Car.shape  # (rows, columns)

Car.head()  # first rows of the table

Car.tail(2)  # last two rows

Car.describe()  # summary statistics with the default column selection

Car.describe(include="all")  # summary statistics for every column

Car.isnull().sum()  # number of missing values per column

a = Car["engV"].mean()
a

b = Car["engV"].median()
b

# Fill the missing engV values with the median computed above.
Car["engV"] = Car["engV"].fillna(b)

# Drop every row that still has a missing value in any column.
Car = Car.dropna()

Car.isnull().sum()  # re-check missing values after cleaning

Car.duplicated().sum()  # number of rows that repeat an earlier row

Car.iloc[:, 1]  # second column, selected by position

# Show all rows that have a duplicate (keep=False marks every copy).
Car.loc[Car.duplicated(keep=False), :]

Car.duplicated().sum()

# Shape the table would have without duplicates; the result is not assigned,
# so `Car` itself is left unchanged here.
Car.drop_duplicates(keep='first').shape

Visualizations

# Histogram of the mileage column using ten bins.
Car["mileage"].plot.hist(bins=10)

# Count plot of the body column, with a title on the resulting axes.
sns.countplot(data=Car, x="body").set_title("Count plot for Car variants")

# Number of non-null entries in the body column.
Car["body"].count()

# Frequency table of the body column.
pd.crosstab(Car["body"], "count")

Price distribution between registered and non-registered cars

# Box plot of price for each value of the registration column.
sns.boxplot(data=Car, x="registration", y="price")

Establish the correlation between all the features using a heatmap.

# Correlate only the numeric columns. Since pandas 2.0, DataFrame.corr() no
# longer drops non-numeric columns by default and raises on string data
# (engType, for example, holds strings such as 'Petrol').
corr = Car.select_dtypes(include="number").corr()
plt.figure(figsize=(10, 10))
sns.heatmap(
    corr,
    vmax=.8,
    linewidths=.01,  # `linewidths` is the documented seaborn parameter name
    square=True,
    annot=True,
    cmap='YlGnBu',
    linecolor='black',
)
plt.title('Correlation between features')

# Count of non-null body entries for each (engType, drive) combination.
Car.groupby(['engType', 'drive']).count()['body']

Engtype VS Drive

# Pie chart of how many petrol cars fall into each drive category.
Car[Car['engType'] == 'Petrol'].drive.groupby(Car.drive).count().plot(
    kind='pie', figsize=(6, 6), autopct='%1.2f%%'
)
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle

# Bar plot of price for each value of the registration column.
sns.barplot(x='registration', y='price', data=Car)

# Swarm plot of the individual prices for each value of the registration column.
sns.swarmplot(x='registration', y='price', data=Car)