Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
robertopucp
GitHub Repository: robertopucp/1eco35_2022_2
Path: blob/main/Lab3/Lab3_Python.ipynb
2710 views
Kernel: Python 3 (ipykernel)
from IPython.display import display, HTML

# Widen the notebook containers so wide DataFrames fit on screen.
# The stray "a" that followed the last rule was removed: it made the
# stylesheet end with a malformed, selector-only statement.
display(HTML(data="""
<style>
    div#notebook-container    { width: 75%; }
    div#menubar-container     { width: 80%; }
    div#maintoolbar-container { width: 80%; }
</style>
"""))
''' Importar librerias - numpy para operaciones de vectores y matrices - pandas para manipular DataFrame (base de datos) - Series para manipular columnas de DataFrame ''' import numpy as np import pandas as pd from pandas import DataFrame,Series
# importar el csv almacenada en la carpeta data # relative path ../ salir de la carpeta Lab3 # /data ingresar a la carpeta data netflix = pd.read_csv("../data/netflix_titles.csv") # pro default UTF-8 para caracteres especiales y lectura de missing netflix
netflix.head()
netflix.info() # varaible's type print( netflix.shape ) # filas y columnas
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7787 entries, 0 to 7786 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 show_id 7787 non-null object 1 type 7787 non-null object 2 title 7787 non-null object 3 director 5398 non-null object 4 cast 7069 non-null object 5 country 7280 non-null object 6 date_added 7777 non-null object 7 release_year 7787 non-null int64 8 rating 7780 non-null object 9 duration 7787 non-null object 10 listed_in 7787 non-null object 11 description 7787 non-null object dtypes: int64(1), object(11) memory usage: 730.2+ KB (7787, 12)
'''
Equivalent to Stata's `sum`; only the release_year variable is numeric
'''
netflix.describe()
## Verificando ID print( netflix.show_id.unique() ) print( len( netflix.show_id.unique() ) ) print( netflix.show_id.is_unique ) # trueo false ################################## # print( len(netflix['show_id'].unique() ) ) # netflix['show_id'].unique() # netflix['show_id'].is_unique # verificando repeticiones
['s1' 's2' 's3' ... 's7785' 's7786' 's7787'] 7787 True
netflix.release_year.is_unique
False
len( netflix.show_id.unique() )
7787
netflix.show_id
0 s1 1 s2 2 s3 3 s4 4 s5 ... 7782 s7783 7783 s7784 7784 s7785 7785 s7786 7786 s7787 Name: show_id, Length: 7787, dtype: object
netflix[['director']]

Exploring a DataFrame:

# Series en Python print(type(netflix['director'])) netflix['director'] netflix.director
<class 'pandas.core.series.Series'>
0 NaN 1 Jorge Michel Grau 2 Gilbert Chan 3 Shane Acker 4 Robert Luketic ... 7782 Josef Fares 7783 Mozez Singh 7784 NaN 7785 NaN 7786 Sam Dunn Name: director, Length: 7787, dtype: object
## revisado missing values (NaN in Python) print( netflix.director.unique() ) print( netflix.director.is_unique ) ## isnull() and isna() son similares print( netflix.director.isnull() ) print( netflix['director'].isna() )
[nan 'Jorge Michel Grau' 'Gilbert Chan' ... 'Josef Fares' 'Mozez Singh' 'Sam Dunn'] False 0 True 1 False 2 False 3 False 4 False ... 7782 False 7783 False 7784 True 7785 True 7786 False Name: director, Length: 7787, dtype: bool 0 True 1 False 2 False 3 False 4 False ... 7782 False 7783 False 7784 True 7785 True 7786 False Name: director, Length: 7787, dtype: bool
# cantidad de missing values en la variables director print( netflix['director'].isna().sum() ) # Borrar missing values netflix.dropna() # borra todas las filas o columnas con al menos un missing value netflix.dropna(axis = 0) # borras filas con missing value , axis = 0 : aplicar función por filas netflix.dropna(axis = 1) # borra columnas con al menos un missign values, axis = 1: aplicar función por columnas netflix.drop('listed_in', axis = 1) # elimiar columna listed_in netflix.drop(7786) # eliminar una fila netflix.drop(columns = "listed_in", inplace = True) # inplace = True permite alterar la base de datos sin la necesidad de asignarse nuevamente # netflix = netflix.drop(columns = "listed_in")
2389
netflix
netflix.dropna(subset = ['director'], inplace = True)  # drops the observations whose director value is missing

# Alternative: replace the missing values of the director column with the text "Sin director"
# netflix.director = netflix.director.fillna("Sin director")
netflix
# netflix['director'] = netflix.director.fillna("Sin director")
# Replace the missing countries with a placeholder label, modifying netflix itself.
# The result is assigned back to the column instead of calling
# netflix.country.fillna(..., inplace = True): an in-place call on a selected
# column is chained assignment, which emits a FutureWarning in pandas 2.x and
# leaves netflix unchanged once Copy-on-Write is enabled (pandas 3.0).
netflix["country"] = netflix["country"].fillna("Sin nacionalidad")

# Methods that accept inplace = True include:
# drop, fillna, rename
netflix.isna().sum()
# netflix.isna() flags the missing values in every column; .sum() counts them per column
show_id 0 type 0 title 0 director 0 cast 419 country 0 date_added 0 release_year 0 rating 4 duration 0 description 0 dtype: int64
  • Nótese que la base de datos Netflix no ha sido alterada. Para que la base de datos se altere debe ocurrir una de estas dos cosas:

  1. Asignarse a una nueva base de datos

  2. Si la función lo permite, incluir el argumento inplace = True

# Create new variable netflix['new_col'] = np.arange(0, netflix.shape[0]) # creat new varaible
netflix.shape[0]
5398
netflix
# DataFrame.loc[ filas, columnas] netflix.loc[210:500] # filtro por filas
netflix.loc[210:] # desde fila indexing 210 hasta el final
netflix.loc[:,['show_id','type','description']] # : todas las filas
''' Loc usa los nombres de filas y columnas para filtrar ''' # Filter rows netflix.loc[0:100] netflix.loc[210:500] netflix.loc[210:] # desde la fila posición 210 hasta al final # filter columns netflix.loc[:,['show_id','type','description']] netflix.loc[1000:2000,['show_id','type','description']]
netflix[['show_id','type','description']]
netflix
#netflix.set_index( [ 'show_id' ] , inplace = True) netflix
# Filtrando columnas por index netflix.loc[ ['s2','s100','s7787'] ]
# Reset index netflix.reset_index( inplace = True) #
netflix
''' iloc usa las posiciones de filas y columnas ''' # Filter rows netflix.iloc[0:100]
netflix.iloc[210:500]
# filter columns netflix.iloc[:,[0,1,10]] # filtrando por posiciones de columnas
netflix.iloc[1000:2000,[0,1,10]]
# Column names in a list list(netflix.columns)
['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added', 'release_year', 'rating', 'duration', 'description']

Sorting and Subsetting

# Sort ascending by release year (modifies netflix in place)
netflix.sort_values("release_year", inplace = True)
netflix

# Sort descending by release year
netflix.sort_values("release_year", ascending = False, inplace = True)
netflix.head(10)

# Auxiliary column of random integers, used as a second sort key
netflix['number'] = np.random.randint(1, 10, netflix.shape[0])

# Sort by two keys, both ascending
netflix.sort_values(["release_year","number"], inplace = True)
netflix.head(10)

# Two keys with different directions: release_year ascending, number descending
net_two_sort = netflix.sort_values(["release_year","number"], ascending = [True,False])
net_two_sort.head(10)
# Note: within one release year (e.g. 1943) the rows are ordered by "number",
# descending. The original note named "duration", which is not a sort key here.

Subsetting columns

netflix.director
0 Sin director 1 Jorge Michel Grau 2 Gilbert Chan 3 Shane Acker 4 Robert Luketic ... 7782 Josef Fares 7783 Mozez Singh 7784 Sin director 7785 Sin director 7786 Sam Dunn Name: director, Length: 7787, dtype: object
netflix[["director","cast"]]

Subsetting rows

# Rows released before 2011
netflix[ netflix["release_year"] < 2011 ]

netflix[(netflix["release_year"] < 2011) & (netflix["number"] > 5)]  # with more than one condition, wrap each one in parentheses

netflix[( netflix.release_year < 2011 ) & (netflix.number > 5)]  # same filter using attribute access

# First ten rows (by position) of the director column, kept as a DataFrame
netflix[['director']].iloc[0:10]
netflix

Subsetting based on text data

net_peru = netflix[netflix.country == "Peru"]  # rows whose country value is exactly "Peru"
net_peru

net_mex = netflix[netflix["country"] == "Mexico"]  # same kind of filter, bracket notation
net_mex

Filtering Data

Checking unique values

netflix["country"].unique()
array([nan, 'United States', 'United States, United Kingdom', 'Egypt', 'Italy, France', 'Italy', 'India', 'India, Malaysia', 'United Kingdom, United States', 'United States, Italy, United Kingdom, Liechtenstein', 'Poland', 'Italy, United States', 'United States, East Germany, West Germany', 'United Kingdom, West Germany', 'Hong Kong, United States', 'Hong Kong', 'United Kingdom', 'France', 'Lebanon', 'Poland,', 'West Germany', 'Egypt, Algeria', 'Japan', 'Mexico', 'Lebanon, Canada, France', 'Australia, United States', 'Soviet Union, India', 'Kuwait', 'Australia', 'France, Lebanon', 'India, Soviet Union', 'Poland, West Germany', 'Argentina', 'United States, Japan', 'Italy, South Africa, West Germany, Australia, United States', 'Hong Kong, China', 'Denmark, France, Poland', 'United States, Canada', 'Ireland, United Kingdom, United States', 'France, Egypt', 'United States, Mexico', 'France, Belgium, Italy', 'United States, Hong Kong', 'Australia, United Kingdom', 'United Kingdom, Japan, United States', 'United States, France', 'France, United Kingdom, India', 'United States, France, Mexico', 'Argentina, Spain', 'Egypt, France', 'United States, France, Japan', 'United States, Germany', 'Canada, United States', 'United States, Germany, Mexico', 'Canada', 'France, Norway, Lebanon, Belgium', 'United States, Germany, Canada', 'Germany, United States', 'Thailand', 'Germany, United States, United Kingdom, Canada', 'United Kingdom, France', 'France, Japan, United States', 'Taiwan, Hong Kong, United States, China', 'Germany, Canada, United States', 'United States, United Kingdom, Italy', 'Turkey', 'Indonesia', 'United Kingdom, United States, France, Germany', 'United States, United Arab Emirates', 'France, United States', 'United States, Netherlands, Japan, France', 'Taiwan', 'United States, Italy', 'United Kingdom, Germany, Canada, United States', 'South Africa, United States', 'Brazil, France, Germany', 'New Zealand, United States', 'United States, New Zealand', 'United 
States, Germany, United Kingdom', 'United States, Australia', 'Malaysia', 'United States, France, Canada, Spain', 'Colombia, United States', 'India, France', 'Lebanon, France', 'South Korea', 'United States, Czech Republic', 'United States, Canada, Germany', 'Uruguay, Argentina, Germany, Spain', 'Hong Kong, China, United States', 'Germany, United States, Canada', 'United States, United Kingdom, Germany', 'Sweden, Czech Republic, United Kingdom, Denmark, Netherlands', 'United Kingdom, Czech Republic, Germany, United States', 'China, Hong Kong', 'United States, Germany, Australia', 'United States, France, United Kingdom, Japan', 'United Kingdom, United States, Japan', 'Romania', 'United States, Czech Republic, United Kingdom', 'Netherlands', 'United Kingdom, Czech Republic, United States, Germany, Bahamas', 'United Kingdom, Thailand', 'United States, Malta, France, United Kingdom', 'Saudi Arabia, United Arab Emirates', 'Mexico, Spain', 'France, United States, Mexico', 'Germany', 'Philippines', 'Denmark', 'France, Lebanon, United Kingdom', 'United Kingdom, Ireland', 'France, Belgium', 'United States, United Kingdom, Canada', 'United Kingdom, India', 'United States, New Zealand, Japan', 'Hong Kong, Taiwan', 'Venezuela, Colombia', 'New Zealand, United Kingdom, Australia', 'Norway', 'United States, South Africa', 'United States, South Korea', 'Spain', 'Colombia', 'United States, China', 'China', 'Colombia, Mexico, United States', 'India, United States', 'Saudi Arabia', 'Germany, United Kingdom, United States', 'United Kingdom, Germany, United States', 'United States, United Kingdom, France', 'Czech Republic, United Kingdom, France', 'France, Switzerland, Spain, United States, United Arab Emirates', 'Singapore', 'Nigeria', 'United States, United Kingdom, Australia', 'South Africa, United States, New Zealand, Canada', 'United States, Germany, United Kingdom, Italy', 'Spain, United Kingdom, United States', 'United States, Germany, United Kingdom, Australia', 'United States, 
Canada, France', 'United Kingdom, Canada, France, United States', 'United Kingdom, Canada', 'Germany, United States, France', 'South Korea, United States', 'United States, Ireland', 'Australia, France, Ireland', 'France, United Kingdom, United States', 'Canada, United States, India, United Kingdom', 'Mexico, United States', 'Hong Kong, China, Singapore', 'South Africa', 'India, Germany, France', 'Romania, France, Switzerland, Germany', 'United States, Canada, United Kingdom', 'Germany, France, United States, Canada, United Kingdom', 'India, Australia', 'United States, France, Italy, United Kingdom', 'United States, United Kingdom, Canada, Japan', 'United States, Australia, Mexico', 'India, Pakistan', 'India, Japan', 'United Kingdom, United States, Australia', 'Spain, Mexico, France', 'Germany, Sri Lanka', 'United Kingdom, France, United States', 'Ireland, Canada, United States, United Kingdom', 'Japan, United States', 'Canada, Nigeria', 'Turkey, United States', 'United Kingdom, Australia, Canada, United States', 'United Kingdom, United States, Spain, Germany, Greece, Canada', 'United Kingdom, Nigeria', 'Italy, Canada, France', 'Iran, France', 'United States, Sweden, Norway', 'United Kingdom, Germany, France, United States', 'Pakistan', 'United States, India', 'United States, New Zealand, United Kingdom', 'France, Belgium, United States', 'India, Germany', 'United States, Belgium, Canada', 'Canada, United States, United Kingdom, France, Luxembourg', 'Ireland, United States', 'United Kingdom, United States, France', 'Brazil, United States', 'Brazil', 'United States, India, Bangladesh', 'United Kingdom, Germany', 'United States, Chile', 'United States, Russia', 'Israel', 'Lebanon, United States, United Arab Emirates', 'Spain, Thailand, United States', 'United Kingdom, Denmark, Canada, Croatia', 'Philippines, Canada, United Kingdom, United States', 'Sweden, United Kingdom, Finland', 'Germany, United States, Hong Kong, Singapore', 'United Kingdom, Norway, Denmark, 
Germany, Sweden', 'Saudi Arabia, Netherlands, Germany, Jordan, United Arab Emirates, United States', 'Nigeria, United Kingdom', 'Canada, France, United States', 'United States, Canada, Belgium, United Kingdom', 'United States, Brazil', 'Indonesia, Singapore', 'Chile', 'Israel, Germany, Poland, Luxembourg, Belgium, France, United States', 'South Korea, Czech Republic', 'Switzerland, United Kingdom, United States', 'Denmark, Germany, Belgium, United Kingdom, France, Sweden', 'Russia', 'Denmark, Germany, Belgium, United Kingdom, France', 'United States, Spain', 'United States, France, Canada, Belgium', 'Turkey, Azerbaijan', 'United States, Hungary, Ireland, Canada', 'France, Belgium, Spain', 'United States, Israel, Italy, South Africa', 'United Kingdom, India, United States', 'Canada, Spain, France', 'United States, United Kingdom, France, Germany, Japan', 'United Kingdom, Egypt, United States', 'Denmark, France, United States, Sweden', 'Australia, France', 'United States, China, Hong Kong', 'India, Switzerland', 'India, Canada', 'Canada, India', 'United Kingdom,', 'France, Germany, Switzerland', 'Pakistan, Norway, United States', 'United States, France, Canada, Lebanon, Qatar', 'Denmark, Brazil, France, Portugal, Sweden', 'Canada, France, Italy, Morocco, United States', 'Denmark, Zimbabwe', 'United Kingdom, China, United States', 'Canada, Luxembourg', 'Germany, United Kingdom', 'Canada, United Kingdom, United States', 'Canada, Germany, France, United States', 'Denmark, Indonesia, Finland, Norway, United Kingdom, Israel, France, United States, Germany, Netherlands', 'United Arab Emirates', 'United States, United Kingdom, Canada, China', 'Bulgaria, United States, Spain, Canada', 'United Arab Emirates, United States', 'Norway, United States', 'United States, France, Serbia', 'Vietnam', 'United Arab Emirates, Jordan, Lebanon', 'United Kingdom, France, Canada, Belgium, United States', 'United States, Greece, United Kingdom', 'United Kingdom, Brazil, Germany', 'United 
States, Bermuda, Ecuador', 'Norway, United Kingdom, France, Ireland', 'Australia, United Kingdom, United States, New Zealand, Italy, France', 'Lebanon, United Arab Emirates, France, Switzerland, Germany', 'United Kingdom, United States, Morocco', 'Thailand, Canada, United States', 'Australia, United Kingdom, Canada', 'Kenya, United States', 'Germany, United States, Italy', 'Denmark, United Kingdom, South Africa, Sweden, Belgium', 'Austria, Czech Republic', 'United States, United Kingdom, Denmark, Sweden', 'Russia, United States', 'Norway, Denmark, Sweden', 'Canada, South Korea, United States', 'United States, Japan, Canada', 'United States, India, United Arab Emirates', 'United Kingdom, Canada, United States', 'France, Canada, Belgium', 'South Korea, China, United States', 'Canada, United Kingdom, Netherlands', 'United States, Senegal', 'China, Germany, India, United States', 'France, Canada, Italy, United States, China', 'Australia, Iraq', 'France, Morocco', 'Ireland, United Kingdom, Greece, France, Netherlands', 'United States, Canada, Japan, Panama', 'Australia, United Kingdom, United Arab Emirates, Canada', 'Malaysia, Singapore, Hong Kong', 'United Arab Emirates, Jordan', 'Japan, Canada', 'Switzerland', 'Lebanon, Qatar', 'Mexico, France', 'Germany, Australia, France, China', 'United States, Morocco', 'Peru', 'Romania, Bulgaria, Hungary', 'United Kingdom, Lithuania', 'Croatia, Slovenia, Serbia, Montenegro', 'France, Germany', 'Canada, Mexico, Germany, South Africa', 'United States, United Kingdom, Morocco', 'United States, Spain, Germany', 'Canada, Australia', 'Argentina, Chile, Peru', 'Israel, Sweden, Germany, Netherlands', 'United Kingdom, South Korea', 'Netherlands, Belgium, United Kingdom, United States', 'New Zealand', 'Denmark, United Kingdom, Sweden', 'United States, Nicaragua', 'United States, Indonesia', 'Belgium', 'United Kingdom, United States, Germany, Denmark, Belgium, Japan', 'United States, United Kingdom, Spain, South Korea', 'United States, 
Ireland, United Kingdom, India', 'Chile, Spain, Argentina, Germany', 'United States, Sweden', 'Germany, France, Luxembourg, United Kingdom, United States', 'Russia, Spain', 'United States, Uruguay', 'United Kingdom, Ukraine, United States', 'United States, Argentina', 'United Kingdom, Russia, United States', 'Pakistan, United States', 'France, Malta, United States', 'Canada, United States, United Kingdom', 'Ireland', 'Ukraine', 'Ireland, Canada, United Kingdom, United States', 'United States, Chile, Israel', 'Russia, United States, China', 'Israel, United States', 'United Kingdom, New Zealand', 'Spain, France, Uruguay', 'France, United States, Canada', 'China, South Korea, United States', 'Iceland', 'Canada, United States, France', 'Netherlands, Denmark, South Africa', 'Netherlands, Belgium', 'Czech Republic, France', 'Ireland, United Kingdom', 'United States, Greece', 'Germany, Jordan, Netherlands', 'France, Canada, China, Cambodia', 'Canada, United Kingdom', 'United Kingdom, Germany, Canada', 'France, South Korea, Japan', 'South Africa, United States, Germany', 'Canada, India, Thailand, United States, United Arab Emirates', 'Netherlands, Belgium, Germany, Jordan', 'Denmark, China', 'United States, Canada, China', 'United States, Canada, Ireland', 'India, United Kingdom', 'United States, Venezuela', 'Chile, Argentina, France, Spain, United States', 'United Kingdom, South Africa', 'Belgium, France', 'United Kingdom, France, Germany, United States', 'Serbia, South Korea, Slovenia', 'Italy, Germany', 'United Kingdom, Spain, Belgium', 'Canada, Spain', 'United States, Spain, Italy', 'Chile, United States, France', 'Austria', 'Australia, India', 'France, Qatar', 'Peru, United States, United Kingdom', 'Sweden', 'Chile, France', 'Philippines, Qatar', 'Finland, Sweden, Norway, Latvia, Germany', 'United States, Greece, Brazil', 'Hungary', 'China, United States', 'France, Germany, Czech Republic, Belgium', 'United States, Cambodia', 'Netherlands, Denmark, France, Germany', 
'Norway, Germany', 'United States, Kazakhstan', 'Czech Republic, Slovakia', 'Brazil, Netherlands, United States, Colombia, Austria, Germany', 'United States, United Kingdom, Japan', 'Sweden, United States', 'United Kingdom, France, United States, Belgium, Luxembourg, China, Germany', 'United Arab Emirates, Romania', 'South Korea, France', 'Portugal, France, Poland, United States', 'Belarus', 'Spain, United States', 'Spain, Italy, Argentina', 'Spain, Argentina', 'Turkey, India', 'United Kingdom, Germany, United Arab Emirates, New Zealand', 'United States, South Korea, China', 'United Kingdom, Spain, United States', 'United Kingdom, France, Germany', 'China, Hong Kong, United States', 'Spain, Mexico', 'Turkey, France, Germany, Poland', 'South Korea, China', 'United Kingdom, Israel, Russia', 'Spain, Switzerland', 'United Kingdom, Hong Kong', 'Slovenia, Croatia, Germany, Czech Republic, Qatar', 'Denmark, United States', 'United Kingdom, France, Belgium', 'United Kingdom, Poland, United States', 'Ireland, United Kingdom, Italy, United States', 'France, Canada', 'United Kingdom, Canada, Italy', 'United Kingdom, Italy, Israel, Peru, United States', 'Denmark, Spain', 'France, Canada, United States', 'Bulgaria, United States', 'United Kingdom, Russia', 'United States, Hungary', 'United States, Bulgaria', 'Canada, Ireland, United States', 'Israel, Germany, France', 'United Kingdom, Jordan, Qatar, Iran', 'South Korea, Canada, United States, China', 'Singapore, France', 'United States, France, South Korea, Indonesia', 'Switzerland, France', 'Italy, India', 'United States, Botswana', 'Spain, Cuba', 'Portugal, Spain', 'Belgium, Ireland, Netherlands, Germany, Afghanistan', 'United Kingdom, Finland, Germany', 'Canada, France', 'Ireland, South Africa', 'United States, Iceland', 'Denmark, Sweden, Israel, United States', 'Australia, Canada', 'Australia, Armenia, Japan, Jordan, Mexico, Mongolia, New Zealand, Philippines, South Africa, Sweden, United States, Uruguay', 'Australia, 
United Arab Emirates', 'Brazil, India, China, United States', 'Serbia, United States', 'Germany, Australia', 'United States, Australia, Samoa, United Kingdom', 'Italy, United States, Argentina', 'Zimbabwe', 'United Kingdom, Canada, Japan', 'India, Mexico', 'Germany, France, Russia', 'Netherlands, Germany, Denmark, United Kingdom', 'Ireland, Canada, Luxembourg, United States, United Kingdom, Philippines, India', 'Brazil, France', 'France, Belgium, China, United States', 'Cambodia, United States', 'China, Canada, United States', 'United Kingdom, France, Belgium, Canada, United States', 'Saudi Arabia, Syria, Egypt, Lebanon, Kuwait', 'Greece, United States', 'Austria, United States', 'Guatemala', 'Sweden, Netherlands', 'Israel, Germany', 'South Africa, Nigeria', 'Argentina, Brazil, France, Poland, Germany, Denmark', 'Cyprus', 'Canada, Germany, South Africa', 'United States, Canada, Indonesia, United Kingdom, China, Singapore', 'Somalia, Kenya, Sudan, South Africa, United States', 'United Kingdom, Namibia, South Africa, Zimbabwe, United States', 'Argentina, Uruguay, Spain, France', 'United Kingdom, France, Belgium, United States', 'Italy, France, Switzerland', 'United States, Denmark', 'Ghana', 'Singapore, United States', 'Australia, New Zealand, United States', 'South Africa, Germany, Netherlands, France', 'Argentina, France', 'Germany, Belgium', 'United States, South Korea, Japan', 'Venezuela', 'United States, Colombia, Mexico', 'Indonesia, Netherlands', 'Cambodia', 'United Kingdom, India, Sweden', 'Spain, Belgium, Switzerland, United States, China, United Kingdom', 'Czech Republic, United States', 'United Arab Emirates, United States, United Kingdom', 'Germany, Italy', 'Chile, Peru', 'Norway, Germany, Sweden', 'Uruguay', 'New Zealand, United Kingdom', 'United Kingdom, United States, Canada', 'Spain, United Kingdom', 'Spain, Canada, United States', 'United Kingdom, France, United States, Belgium', 'Spain, Germany', 'China, India, Nepal', 'Argentina, Italy', 
'Switzerland, France, Belgium, United States', 'United Kingdom, United States, Dominican Republic', 'France, Japan', 'Peru, Germany, Norway', 'United Kingdom, China, United States, India', 'Spain, Colombia', 'Canada, Japan, Netherlands', 'Thailand, United States', 'United States, Australia, China', 'Netherlands, Germany, Italy, Canada', 'Argentina, Chile', 'United Kingdom, Singapore', 'Argentina, France, United States, Germany, Qatar', 'Netherlands, United States', 'France, Netherlands, South Africa, Finland', 'South Africa, China, United States', 'Italy, Turkey', 'United States, Australia, South Africa, United Kingdom', 'Georgia, Germany, France', 'Switzerland, United States', 'China, Japan', 'Spain, France', 'Jamaica, United States', 'Indonesia, United States', 'Japan, Canada, United States', 'India, Germany, Austria', 'Spain, Portugal', 'Ireland, United States, France', 'Germany, United States, Sweden', 'United States, China, Colombia', 'Taiwan, China, France, United States', 'Uruguay, Spain, Mexico', 'United States, Colombia', 'United Arab Emirates, United Kingdom, India', 'United Kingdom, Pakistan', 'Pakistan, United Arab Emirates', 'Ireland, Canada', 'Spain, Belgium', 'Austria, Iraq, United States', 'Canada, United States, Germany', 'Bangladesh', 'Paraguay, Argentina', 'Ghana, United States', 'Mexico, Netherlands', 'United States, China, Canada', 'Colombia, Mexico', 'United Kingdom, Belgium', 'United States, Brazil, South Korea, Mexico, Japan, Germany', 'France, China, Japan, United States', 'Georgia', 'China, United States, United Kingdom', 'Italy, Switzerland, Albania, Poland', 'United Kingdom, Ireland, United States', 'Poland, United States', 'France, Belgium, Luxembourg, Romania, Canada, United States', 'United States, Israel, United Kingdom, Canada', 'United States, Mexico, Spain, Malta', 'Croatia', 'Canada, South Africa', 'Puerto Rico, United States, Colombia', 'Hong Kong, Iceland, United States', 'United Kingdom, Spain, United States, Germany', 'South 
Korea, Japan', 'China, United States, Australia', 'Belgium, United Kingdom, United States', 'Indonesia, South Korea, Singapore', 'France, Luxembourg, United States', 'India, Nepal', 'France, United Kingdom', 'Mexico, Argentina', 'Romania, United Kingdom', 'United Kingdom, Kenya', 'Singapore, Japan, France', 'France, Belgium, Luxembourg, Cambodia,', 'Uruguay, Argentina, Spain', 'China, United Kingdom', 'Canada, Belgium', 'China, Morocco, Hong Kong', 'France, Iran, United States', 'Ireland, Luxembourg, Belgium', 'United Kingdom, Canada, United States, Germany', 'United States, Nigeria', 'Argentina, Uruguay, Serbia', 'Russia, Poland, Serbia', 'Spain, Italy', 'United States,', 'United States, Taiwan', 'Finland', 'Kenya', 'Bulgaria', 'United Kingdom, United States, Czech Republic', 'Egypt, Austria, United States', 'United Kingdom, South Africa, Australia, United States', 'Turkey, South Korea', 'Norway, Iceland, United States', 'Chile, Argentina', 'United Kingdom, Poland', 'United Kingdom, Canada, United States, Cayman Islands', 'United States, Brazil, India, Uganda, China', 'Italy, Switzerland, France, Germany', 'Colombia, Peru, United Kingdom', 'Canada, Japan, United States', 'Switzerland, Vatican City, Italy, Germany, France', 'Iceland, Sweden, Belgium', 'Norway, Denmark, Netherlands, Sweden', 'India, Iran', 'United States, Poland', 'United Kingdom, Belgium, Sweden', 'Uruguay, Guatemala', 'Philippines, Singapore', 'Finland, Germany, Belgium', 'India, United Kingdom, China, Canada, Japan, South Korea, United States', 'France, Netherlands, Singapore', 'Norway, Sweden', 'France, Senegal, Belgium', 'Ireland, France, Iceland, United States, Mexico, Belgium, United Kingdom, Hong Kong', 'Canada, Norway', 'Italy, Belgium', 'Italy, United Kingdom, France', 'India, Turkey', 'Romania, United States', 'Philippines, United States', 'United States, Norway, Canada', 'United Kingdom, Italy', 'Senegal', 'United States, India, South Korea, China', 'Spain, France, Canada, United 
States', 'United States, Mexico, Colombia', 'Uruguay, Argentina', 'Finland, United States', 'United Kingdom, Malawi', 'Uruguay, Germany', 'France, New Zealand', 'Denmark, France, Belgium, Italy, Netherlands, United States, United Kingdom', 'India, United Kingdom, Canada, United States', 'Mexico, United States, Spain, Colombia', 'Spain, France, Italy', 'China, Taiwan', 'Mexico, Finland', 'Argentina, United States', 'Singapore, Malaysia', 'Spain, France, Canada', 'Canada, Brazil', 'Canada, Germany', 'Thailand, China, United States', 'United Kingdom, Hungary, Australia', 'Denmark, Singapore, Canada, United States', 'Finland, France', 'Spain, France, United States', 'Jordan', 'Mauritius, South Africa', 'Namibia', 'France, Luxembourg, Canada', 'United States, Thailand', 'United States, United Kingdom, India', 'United States, Philippines', 'United Kingdom, China', 'United Kingdom, Australia', 'Canada, Hungary, United States', 'Taiwan, China', 'South Africa, Angola', 'Argentina, United States, Mexico', 'Finland, Germany', 'Germany, Czech Republic', 'Taiwan, Malaysia', 'Chile, Italy', 'France, Brazil, Spain, Belgium', 'France, Australia, Germany', 'Australia, New Zealand', 'United Kingdom, France, Belgium, United States, China', 'France, Algeria', 'United Kingdom, France, Germany, Spain', 'Brazil, United Kingdom', 'Austria, Germany', 'China, Spain, South Korea, United States', 'Belgium, Netherlands', 'Canada, United States, Ireland', 'Spain, Germany, Denmark, United States', 'United Kingdom, Germany, United States, France', 'Indonesia, United Kingdom', 'United Kingdom, Spain', 'Germany, China, United Kingdom', 'Norway, Denmark', 'United States, France, Canada', 'Canada, United States, Cayman Islands'], dtype=object)
netflix.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 7787 entries, 0 to 7786 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 show_id 7787 non-null object 1 type 7787 non-null object 2 title 7787 non-null object 3 director 7787 non-null object 4 cast 7069 non-null object 5 country 7280 non-null object 6 date_added 7777 non-null object 7 release_year 7787 non-null int64 8 rating 7780 non-null object 9 duration 7787 non-null object 10 listed_in 7787 non-null object 11 description 7787 non-null object 12 new_col 7787 non-null int32 13 number 7787 non-null int32 dtypes: int32(2), int64(1), object(11) memory usage: 791.0+ KB
netflix.head(3)

Example

netflix[netflix.type == "TV Show"]
len( netflix.loc[netflix["type"] == "TV Show"] )
2410
netflix[(netflix["type"] == "TV Show") & (netflix["country"] == "Peru")]
# No hay series peruanas en Netflix len(netflix[(netflix["country"] == "Brazil") | (netflix["country"] == "Peru")])
76
netflix[(netflix["type"] == "TV Show") | (netflix["country"] == "Peru")]
# peliculas americanas netflix.loc[(netflix["type"] == "Movie") & (netflix["country"] == "United States") & (netflix["release_year"] > 2019)].head(5)
# Brazil in 2020 net_B_2020 = netflix[(netflix["country"] == "Brazil") & (netflix["release_year"] == 2020)] net_B_2020.head(3)

.isin()

If you want to filter by multiple values of a categorical variable, the easiest way is to use the isin() method.

# Peliculas de Perú y Chile net_per_ch = netflix[netflix["country"].isin(["Peru","Chile"])] # filter by variables's values net_per_ch
# Peliculas diferentes de Perú y Chile netflix[~ netflix["country"].isin(["Peru","Chile"])] # ~ negación en Python ALT + 126

Alternative methods to filter

# Store each condition as a named boolean mask, then combine the masks with &
movie = netflix["type"] == "Movie"
m_usa = netflix["country"] == "United States"
m_actual = netflix["release_year"] > 2019

movie_usa = netflix[movie & m_usa & m_actual]
movie_usa
# US movies released after 2019, selected with .loc and inline conditions.
# The trailing .copy() makes movie_usa an independent DataFrame, so in-place
# operations on it (drop, rename) do not raise SettingWithCopyWarning.
movie_usa = netflix.loc[
    (netflix["type"] == "Movie")
    & (netflix["country"] == "United States")
    & (netflix["release_year"] > 2019)
].copy()
movie_usa
# Drop the show_id and director columns (axis = 1 / columns=: drop by column).
# The name is rebound to the returned DataFrame instead of using inplace = True:
# an in-place drop on a DataFrame obtained by filtering another one triggers
# SettingWithCopyWarning.
movie_usa = movie_usa.drop(columns=['show_id', 'director'])
C:\Users\Roberto\AppData\Local\Temp\ipykernel_18864\3584590493.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy movie_usa.drop(['show_id', 'director'], axis=1, inplace = True ) # axis = 1 , drop por columna
movie_usa

Axis:

  • drop, concat: axis = 0 filas, axis = 1 columnas

  • En funciones aplicadas a DataFrames o matrices (mean, std, apply con lambda): axis = 1 calcula por filas, mientras que axis = 0 calcula por columnas

movie_usa[movie_usa.release_year != 2020]
# Rename variables.
# The name is rebound to the returned DataFrame instead of using inplace = True:
# an in-place rename on a DataFrame obtained by filtering another one triggers
# SettingWithCopyWarning.
movie_usa = movie_usa.rename(columns={'title': 'Titulo_movie', 'duration': 'Duration_movie'})
C:\Users\Roberto\AppData\Local\Temp\ipykernel_18864\1458485735.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy movie_usa.rename(columns = {'title':'Titulo_movie', 'duration':'Duration_movie'}, inplace = True)
movie_usa

Export data

movie_usa.to_csv("../data/movie_usa_1.csv")  # save the filtered data as CSV in the data folder

movie_usa.to_excel("../data/movie_usa_1.xlsx")  # save the same data as an Excel workbook