Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
wesm
GitHub Repository: wesm/pydata-book
Path: blob/3rd-edition/appa.ipynb
1797 views
Kernel: Python 3
# Notebook setup: imports plus the display/print options used by later cells.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.rc('figure', figsize=(10, 6))

# Remember the current pandas row limit before overriding it; the final cell
# of the notebook assigns PREVIOUS_MAX_ROWS back.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
np.set_printoptions(precision=4, suppress=True)

# Seeded generator that later cells draw their random samples from.
rng = np.random.default_rng(seed=12345)
# Shape and strides of freshly allocated arrays.
np.ones((10, 5)).shape
np.ones((3, 4, 5), dtype=np.float64).strides

# Check where concrete dtypes sit in NumPy's abstract dtype hierarchy.
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)
np.issubdtype(floats.dtype, np.floating)

# Parent classes of a specific scalar type.
np.float64.mro()

np.issubdtype(ints.dtype, np.number)
# Reshape a 1-D range into 2-D, then reshape the result again.
arr = np.arange(8)
arr
arr.reshape((4, 2))
arr.reshape((4, 2)).reshape((2, 4))

# -1 asks reshape to work out that dimension from the array's size.
arr = np.arange(15)
arr.reshape((5, -1))

# A shape tuple taken from another array can be passed straight to reshape.
other_arr = np.ones((3, 5))
other_arr.shape
arr.reshape(other_arr.shape)

# Going back from 2-D to 1-D with ravel and flatten.
arr = np.arange(15).reshape((5, 3))
arr
arr.ravel()
arr.flatten()

# ravel with the default order and with 'F' order.
arr = np.arange(12).reshape((3, 4))
arr
arr.ravel()
arr.ravel('F')
# Join two 2x3 arrays along each axis with concatenate ...
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate([arr1, arr2], axis=0)
np.concatenate([arr1, arr2], axis=1)

# ... and with the vstack / hstack helpers.
np.vstack((arr1, arr2))
np.hstack((arr1, arr2))
# Split a 5-row array at row indices 1 and 3, giving three pieces.
arr = rng.standard_normal((5, 2))
arr
first, second, third = np.split(arr, [1, 3])
first
second
third

# np.r_ and np.c_ build stacked arrays using indexing syntax.
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = rng.standard_normal((3, 2))
np.r_[arr1, arr2]
np.c_[np.r_[arr1, arr2], arr]

# Slices inside np.c_ are expanded into ranges before stacking.
np.c_[1:6, -10:-5]
# Repeat every element the same number of times ...
arr = np.arange(3)
arr
arr.repeat(3)

# ... or a different number of times per element.
arr.repeat([2, 3, 4])

# Repeating along an axis of a 2-D array.
arr = rng.standard_normal((2, 2))
arr
arr.repeat(2, axis=0)

# Per-slice repeat counts along each axis.
arr.repeat([2, 3], axis=0)
arr.repeat([2, 3], axis=1)

# np.tile with a scalar repetition count.
arr
np.tile(arr, 2)

# np.tile with a tuple of repetition counts.
arr
np.tile(arr, (2, 1))
np.tile(arr, (3, 2))
# Select elements by a list of integer positions.
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]

# take reads the same positions; put writes to them in place, first with a
# single value and then with one value per position.
arr.take(inds)
arr.put(inds, 42)
arr
arr.put(inds, [40, 41, 42, 43])
arr
# take along axis 1 picks columns; an index may appear more than once.
inds = [2, 0, 2, 1]
arr = rng.standard_normal((2, 4))
arr
arr.take(inds, axis=1)
# A scalar is combined with every element of the array.
arr = np.arange(5)
arr
arr * 4

# Subtract the per-column means from a (4, 3) array.
arr = rng.standard_normal((4, 3))
arr.mean(0)
demeaned = arr - arr.mean(0)
demeaned
demeaned.mean(0)

# Subtract the per-row means: reshape them to (4, 1) first.
arr
row_means = arr.mean(1)
row_means.shape
row_means.reshape((4, 1))
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)

# NOTE: arr is (4, 3) and arr.mean(1) is (4,), which cannot be broadcast
# together, so this line raises ValueError. It is kept because the cell
# appears to exist to show that failure; run as a plain script, execution
# stops here.
arr - arr.mean(1)

# The working form of the line above.
arr - arr.mean(1).reshape((4, 1))

# np.newaxis inserts a length-1 axis at the indexed position.
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape
arr_1d = rng.standard_normal(3)
arr_1d[:, np.newaxis]
arr_1d[np.newaxis, :]

# Subtract the means taken over axis 2 of a 3-D array.
arr = rng.standard_normal((3, 4, 5))
depth_means = arr.mean(2)
depth_means
depth_means.shape
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)
# Assigning a scalar through a slice fills the whole array.
arr = np.zeros((4, 3))
arr[:] = 5
arr

# Assigning a (4, 1) column fills each row with that row's value.
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr

# Overwrite only the first two rows, again from one value per row.
arr[:2] = [[-1.37], [0.509]]
arr
# reduce on the add ufunc next to the equivalent sum method.
arr = np.arange(10)
np.add.reduce(arr)
arr.sum()

# Use logical_and.reduce to test, row by row, whether values are increasing.
my_rng = np.random.default_rng(12346)  # for reproducibility
arr = my_rng.standard_normal((5, 5))
arr
arr[::2].sort(1)  # sort a few rows
arr[:, :-1] < arr[:, 1:]
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

# accumulate keeps the intermediate results of the reduction.
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)

# outer applies the ufunc to every pair of elements from the two inputs.
arr = np.arange(3).repeat([1, 2, 2])
arr
np.multiply.outer(arr, np.arange(5))
# outer on a (3, 4) input and a length-5 input; the cell then inspects the
# shape of the result.
x, y = rng.standard_normal((3, 4)), rng.standard_normal(5)
result = np.subtract.outer(x, y)
result.shape
# reduceat reduces over the slices that start at the given indices.
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

# The same operation applied along axis 1 of a 2-D array.
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr
np.add.reduceat(arr, [0, 2, 4], axis=1)
def add_elements(x, y):
    """Return ``x + y``; used below as the scalar kernel for custom ufuncs."""
    return x + y


# Wrap the Python function as a ufunc-like object taking 2 inputs and
# producing 1 output.
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

# np.vectorize wraps the same function and lets the output dtype be specified.
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))
arr = rng.standard_normal(10000) %timeit add_them(arr, arr) %timeit np.add(arr, arr)
# A structured dtype given as a list of (field name, field dtype) pairs.
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

# Index a record first, then a field inside it.
sarr[0]
sarr[0]['y']

# Index by field name across all records.
sarr['x']

# A third tuple element gives the field a shape: 'x' holds 3 int64 values
# per record.
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

arr[0]['x']
arr['x']

# A field's dtype can itself be a structured dtype (nested fields).
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']
data['y']
data['x']['a']
# The sort method sorts the array in place.
arr = rng.standard_normal(6)
arr.sort()
arr

arr = rng.standard_normal((3, 5))
arr
arr[:, 0].sort()  # Sort first column values in place
arr

# np.sort returns a sorted result; the cell displays arr again afterwards
# for comparison.
arr = rng.standard_normal(5)
arr
np.sort(arr)
arr

# Sort along axis 1.
arr = rng.standard_normal((3, 5))
arr
arr.sort(axis=1)
arr

# Reverse the column order of the sorted rows.
arr[:, ::-1]

# argsort returns the positions that would sort the array.
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer
values[indexer]

# Reorder all columns by the sort order of the first row.
arr = rng.standard_normal((3, 5))
arr[0] = values
arr
arr[:, arr[0].argsort()]
# lexsort over several key arrays; the LAST key passed (last_name) is the
# primary sort key.
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter
list(zip(last_name[sorter], first_name[sorter]))

# With kind='mergesort', elements with equal keys keep their original
# relative order.
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer
values.take(indexer)
# Re-seed the generator, then partition around the element at sorted
# position 3.
rng = np.random.default_rng(12345)
arr = rng.standard_normal(20)
arr
np.partition(arr, 3)

# argpartition returns the indices that produce such a partition.
indices = np.argpartition(arr, 3)
indices
arr.take(indices)

# searchsorted finds insertion points in a sorted array, for one value ...
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

# ... or for several values at once.
arr.searchsorted([0, 8, 11, 16])

# side='right' places a value after any equal elements instead of before.
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])
arr.searchsorted([0, 1], side='right')

# Use searchsorted against bin edges to label each data point with a bin.
data = np.floor(rng.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

labels = bins.searchsorted(data)
labels

# Mean of the data within each bin label.
pd.Series(data).groupby(labels).mean()
import numpy as np


def mean_distance(x, y):
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    The differences are signed (no absolute value is taken), so for NumPy
    arrays of equal length the result matches ``(x - y).mean()``.

    Parameters
    ----------
    x, y : indexable sequences of numbers
        Only the first ``len(x)`` elements of ``y`` are read.

    Returns
    -------
    float
        The mean difference, or NaN when ``x`` is empty (the value NumPy's
        ``mean`` gives for an empty array) instead of dividing by zero.
    """
    nx = len(x)
    if nx == 0:
        return np.nan

    result = 0.0
    # Explicit index loop: y is indexed in lockstep with x, so a y shorter
    # than x fails on the out-of-range index rather than being truncated.
    for i in range(nx):
        result += x[i] - y[i]
    return result / nx
mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000)) mmap
section = mmap[:5]
section[:] = rng.standard_normal((5, 10000)) mmap.flush() mmap del mmap
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000)) mmap
%xdel mmap !rm mymmap
# Allocate the same shape in C (row-major) and Fortran (column-major) order
# and inspect the layout flags of each.
arr_c = np.ones((100, 10000), order='C')
arr_f = np.ones((100, 10000), order='F')
arr_c.flags
arr_f.flags
arr_f.flags.f_contiguous
%timeit arr_c.sum(1) %timeit arr_f.sum(1)
arr_f.copy('C').flags
arr_c[:50].flags.contiguous arr_c[:, :50].flags
%xdel arr_c %xdel arr_f
# Put back the pandas row-display limit that the setup cell saved in
# PREVIOUS_MAX_ROWS before overriding it.
pd.options.display.max_rows = PREVIOUS_MAX_ROWS