01
Import¶
Basics¶
## build a 1-D array from a Python list
np.array([1, 2, 3, 4, 5])
np.arange(1, 100, 10) ## start, stop (exclusive), step
np.linspace(1, 100, 10) ## start, stop (inclusive), no of values
## constant-filled arrays of the given length
np.zeros(10)
np.ones(10)
## random
np.random.random(10) ## uniform samples in [0, 1)
np.random.randn(10) ## standard normal samples
Array Operations¶
Indexing¶
np.vectorize¶
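np.vectorize wraps a Python function so it can be called element-wise on arrays; internally it is essentially a for loop, so it is a convenience rather than a speed-up.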
Stats¶
Calculus¶
## numerical calculus (for symbolic, use sympy)
## x and y are sample arrays defined elsewhere (not shown in this snippet)
dydx = np.gradient(y, x ) ## finite-difference derivative of y with respect to x
## running integral via a cumulative sum; x[1]-x[0] is used as the step for every sample,
## so this assumes x is uniformly spaced
y_int = np.cumsum(y) * (x[1]-x[0])
Multi-Dimensional¶
## 2-D array from a nested list: 2 rows, 3 columns
a = np.array([
[1, 2, 3],
[4, 5, 6]
])
a = np.random.randn(3, 3) ## rebinds a to a 3x3 array of standard normal samples
a.ravel() ## returns a 1d array
a[0] ## first row
a[:,0] ## first column
Mesh Grid¶
Linear Algebra¶
Matrix¶
Solve systems of equations¶
# Coefficient matrix of the linear system, one equation per row.
a = np.array([[3, 2, 1], [5, -5, 4], [6, 0, 1]])
# Right-hand side, one entry per equation.
b = np.array([4, 3, 0])
# Solve a @ x = b for the unknown vector x.
x = np.linalg.solve(a, b)
Eigenvalues¶
Find-Replace¶
if
## Cap ratings at 100: np.where(condition, value_if_true, value_if_false), element-wise.
## `prediction` is a table defined elsewhere; its 'Rating' column is overwritten in place.
prediction['Rating'] = np.where(
prediction['Rating'].to_numpy() > 100, ## condition
100, ## value used where the condition is True
prediction['Rating'].to_numpy() ## value used where the condition is False (unchanged rating)
)
if-else
if-elseif-else
## if / elif / else with np.select: conditions are checked in order and, per element,
## the value paired with the FIRST true condition is used.
conditions = [
prediction['Rating'].to_numpy() > 100,
prediction['Rating'].to_numpy() > 50,
prediction['Rating'].to_numpy() > 20
]
## values[i] is the replacement used where conditions[i] is the first match
values = [
100,
50,
20
]
## used where none of the conditions is true (the "else" branch)
default = 0
prediction['Rating'] = np.select(
conditions,
values,
default = default
)
nested
## Nested conditions with np.select: per element, the value paired with the FIRST
## true condition is used; `default` is used where no condition is true.
##
## Every comparison must be parenthesised before combining with `&`:
## `&` and `%` bind tighter than `>` and `==`, so `a > 100 & a % 2 == 0` is parsed as
## the chained comparison `a > (100 & (a % 2)) == 0`, which raises ValueError on arrays.
##
## NOTE: because the first match wins, the `% 4 == 0` branch is shadowed by the
## `% 2 == 0` branch (every multiple of 4 is even); reorder if 104 should be reachable.
conditions = [
    (prediction['Rating'].to_numpy() > 100) & (prediction['Rating'].to_numpy() % 2 == 0),
    (prediction['Rating'].to_numpy() > 100) & (prediction['Rating'].to_numpy() % 3 == 0),
    (prediction['Rating'].to_numpy() > 100) & (prediction['Rating'].to_numpy() % 4 == 0),
    prediction['Rating'].to_numpy() > 50,
    prediction['Rating'].to_numpy() > 20
]
## values[i] is the replacement used where conditions[i] is the first match
values = [
    102,
    103,
    104,
    50,
    20
]
default = 0
prediction['Rating'] = np.select(
    conditions,
    values,
    default = default
)
Rounding¶
Round to Integer¶
Round to \(n\) places
Read data¶
Save¶
Cartesian¶
Indexing¶
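High space complexity: the full Cartesian product is materialized in memory, i.e. an array with one row per combination (the product of all input sizes) and one column per input array.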
import numpy as np
def cartesian(arrays, out=None):
    """
    Generate a Cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the Cartesian product of.
    out : ndarray, optional
        Array to place the Cartesian product in. When omitted, a new array
        is allocated whose dtype is the common (promoted) dtype of all
        inputs, so mixing e.g. integers and floats does not truncate values.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing Cartesian products
        formed of input arrays, where M is the product of the input sizes.
        If any input is empty, M is 0 and the result has no rows.

    Examples
    --------
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])
    """
    arrays = [np.asarray(x) for x in arrays]
    first, rest = arrays[0], arrays[1:]

    # Total number of rows in the product.
    n = int(np.prod([x.size for x in arrays]))
    if out is None:
        # Promote across ALL inputs; using only the first array's dtype would
        # silently truncate e.g. floats placed after an integer array.
        out = np.zeros([n, len(arrays)], dtype=np.result_type(*arrays))
    if n == 0:
        # An empty input yields an empty product; also avoids dividing by zero below.
        return out

    # Each value of the first array is repeated once per combination of the rest.
    m = n // first.size
    out[:, 0] = np.repeat(first, m)
    if rest:
        # Fill the first block of the remaining columns recursively (in place,
        # through a view), then copy that block for every other leading value.
        cartesian(rest, out=out[0:m, 1:])
        for j in range(1, first.size):
            out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]
    return out
CRPS¶
# Adapted to numpy from pyro.ops.stats.crps_empirical
# Copyright (c) 2017-2019 Uber Technologies, Inc.
# SPDX-License-Identifier: Apache-2.0
import numpy as np
def crps(y_true, y_pred, sample_weight=None):
    """
    Empirical Continuous Ranked Probability Score, averaged over observations.

    Parameters
    ----------
    y_true : array-like
        Observed values; must broadcast against ``y_pred[i]``.
    y_pred : array-like
        Forecast samples, with the samples stacked along axis 0
        (``y_pred.shape[0]`` is the number of samples per observation).
    sample_weight : array-like, optional
        Per-observation weights passed to ``np.average``.

    Returns
    -------
    float
        (Weighted) mean of the per-observation CRPS values.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    num_samples = y_pred.shape[0]
    # First term: mean absolute error of the samples against the observation.
    absolute_error = np.mean(np.abs(y_pred - y_true), axis=0)
    if num_samples == 1:
        # With a single sample there are no sample pairs, so CRPS reduces to the absolute error.
        return np.average(absolute_error, weights=sample_weight)

    # Second term: pairwise spread between samples, computed from the gaps
    # between consecutive sorted samples; gap i is counted once for every
    # pair of samples it separates, i.e. i * (num_samples - i) times.
    y_pred = np.sort(y_pred, axis=0)
    diff = y_pred[1:] - y_pred[:-1]
    weight = np.arange(1, num_samples) * np.arange(num_samples - 1, 0, -1)
    # Add one trailing singleton axis per non-sample axis so the weights
    # broadcast along axis 0 for 1-D, 2-D and higher-dimensional y_pred alike.
    weight = weight.reshape((-1,) + (1,) * (diff.ndim - 1))
    per_obs_crps = absolute_error - np.sum(diff * weight, axis=0) / num_samples**2
    return np.average(per_obs_crps, weights=sample_weight)
Linear Regression¶
class WLS(RegressorMixin):
    """
    Weighted least squares with an optional ridge (L2) penalty.

    - Uses element-wise multiplication (*) for weighted regression to avoid the more cumbersome .dot and .diag
    - np.linalg.lstsq is the most stable method, others are
        - np.linalg.solve
        - np.linalg.pinv
        - np.linalg.inv
    - Add 1s if intercept required

    Parameters
    ----------
    fit_intercept : bool
        Prepend a column of ones to X so the first coefficient is the intercept.
    alpha : float
        Ridge penalty strength; 0 disables the penalty.
    penalize_intercept : bool
        Whether the ridge penalty also applies to the intercept.
    **init_params
        Accepted for signature compatibility and ignored.

    Attributes
    ----------
    n_features : int
        Number of columns of X seen in ``fit`` (intercept excluded).
    coef : ndarray
        Fitted coefficients; the intercept comes first when ``fit_intercept`` is True.
    """

    def __init__(self, fit_intercept=True, alpha=0, penalize_intercept=False, **init_params):
        self.fit_intercept = fit_intercept
        self.alpha = alpha
        self.penalize_intercept = penalize_intercept

    def _design_matrix(self, X):
        # Convert X to a 2-D float array and prepend the intercept column if requested.
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = np.c_[np.ones(X.shape[0]), X]
        return X

    def fit(self, X, y, sample_weight=None, **fit_params):
        """
        Fit the coefficients.

        X is expected to be 2-D (n_samples, n_features) and y 1-D (n_samples,).
        Returns ``self``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, self.n_features = X.shape  # Exclude intercept

        if sample_weight is None:
            sample_weight = np.ones(n_samples)
        # Scaling rows by sqrt(weight) turns weighted LS into ordinary LS.
        w = np.sqrt(np.asarray(sample_weight, dtype=float))

        X = self._design_matrix(X)
        n_coef = X.shape[1]

        # Ridge penalty as extra "observations": appending sqrt(alpha) * I rows
        # with zero targets adds alpha * ||coef||^2 to the squared error.
        penalty_diag = np.full(n_coef, np.sqrt(self.alpha), dtype=float)
        if self.fit_intercept and not self.penalize_intercept:
            # The intercept is the first column; leave it unpenalized.
            penalty_diag[0] = 0.0

        # Augment matrix and target vector
        X_aug = np.vstack([X * w.reshape(n_samples, 1), np.diag(penalty_diag)])
        y_aug = np.concatenate([y * w, np.zeros(n_coef)])

        self.coef, residuals, rank, singular_values = np.linalg.lstsq(X_aug, y_aug, rcond=None)
        return self

    def predict(self, X, y=None):
        """Return predictions for 2-D X using the fitted coefficients; ``y`` is ignored."""
        return self._design_matrix(X) @ self.coef
Mini-Batch
class MiniBatchWLS(RegressorMixin):
    """
    Weighted least squares with an optional ridge (L2) penalty, fitted by
    accumulating the normal equations over row batches.

    - Uses element-wise multiplication (*) for weighted regression to avoid the more cumbersome .dot and .diag
    - np.linalg.lstsq is the most stable method, others are
        - np.linalg.solve
        - np.linalg.pinv
        - np.linalg.inv
    - Add 1s if intercept required

    Parameters
    ----------
    fit_intercept : bool
        Prepend a column of ones to X so the first coefficient is the intercept.
    alpha : float
        Ridge penalty strength; 0 disables the penalty.
    penalize_intercept : bool
        Whether the ridge penalty also applies to the intercept.
    n_splits : int
        Number of row batches the data is processed in.
    n_jobs : int
        Stored for future parallelization of the batch loop; currently unused.
    **init_params
        Accepted for signature compatibility and ignored.

    Attributes
    ----------
    n_samples, n_features : int
        Shape of X seen in ``fit`` (intercept excluded).
    split_size : int
        ``n_samples // n_splits``, the nominal batch size.
    coef : ndarray
        Fitted coefficients; the intercept comes first when ``fit_intercept`` is True.
    """

    def __init__(self, fit_intercept=True, alpha=0, penalize_intercept=False, n_splits=4, n_jobs=1, **init_params):
        self.fit_intercept = fit_intercept
        self.alpha = alpha
        self.penalize_intercept = penalize_intercept
        self.n_splits = n_splits
        self.n_jobs = n_jobs

    def _design_matrix(self, X):
        # Convert X to a 2-D float array and prepend the intercept column if requested.
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = np.c_[np.ones(X.shape[0]), X]
        return X

    def fit(self, X, y, sample_weight=None, **fit_params):
        """
        Fit the coefficients.

        X is expected to be 2-D (n_samples, n_features) and y 1-D (n_samples,).
        Returns ``self``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        self.n_samples, self.n_features = X.shape  # Exclude intercept
        self.split_size = self.n_samples // self.n_splits

        if sample_weight is None:
            sample_weight = np.ones(self.n_samples)
        sample_weight = np.asarray(sample_weight, dtype=float)

        n_coef = self.n_features + (1 if self.fit_intercept else 0)

        # Accumulators for the weighted normal equations X'WX and X'Wy.
        XtX = np.zeros((n_coef, n_coef))
        Xty = np.zeros(n_coef)

        # Batch boundaries cover every row, including the remainder when
        # n_samples is not a multiple of n_splits.
        edges = np.linspace(0, self.n_samples, self.n_splits + 1).astype(int)

        # parallelize this
        for idx_start, idx_end in zip(edges[:-1], edges[1:]):
            Xg = self._design_matrix(X[idx_start:idx_end, :])
            yg = y[idx_start:idx_end]
            # Weight each row once; Xg' (W Xg) and (W Xg)' yg then follow.
            Xg_weighted = Xg * sample_weight[idx_start:idx_end].reshape(-1, 1)
            XtX += Xg.T @ Xg_weighted
            Xty += Xg_weighted.T @ yg

        # In normal-equation form the ridge penalty adds alpha to the diagonal.
        penalty_diag = np.full(n_coef, self.alpha, dtype=float)
        if self.fit_intercept and not self.penalize_intercept:
            # The intercept is the first column; leave it unpenalized.
            penalty_diag[0] = 0.0

        # lstsq rather than inv/solve so a rank-deficient system still yields
        # the minimum-norm solution.
        self.coef, residuals, rank, singular_values = np.linalg.lstsq(
            XtX + np.diag(penalty_diag), Xty, rcond=None
        )
        return self

    def predict(self, X, y=None):
        """Return predictions for 2-D X using the fitted coefficients; ``y`` is ignored."""
        return self._design_matrix(X) @ self.coef