Skip to content

01

Import

import numpy as np

Basics

np.array([1, 2, 3, 4, 5])
np.arange(1, 100, 10) ## start, stop (exclusive), step
np.linspace(1, 100, 10) ## start, stop (inclusive), number of values

## constant-filled arrays
np.zeros(10)
np.ones(10)

## random
np.random.random(10) ## uniform samples in [0, 1)
np.random.randn(10) ## standard-normal samples

Array Operations

## Element-wise (the scalar is broadcast over every element)
a+3
1/a

## Boolean (element-wise comparison, yields a boolean array)
a > 4

Indexing

a[2] ## single element at index 2 (the third item)

a[2:] ## from index 2 to the end
a[-10:] ## last 10 elements

a[:10] ## first 10 elements
a[:-10] ## everything except the last 10 elements

a[::2] ## elements at even indices (0, 2, 4, ...)
a[1::2] ## elements at odd indices (1, 3, 5, ...)


## Masking (keep only the elements where the condition is True)
a[a > 4]

np.vectorize

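Applies a Python function to every element of an array. It is essentially a for loop: a convenience wrapper, not a performance optimisation.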

## Wrap the scalar function once, then call the wrapper on the whole sequence
names = ["Thahir", "Azhar"]
take_initial = np.vectorize(lambda name: name[0])
first_letter = take_initial(names)

Stats

## Summary statistics over all elements of `a`
np.mean(a)
np.median(a)
np.std(a) ## population standard deviation (ddof=0 by default)
np.quantile(a, 0.90) ## q is a fraction in [0, 1]
np.percentile(a, 90) ## same result as the line above; q is a percentage in [0, 100]

Calculus

## numerical calculus on sampled data (for symbolic, use sympy)
dydx = np.gradient(y, x ) ## finite-difference derivative of y with respect to x
y_int = np.cumsum(y) * (x[1]-x[0]) ## running integral as a cumulative sum; uses x[1]-x[0] as the step, so x must be evenly spaced

Multi-Dimensional

## 2-D array from nested lists: 2 rows, 3 columns
a = np.array([
  [1, 2, 3],
  [4, 5, 6]
])
## rebinds `a` to a 3x3 array of standard-normal samples
a = np.random.randn(3, 3)

a.ravel() ## returns a flattened 1d array

a[0] ## first row
a[:,0] ## first column

Mesh Grid

## coordinate matrices covering every (x, y) combination
xv, yv = np.meshgrid(x, y)
zv = xv**2 + yv**2 ## evaluate z = x^2 + y^2 at each grid point
## NOTE(review): `plt` is presumably matplotlib.pyplot; its import is not shown on this page
plt.contourf(xv, yv, zv, levels=100)
plt.colorbar()

Linear Algebra

Matrix

  a.T ## transpose
  a*b ## element-wise operator
  a@b ## matrix multiplication
  a.dot(b) ## same result as a@b for 2-D arrays
  np.cross(a, b) ## cross product; it is a module-level function, ndarray has no .cross() method

Solve systems of equations

  ## coefficient matrix, one row per equation
  a = np.array([
    [3, 2, 1],
    [5, -5, 4],
    [6, 0, 1]
  ])
  ## right-hand side, one entry per equation
  b = np.array([
    4,
    3,
    0
  ])

  x = np.linalg.solve(a, b) ## ax = b

Eigenvalues

  temp = np.linalg.eig(A) ## returns (eigenvalues, eigenvectors)
  eigen_values = temp[0]
  eigen_vector = temp[1][:, 0] ## eigenvectors are stored as columns; column 0 pairs with eigen_values[0]

Find-Replace

if

  ## cap ratings at 100: values above 100 become 100, all others are kept as they are
  prediction['Rating'] = np.where(
    prediction['Rating'].to_numpy() > 100,
    100,
    prediction['Rating'].to_numpy()
  )

if-else

  ## 100 where the rating exceeds 100, otherwise 0
  prediction['Rating'] = np.where(
    prediction['Rating'].to_numpy() > 100,
    100,
    0
  )

if-elseif-else

  ## conditions are checked in order; for each element the first True one wins
  conditions = [
    prediction['Rating'].to_numpy() > 100,
    prediction['Rating'].to_numpy() > 50,
    prediction['Rating'].to_numpy() > 20
  ]

  ## values[i] is assigned where conditions[i] is the first match
  values = [
    100,
    50,
    20  
  ]

  ## assigned where no condition matches
  default = 0

  prediction['Rating'] = np.select(
    conditions,
    values,
    default = default
  )

nested

  ## `&` binds tighter than `>` and `==`, so each comparison needs its own
  ## parentheses; without them the expression raises on arrays
  rating = prediction['Rating'].to_numpy()
  above_100 = rating > 100

  ## conditions are checked in order; for each element the first True one wins
  ## (every multiple of 4 is also a multiple of 2, so the first condition
  ## always matches before the third one can)
  conditions = [
    above_100 & (rating % 2 == 0),
    above_100 & (rating % 3 == 0),
    above_100 & (rating % 4 == 0),

    rating > 50,
    rating > 20
  ]

  ## values[i] is assigned where conditions[i] is the first match
  values = [
    102,
    103,
    104,

    50,
    20
  ]

  ## assigned where no condition matches
  default = 0

  prediction['Rating'] = np.select(
    conditions,
    values,
    default = default
  )

Rounding

Round to Integer

  np.around(prediction) ## rounds every element in one call; the result is returned, `prediction` itself is not modified

  ## instead of
  ## prediction = ( round(element) for element in prediction )

Round to \(n\) places

  np.around(prediction, n) ## n = number of decimal places to keep

Read data

data = np.loadtxt(
  "./data.csv",
  dtype = "object", ## object dtype instead of the default float
  delimiter = ",",
  unpack = True, ## transpose the result so that data[i] is the i-th column
  skiprows = 1 ## skip the first line of the file (the header row)
)

Save

np.savetxt(
    filename + ".csv",
  data,
  delimiter = ",",
  fmt = "%d", ## every value is written as an integer
  header = "Col1, Col2" ## written as the first line, prefixed with "# " unless `comments` is overridden
)

Cartesian

Indexing

High space complexity: the entire product, an array of shape (M, len(arrays)), is held in memory at once.

import numpy as np

def cartesian(arrays, out=None):
    """
    Generate a Cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the Cartesian product of.
    out : ndarray, optional
        Array of shape (M, len(arrays)) to place the Cartesian product in.
        When omitted, a new array is allocated with the common dtype of all
        inputs, so that values of later arrays are not truncated to the
        dtype of the first one.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing Cartesian products
        formed of input arrays, where M is the product of the input sizes.
        M is 0 (an empty result) when any input array is empty.

    Examples
    --------
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])

    """

    arrays = [np.asarray(x) for x in arrays]
    head = arrays[0]

    # Count rows with Python ints: exact for any size, no float round-trip.
    n = 1
    for x in arrays:
        n *= x.size

    if out is None:
        try:
            dtype = np.result_type(*(x.dtype for x in arrays))
        except TypeError:
            # No common dtype exists; fall back to the first array's dtype.
            dtype = head.dtype
        out = np.zeros([n, len(arrays)], dtype=dtype)

    # An empty input yields an empty product; nothing to fill in.
    if n == 0:
        return out

    # Each value of the first array heads a block of m consecutive rows.
    m = n // head.size
    out[:, 0] = np.repeat(head, m)
    if arrays[1:]:
        # Fill the first block recursively, then copy it into the others.
        cartesian(arrays[1:], out=out[0:m, 1:])
        for j in range(1, head.size):
            out[j*m:(j+1)*m, 1:] = out[0:m, 1:]
    return out

CRPS

# Adapted to numpy from pyro.ops.stats.crps_empirical
# Copyright (c) 2017-2019 Uber Technologies, Inc.
# SPDX-License-Identifier: Apache-2.0

import numpy as np
def crps(y_true, y_pred, sample_weight=None):
    """
    Empirical continuous ranked probability score, averaged over observations.

    Parameters
    ----------
    y_true : array-like
        Observed values; must broadcast against ``y_pred[i]``.
    y_pred : array-like
        Samples from the predictive distribution. The first axis indexes the
        samples; any remaining axes index the observations. A 1-D input is
        treated as samples for a single observation.
    sample_weight : array-like, optional
        Per-observation weights handed to ``np.average``.

    Returns
    -------
    float
        (Weighted) mean of the per-observation scores.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    num_samples = y_pred.shape[0]
    absolute_error = np.mean(np.abs(y_pred - y_true), axis=0)

    # With a single sample the spread term vanishes and only the error remains.
    if num_samples == 1:
        return np.average(absolute_error, weights=sample_weight)

    # After sorting, the gap between neighbours i and i+1 appears in
    # (i + 1) * (num_samples - i - 1) of the pairwise sample distances.
    y_pred = np.sort(y_pred, axis=0)
    diff = y_pred[1:] - y_pred[:-1]
    weight = np.arange(1, num_samples) * np.arange(num_samples - 1, 0, -1)
    # Add one trailing axis per observation axis so the weights line up with
    # the sample axis of `diff` whatever its number of dimensions.
    weight = weight.reshape(weight.shape + (1,) * (diff.ndim - 1))

    per_obs_crps = absolute_error - np.sum(diff * weight, axis=0) / num_samples**2
    return np.average(per_obs_crps, weights=sample_weight)

Linear Regression

class WLS(RegressorMixin):
    """
    Weighted least squares with an optional ridge (L2) penalty.

    - Uses element-wise multiplication (*) for weighted regression to avoid the more cumbersome .dot and .diag
    - np.linalg.lstsq is the most stable method, others are
        - np.linalg.solve
        - np.linalg.pinv
        - np.linalg.inv
    - Add 1s if intercept required

    Parameters
    ----------
    fit_intercept : bool
        Prepend a column of 1s to X; the intercept is then ``coef[0]``.
    alpha : float
        Ridge penalty strength; 0 gives plain weighted least squares.
    penalize_intercept : bool
        Whether the ridge penalty also applies to the intercept.
    **init_params
        Accepted and ignored.

    Attributes (set by ``fit``)
    ---------------------------
    n_features : number of columns of X, excluding the intercept column.
    coef : fitted coefficients, intercept first when ``fit_intercept`` is True.
    """
    def __init__(self, fit_intercept=True, alpha=0, penalize_intercept=False, **init_params):
        self.fit_intercept = fit_intercept
        self.alpha = alpha
        self.penalize_intercept = penalize_intercept

    def _design_matrix(self, X):
        """Return X as a float array, with a leading column of 1s if an intercept is fitted."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = np.c_[np.ones(X.shape[0]), X]
        return X

    def fit(self, X, y, sample_weight=None, **fit_params):
        """
        Fit the coefficients.

        X is (n_samples, n_features); y is (n_samples,) or
        (n_samples, n_targets); sample_weight is (n_samples,) and defaults
        to all ones. Returns ``self``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, self.n_features = X.shape  # Exclude intercept

        if sample_weight is None:
            sample_weight = np.ones(n_samples)

        # Scaling the rows by sqrt(w) turns the weighted problem into an
        # ordinary least-squares problem.
        w = np.sqrt(np.asarray(sample_weight, dtype=float))
        w_col = w.reshape(n_samples, 1)

        X = self._design_matrix(X)
        n_coef = X.shape[1]

        # Ridge as extra rows: sqrt(alpha) * I appended to X with zero
        # targets adds alpha * ||coef||^2 to the objective.
        penalty = np.sqrt(self.alpha) * np.eye(n_coef)
        if self.fit_intercept and not self.penalize_intercept:
            # The intercept is column 0 of the design matrix.
            penalty[0, 0] = 0.0

        y_weighted = y * (w if y.ndim == 1 else w_col)
        X_aug = np.vstack([X * w_col, penalty])
        y_aug = np.concatenate([y_weighted, np.zeros((n_coef,) + y.shape[1:])])

        self.coef = np.linalg.lstsq(X_aug, y_aug, rcond=None)[0]

        return self

    def predict(self, X, y=None):
        """Return predictions for X (n_samples, n_features); ``y`` is ignored."""
        return self._design_matrix(X) @ self.coef

Mini-Batch

class MiniBatchWLS(RegressorMixin):
    """
    Weighted ridge least squares fitted by accumulating the normal equations
    (X'WX and X'Wy) over row batches, so only one batch is expanded at a time.

    - Uses element-wise multiplication (*) for weighted regression to avoid the more cumbersome .dot and .diag
    - np.linalg.lstsq is the most stable method, others are
        - np.linalg.solve
        - np.linalg.pinv
        - np.linalg.inv
    - Add 1s if intercept required
    - Forming X'WX squares the condition number, so this is less accurate
      than solving on the full design matrix when X is ill-conditioned.

    Parameters
    ----------
    fit_intercept : bool
        Prepend a column of 1s to X; the intercept is then ``coef[0]``.
    alpha : float
        Ridge penalty strength; 0 gives plain weighted least squares.
    penalize_intercept : bool
        Whether the ridge penalty also applies to the intercept.
    n_splits : int
        Number of row batches the data is divided into.
    n_jobs : int
        Stored only; batches are currently processed sequentially.
    **init_params
        Accepted and ignored.

    Attributes (set by ``fit``)
    ---------------------------
    n_samples, n_features : shape of X, excluding the intercept column.
    split_size : number of rows per batch (the last batch may be shorter).
    coef : fitted coefficients, intercept first when ``fit_intercept`` is True.
    """
    def __init__(self, fit_intercept=True, alpha=0, penalize_intercept=False, n_splits=4, n_jobs=1, **init_params):
        self.fit_intercept = fit_intercept
        self.alpha = alpha
        self.penalize_intercept = penalize_intercept
        self.n_splits = n_splits
        self.n_jobs = n_jobs

    def _design_matrix(self, X):
        """Return X as a float array, with a leading column of 1s if an intercept is fitted."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = np.c_[np.ones(X.shape[0]), X]
        return X

    def fit(self, X, y, sample_weight=None, **fit_params):
        """
        Fit the coefficients.

        X is (n_samples, n_features); y is (n_samples,) or
        (n_samples, n_targets); sample_weight is (n_samples,) and defaults
        to all ones. Returns ``self``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        self.n_samples, self.n_features = X.shape  # Exclude intercept

        # Ceiling division, so the trailing rows land in a final shorter
        # batch instead of being dropped.
        self.split_size = max(1, -(-self.n_samples // self.n_splits))

        if sample_weight is None:
            sample_weight = np.ones(self.n_samples)
        sample_weight = np.asarray(sample_weight, dtype=float)

        n_coef = self.n_features + (1 if self.fit_intercept else 0)
        XtX = np.zeros((n_coef, n_coef))
        Xty = np.zeros((n_coef,) + y.shape[1:])

        # parallelize this
        for idx_start in range(0, self.n_samples, self.split_size):
            idx_end = idx_start + self.split_size

            Xg = self._design_matrix(X[idx_start:idx_end])
            yg = y[idx_start:idx_end]
            wg = sample_weight[idx_start:idx_end]

            # The weights enter once per row: X'WX = (W X)' X, X'Wy = (W X)' y.
            Xg_weighted = Xg * wg.reshape(-1, 1)
            XtX += Xg_weighted.T @ Xg
            Xty += Xg_weighted.T @ yg

        # Ridge adds alpha to the diagonal of the normal matrix.
        penalty = self.alpha * np.eye(n_coef)
        if self.fit_intercept and not self.penalize_intercept:
            # The intercept is column 0 of the design matrix.
            penalty[0, 0] = 0.0

        self.coef = np.linalg.lstsq(XtX + penalty, Xty, rcond=None)[0]

        return self

    def predict(self, X, y=None):
        """Return predictions for X (n_samples, n_features); ``y`` is ignored."""
        return self._design_matrix(X) @ self.coef
Last Updated: 2025-03-23 ; Contributors: AhmedThahir, web-flow

Comments