Skip to content

Introduction

Custom Test

Mutual information

def _mutual_information_dependence_matrix(data, columns):
    """Return the mutual-information-based dependence matrix for ``columns``.

    This is the single place to change when adapting the template: it
    replaces the Pearson correlation matrix that a plain Fisher-z test would
    use, i.e. ``np.corrcoef(data[:, columns].T)``.

    Parameters
    ----------
    data : ndarray
        The dataset; rows are samples, columns are variables.
    columns : list of int
        Column indices ``[x, y, *z]`` to include, in that order.

    Returns
    -------
    ndarray
        A square ``(len(columns), len(columns))`` matrix of correlation-like
        coefficients. Entry ``[0, 1]`` is read as the x-y dependence, and the
        whole matrix is inverted when a conditioning set is given.

    Raises
    ------
    NotImplementedError
        Always, until a mutual-information estimator is plugged in here.
    """
    raise NotImplementedError(
        "Plug a mutual-information-based dependence measure into "
        "_mutual_information_dependence_matrix(); the Pearson equivalent it "
        "replaces is np.corrcoef(data[:, columns].T)."
    )


def mutual_information_fisherz_test(data, x, y, z):
    """Fisher's z-transform conditional independence test, *using Mutual
    Information* instead of correlation.

    The dependence matrix over ``[x, y, *z]`` is obtained from
    ``_mutual_information_dependence_matrix``. Without a conditioning set the
    x-y entry is used directly; otherwise the matrix is inverted and the
    partial coefficient of x and y given z is read off the inverse. The
    coefficient is then Fisher z-transformed and compared against a standard
    normal distribution (two-sided).

    Parameters
    ----------
    data : ndarray
        The dataset on which to test the independence condition.
    x : int
        Column index of the first variable in ``data``.
    y : int
        Column index of the second variable in ``data``.
    z : list of int
        Column indices, different from x and y, forming the separating set
        that (potentially) makes x and y independent. Pass ``[]`` for an
        unconditional test; the argument has no default.

    Returns
    -------
    _ : None
    _ : None
    p : float
        The p-value of conditional independence.

    Raises
    ------
    ValueError
        If ``data.shape[0] - len(z) - 3`` is negative, i.e. there are too few
        samples for the Fisher z statistic.
    NotImplementedError
        Until ``_mutual_information_dependence_matrix`` is implemented.

    Examples
    --------
    Once the dependence hook is implemented:

    >>> import numpy as np
    >>> np.random.seed(23)
    >>> data = np.random.rand(2500, 4)
    >>> _, _, p_value = mutual_information_fisherz_test(data, 0, 1, [2, 3])  # doctest: +SKIP
    """
    # Local imports: the snippet shows no import block, so the names it
    # relies on are made explicit here.
    import math

    import numpy as np
    from scipy import stats

    n = data.shape[0]
    k = len(z)

    # Validate up front so the caller gets a clear message instead of
    # math.sqrt's bare "math domain error".
    dof = n - k - 3
    if dof < 0:
        raise ValueError(
            f"Need at least {k + 3} samples to test with {k} conditioning "
            f"variable(s); got {n}."
        )

    dependence = np.asarray(
        _mutual_information_dependence_matrix(data, [x, y, *z]), dtype=float
    )

    if k == 0:
        r = dependence[0, 1]
    else:
        # The partial coefficient comes from the inverse matrix; fall back to
        # the pseudo-inverse when the matrix is singular.
        try:
            precision = np.linalg.inv(dependence)
        except np.linalg.LinAlgError:
            precision = np.linalg.pinv(dependence)
        r = -1 * precision[0, 1] / math.sqrt(
            abs(precision[0, 0] * precision[1, 1])
        )

    # Keep r strictly inside (-1, 1) so the transform below stays finite.
    cut_at = 0.99999
    r = min(cut_at, max(-cut_at, r))

    # Fisher's z-transform: 0.5 * log((1 + r) / (1 - r)), written with log1p.
    res = math.sqrt(dof) * 0.5 * math.log1p((2 * r) / (1 - r))
    # Survival function instead of 1 - cdf: avoids the p-value collapsing to
    # exactly 0 for large |res|.
    p_value = 2 * stats.norm.sf(abs(res))

    return None, None, p_value

# Hand the custom conditional-independence test defined above to PC.
# NOTE(review): PC and its import are not shown in this snippet — confirm
# where it comes from before running.
pc = PC(alpha=0.05, ci_test=mutual_information_fisherz_test)

Last Updated: 2025-01-13

Comments