Skip to content

Instantly share code, notes, and snippets.

@KatsuyaITO
Last active November 20, 2016 06:22
Show Gist options
  • Select an option

  • Save KatsuyaITO/e45a75526e7cd4ea59eaad3ecb79c2b1 to your computer and use it in GitHub Desktop.

Select an option

Save KatsuyaITO/e45a75526e7cd4ea59eaad3ecb79c2b1 to your computer and use it in GitHub Desktop.

Revisions

  1. Katsuya ITO revised this gist Nov 20, 2016. 1 changed file with 0 additions and 3 deletions.
    3 changes: 0 additions & 3 deletions brunner_munzel_test.py
    Original file line number Diff line number Diff line change
    @@ -36,9 +36,6 @@ def brunner_munzel_test(x,y,alternative="two_sided",alpha=0.5):
    ranky = stats.rankdata(y)
    rank_mean1 = np.mean(ranks[0:nx])
    rank_mean2 = np.mean(ranks[nx :nx+ny])
    print(ranks)
    print(ranks[0:nx],ranks[nx :nx+ny])

    pst = (rank_mean2 - (ny + 1)/2)/nx

    v1_set = [(i - j - rank_mean1 + (nx + 1)/2)**2 for (i,j) in zip(ranks[0:nx],rankx)]
  2. Katsuya ITO revised this gist Nov 16, 2016. 1 changed file with 0 additions and 1 deletion.
    1 change: 0 additions & 1 deletion brunner_munzel_test.py
    Original file line number Diff line number Diff line change
    @@ -53,7 +53,6 @@ def brunner_munzel_test(x,y,alternative="two_sided",alpha=0.5):
    elif ((alternative == "less") | (alternative == "l")) :
    prob = 1-stats.t.cdf(statistic,dfbm)
    else:
    alternative = "two_sided"
    abst = np.abs(statistic)
    prob = stats.t.cdf(abst,dfbm)
    prob= 2 * min(prob, 1-prob)
  3. Katsuya ITO created this gist Nov 15, 2016.
    61 changes: 61 additions & 0 deletions brunner_munzel_test.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,61 @@
    import pandas as pd
    import numpy as np
    from scipy import stats
    from collections import namedtuple

    BrunnerMunzelResult = namedtuple('BrunnerMunzelResult', ('statistic','pvalue'))

    def brunner_munzel_test(x,y,alternative="two_sided",alpha=0.5):

    """
    Computes the Brunner Munzel statistic
    Missing values in `x` and/or `y` are discarded.
    Parameters
    ----------
    x : sequence
    Input
    y : sequence
    Input
    alternative : {greater, less, two_sided }
    Returns
    -------
    statistic : float
    The Brunner Munzel statistics
    pvalue : float
    Approximate p-value assuming a t distribution.
    """
    x = np.ma.asarray(x).compressed().view(np.ndarray)
    y = np.ma.asarray(y).compressed().view(np.ndarray)
    ranks = stats.rankdata(np.concatenate([x,y]))
    (nx, ny) = (len(x), len(y))
    rankx = stats.rankdata(x)
    ranky = stats.rankdata(y)
    rank_mean1 = np.mean(ranks[0:nx])
    rank_mean2 = np.mean(ranks[nx :nx+ny])
    print(ranks)
    print(ranks[0:nx],ranks[nx :nx+ny])

    pst = (rank_mean2 - (ny + 1)/2)/nx

    v1_set = [(i - j - rank_mean1 + (nx + 1)/2)**2 for (i,j) in zip(ranks[0:nx],rankx)]
    v2_set = [(i - j - rank_mean2 + (ny + 1)/2)**2 for (i,j) in zip(ranks[nx :nx+ny] ,ranky)]

    v1 = np.sum(v1_set)/(nx - 1)
    v2 = np.sum(v2_set)/(ny - 1)
    statistic = nx * ny * (rank_mean2 - rank_mean1)/(nx + ny)/np.sqrt(nx * v1 + ny * v2)
    dfbm = ((nx * v1 + ny * v2)**2)/(((nx * v1)**2)/(nx - 1) + ((ny * v2)**2)/(ny - 1))
    if ((alternative == "greater") | (alternative == "g")) :
    prob = stats.t.cdf(statistic,dfbm)
    elif ((alternative == "less") | (alternative == "l")) :
    prob = 1-stats.t.cdf(statistic,dfbm)
    else:
    alternative = "two_sided"
    abst = np.abs(statistic)
    prob = stats.t.cdf(abst,dfbm)
    prob= 2 * min(prob, 1-prob)

    return BrunnerMunzelResult(statistic,prob)