KatsuyaITO · November 20, 2016 06:22 · Nov 20, 2016 · Nov 16, 2016 · Nov 15, 2016
diff --git a/brunner_munzel_test.py b/brunner_munzel_test.py
@@ -36,9 +36,6 @@ def brunner_munzel_test(x,y,alternative="two_sided",alpha=0.5):
     ranky = stats.rankdata(y)
     rank_mean1 = np.mean(ranks[0:nx])
     rank_mean2 = np.mean(ranks[nx :nx+ny])
-    print(ranks)
-    print(ranks[0:nx],ranks[nx :nx+ny])
-
     pst = (rank_mean2 - (ny + 1)/2)/nx
 
     v1_set = [(i - j - rank_mean1 + (nx + 1)/2)**2 for (i,j) in zip(ranks[0:nx],rankx)]

diff --git a/brunner_munzel_test.py b/brunner_munzel_test.py
@@ -53,7 +53,6 @@ def brunner_munzel_test(x,y,alternative="two_sided",alpha=0.5):
     elif ((alternative == "less") | (alternative == "l")) :
         prob = 1-stats.t.cdf(statistic,dfbm)
     else:
-        alternative = "two_sided"
         abst = np.abs(statistic)
         prob  = stats.t.cdf(abst,dfbm)
         prob= 2 * min(prob, 1-prob)

diff --git a/brunner_munzel_test.py b/brunner_munzel_test.py
@@ -0,0 +1,61 @@
+import pandas as pd
+import numpy as np
+from scipy import stats
+from collections import namedtuple
+
+BrunnerMunzelResult = namedtuple('BrunnerMunzelResult', ('statistic','pvalue'))
+
+def brunner_munzel_test(x,y,alternative="two_sided",alpha=0.5):
+
+    """
+    Computes the Brunner Munzel statistic
+
+    Missing values in `x` and/or `y` are discarded.
+
+    Parameters
+    ----------
+    x : sequence
+        Input
+    y : sequence
+        Input
+    alternative : {greater, less, two_sided }
+
+    Returns
+    -------
+    statistic : float
+        The Brunner Munzel  statistics
+    pvalue : float
+        Approximate p-value assuming a t distribution.
+
+     """
+    x = np.ma.asarray(x).compressed().view(np.ndarray)
+    y = np.ma.asarray(y).compressed().view(np.ndarray)
+    ranks = stats.rankdata(np.concatenate([x,y]))
+    (nx, ny) = (len(x), len(y))
+    rankx = stats.rankdata(x)
+    ranky = stats.rankdata(y)
+    rank_mean1 = np.mean(ranks[0:nx])
+    rank_mean2 = np.mean(ranks[nx :nx+ny])
+    print(ranks)
+    print(ranks[0:nx],ranks[nx :nx+ny])
+
+    pst = (rank_mean2 - (ny + 1)/2)/nx
+
+    v1_set = [(i - j - rank_mean1 + (nx + 1)/2)**2 for (i,j) in zip(ranks[0:nx],rankx)]
+    v2_set = [(i - j - rank_mean2 + (ny + 1)/2)**2 for (i,j) in zip(ranks[nx :nx+ny] ,ranky)]
+
+    v1 = np.sum(v1_set)/(nx - 1)
+    v2 = np.sum(v2_set)/(ny - 1)
+    statistic = nx * ny * (rank_mean2 - rank_mean1)/(nx + ny)/np.sqrt(nx * v1 +  ny * v2)
+    dfbm = ((nx * v1 + ny * v2)**2)/(((nx * v1)**2)/(nx - 1) + ((ny * v2)**2)/(ny - 1))
+    if ((alternative == "greater") | (alternative == "g")) :
+        prob = stats.t.cdf(statistic,dfbm)
+    elif ((alternative == "less") | (alternative == "l")) :
+        prob = 1-stats.t.cdf(statistic,dfbm)
+    else:
+        alternative = "two_sided"
+        abst = np.abs(statistic)
+        prob  = stats.t.cdf(abst,dfbm)
+        prob= 2 * min(prob, 1-prob)
+
+    return BrunnerMunzelResult(statistic,prob)
No results found