KatsuyaITO · November 20, 2016 06:22 · KatsuyaITO · Nov 15, 2016
diff --git a/brunner_munzel_test.py b/brunner_munzel_test.py
 import pandas as pd
 import numpy as np
 from scipy import stats
 from collections import namedtuple

 BrunnerMunzelResult = namedtuple('BrunnerMunzelResult', ('statistic', 'pvalue'))


 def brunner_munzel_test(x, y, alternative="two_sided", alpha=0.5):
     """
     Compute the Brunner-Munzel test statistic and its p-value.

     Masked entries of `x` and `y` (``numpy.ma`` masks) are discarded before
     ranking. NaN values are *not* removed; mask them beforehand if they
     should be ignored.

     Parameters
     ----------
     x : sequence
         First sample.
     y : sequence
         Second sample.
     alternative : str, optional
         Selects which tail of the t distribution is reported:

         * ``"greater"`` or ``"g"`` : the t CDF evaluated at the statistic.
         * ``"less"`` or ``"l"`` : the upper tail (1 - CDF) at the statistic.
         * anything else (default ``"two_sided"``) : twice the upper tail
           at the absolute value of the statistic.
     alpha : float, optional
         Accepted for backward compatibility only; it is not used.

     Returns
     -------
     statistic : float
         The Brunner-Munzel statistic. It is positive when the pooled ranks
         of `y` are larger on average than those of `x`.
     pvalue : float
         Approximate p-value from a t distribution whose degrees of freedom
         are estimated from the two rank variances.

     Notes
     -----
     When either sample has fewer than two observations the rank variances
     are undefined and ``(nan, nan)`` is returned.
     """
     x = np.ma.asarray(x).compressed().view(np.ndarray)
     y = np.ma.asarray(y).compressed().view(np.ndarray)
     nx, ny = len(x), len(y)
     if nx < 2 or ny < 2:
         # The variance estimates divide by (n - 1); report the undefined
         # result explicitly instead of through divide-by-zero warnings.
         return BrunnerMunzelResult(np.nan, np.nan)

     # Ranks in the pooled sample, split back into the two groups.
     pooled_ranks = stats.rankdata(np.concatenate([x, y]))
     pooled_x, pooled_y = pooled_ranks[:nx], pooled_ranks[nx:]
     mean_x = np.mean(pooled_x)
     mean_y = np.mean(pooled_y)

     # Pooled rank minus within-sample rank, centred on the respective
     # means; (n + 1) / 2 is the mean of the within-sample ranks. The 2.0
     # keeps this a true division on Python 2 as well.
     dev_x = pooled_x - stats.rankdata(x) - mean_x + (nx + 1) / 2.0
     dev_y = pooled_y - stats.rankdata(y) - mean_y + (ny + 1) / 2.0
     v1 = np.sum(dev_x ** 2) / (nx - 1)
     v2 = np.sum(dev_y ** 2) / (ny - 1)

     pooled_var = nx * v1 + ny * v2
     statistic = nx * ny * (mean_y - mean_x) / (nx + ny) / np.sqrt(pooled_var)
     # Degrees of freedom estimated from the two variance components.
     dfbm = pooled_var ** 2 / (
         (nx * v1) ** 2 / (nx - 1) + (ny * v2) ** 2 / (ny - 1)
     )

     if alternative in ("greater", "g"):
         prob = stats.t.cdf(statistic, dfbm)
     elif alternative in ("less", "l"):
         # sf is 1 - cdf, computed without cancellation in the far tail.
         prob = stats.t.sf(statistic, dfbm)
     else:
         prob = 2 * stats.t.sf(np.abs(statistic), dfbm)

     return BrunnerMunzelResult(statistic, prob)
	import pandas as pd
	import numpy as np
	from scipy import stats
	from collections import namedtuple

	BrunnerMunzelResult = namedtuple('BrunnerMunzelResult', ('statistic', 'pvalue'))


	def brunner_munzel_test(x, y, alternative="two_sided", alpha=0.5):
	    """
	    Compute the Brunner-Munzel test statistic and its p-value.

	    Masked entries of `x` and `y` (``numpy.ma`` masks) are discarded before
	    ranking. NaN values are *not* removed; mask them beforehand if they
	    should be ignored.

	    Parameters
	    ----------
	    x : sequence
	        First sample.
	    y : sequence
	        Second sample.
	    alternative : str, optional
	        Selects which tail of the t distribution is reported:

	        * ``"greater"`` or ``"g"`` : the t CDF evaluated at the statistic.
	        * ``"less"`` or ``"l"`` : the upper tail (1 - CDF) at the statistic.
	        * anything else (default ``"two_sided"``) : twice the upper tail
	          at the absolute value of the statistic.
	    alpha : float, optional
	        Accepted for backward compatibility only; it is not used.

	    Returns
	    -------
	    statistic : float
	        The Brunner-Munzel statistic. It is positive when the pooled ranks
	        of `y` are larger on average than those of `x`.
	    pvalue : float
	        Approximate p-value from a t distribution whose degrees of freedom
	        are estimated from the two rank variances.

	    Notes
	    -----
	    When either sample has fewer than two observations the rank variances
	    are undefined and ``(nan, nan)`` is returned.
	    """
	    x = np.ma.asarray(x).compressed().view(np.ndarray)
	    y = np.ma.asarray(y).compressed().view(np.ndarray)
	    nx, ny = len(x), len(y)
	    if nx < 2 or ny < 2:
	        # The variance estimates divide by (n - 1); report the undefined
	        # result explicitly instead of through divide-by-zero warnings.
	        return BrunnerMunzelResult(np.nan, np.nan)

	    # Ranks in the pooled sample, split back into the two groups.
	    pooled_ranks = stats.rankdata(np.concatenate([x, y]))
	    pooled_x, pooled_y = pooled_ranks[:nx], pooled_ranks[nx:]
	    mean_x = np.mean(pooled_x)
	    mean_y = np.mean(pooled_y)

	    # Pooled rank minus within-sample rank, centred on the respective
	    # means; (n + 1) / 2 is the mean of the within-sample ranks. The 2.0
	    # keeps this a true division on Python 2 as well.
	    dev_x = pooled_x - stats.rankdata(x) - mean_x + (nx + 1) / 2.0
	    dev_y = pooled_y - stats.rankdata(y) - mean_y + (ny + 1) / 2.0
	    v1 = np.sum(dev_x ** 2) / (nx - 1)
	    v2 = np.sum(dev_y ** 2) / (ny - 1)

	    pooled_var = nx * v1 + ny * v2
	    statistic = nx * ny * (mean_y - mean_x) / (nx + ny) / np.sqrt(pooled_var)
	    # Degrees of freedom estimated from the two variance components; both
	    # the numerator and each denominator term are squared.
	    dfbm = pooled_var ** 2 / (
	        (nx * v1) ** 2 / (nx - 1) + (ny * v2) ** 2 / (ny - 1)
	    )

	    if alternative in ("greater", "g"):
	        prob = stats.t.cdf(statistic, dfbm)
	    elif alternative in ("less", "l"):
	        # sf is 1 - cdf, computed without cancellation in the far tail.
	        prob = stats.t.sf(statistic, dfbm)
	    else:
	        prob = 2 * stats.t.sf(np.abs(statistic), dfbm)

	    return BrunnerMunzelResult(statistic, prob)
No results found