using StatsBase randsubseq2(A::AbstractVector, m::Integer) = randsubseq2!(Array(eltype(A),m), A) function randsubseq2!(S::AbstractVector, A::AbstractArray) m = length(S) m == 0 && return S # needed for correctness: code below requires m > 0 n = length(A) m == n && return copy!(S, A) 0 <= m <= n || throw(DomainError()) i = 0; j = 0 while true if (n - i)*rand() <= m - j S[j += 1] = A[i += 1] j == m && return S else i += 1 end end end vitter(A::AbstractArray, k::Integer) = vitter!(A,Array(eltype(A),k)) function vitter!(A::AbstractArray, x::AbstractArray) N = length(A) n = length(x) V_prime = exp(log(rand())/n) quant1 = N-n+1 quant2 = quant1/N alpha_inverse = 15 threshold = alpha_inverse * n ai=1 local S while n>1 && thresholdS bottom = N-n limit = N-S else bottom = N-S-1 limit = quant1 end for top=N-1:-1:limit y *= top/bottom bottom = bottom - 1 end if exp(log(y)/(n-1)) <= N/(N-X) # Accept S V_prime = exp(log(rand())/(n-1)) break end V_prime = exp(log(rand())/n) end # Select (S+1)st record N -= S+1 n -= 1 x[end-n] = A[ai+=S] quant1 -= S quant2 = quant1/N threshold -= alpha_inverse end if n>1 # Algo A top = N - n for n = n-1:-1:2 # FIXME: this n-1 was originally n V = rand() S = 1 quot = top/N while quot > V S = S+1 top = top-1 N = N-1 quot *= top/N end x[end-n] = A[ai+=S] N-=1 end end #One left: S = itrunc(N*V_prime) x[end] = A[ai+=S] end function rand_first_index(n, k) r = rand() p = k/n i = 1 while p < r i += 1 p += (1-p)k/(n-(i-1)) end return i end ordered_sample_norep{T}(xs::AbstractArray{T}, k::Integer) = ordered_sample_norep!(xs, Array(T,k)) function ordered_sample_norep!(xs::AbstractArray, target::AbstractArray) n = length(xs) k = length(target) i = 0 for j in 1:k step = rand_first_index(n, k) n -= step i += step target[j] = xs[i] k -= 1 end return target end function floyd1{T}(a::AbstractArray{T}, m::Integer) n = length(a) 0 <= m <= n || throw(DomainError()) # Floyd's Õ(m) algorithm from J. Bentley and B. Floyd, "Programming pearls: A sample of brilliance," # Commun. ACM Magazine 30, no. 9, 754--757 (1987). # Adapted from https://github.com/JuliaLang/julia/issues/6714#issuecomment-42057923 #s = IntSet() s = Set{Int}() x = Array(T, m) for (xind,j) = enumerate(n-m+1:n) i = rand(1:j) ind = i in s ? j : i x[xind] = a[ind] push!(s, ind) end x end function floyd1p{T}(a::AbstractArray{T}, m::Integer) n = length(a) 0 <= m <= n || throw(DomainError()) # Floyd's Õ(m) algorithm from J. Bentley and B. Floyd, "Programming pearls: A sample of brilliance," # Commun. ACM Magazine 30, no. 9, 754--757 (1987). # Adapted from https://github.com/JuliaLang/julia/issues/6714#issuecomment-42057923 #s = IntSet() s = Set{Int}() x = T[] sizehint(x, m) for j = n-m+1:n i = rand(1:j) if i in s #use j push!(x,a[j]) push!(s,j) else unshift!(x,a[j]) push!(s,i) end end x end # warmup sample(1:1000, 100) floyd1(1:1000, 100) floyd1p(1:1000, 100) ordered_sample_norep(1:1000, 100) vitter(1:1000, 100) N=10_000 krange = 5:5:200 navg=200 using PyPlot close("all") labels=["sample ordered", "sample unordered", "randsubseq2", "vitter", "floyd ordered", "floyd unordered"] #"one-more-min ordered"] times = {Float64[] for x=1:length(labels)} for i=krange gc_disable() meas = [median([@elapsed sample(1:N, i, replace=false, ordered=true) for x=1:navg]), median([@elapsed sample(1:N, i, replace=false) for x=1:navg]), median([@elapsed randsubseq2(1:N, i) for x=1:navg]), median([@elapsed vitter(1:N, i) for x=1:navg]), median([@elapsed floyd1(1:N, i) for x=1:navg]), median([@elapsed floyd1p(1:N, i) for x=1:navg])] #median([@elapsed ordered_sample_norep(1:N, i) for x=1:10])] gc_enable() gc() [push!(x,t) for (x,t) in zip(times,meas)] end [plot([krange],t,label=l,linewidth=5, alpha=0.7) for (t,l) in zip(times,labels)] legend(loc="best") title("From $(1:N), sample ...")