Skip to content

Instantly share code, notes, and snippets.

@rampion
Forked from jberryman/med.hs
Created March 22, 2012 13:44
Show Gist options
  • Select an option

  • Save rampion/2158429 to your computer and use it in GitHub Desktop.

Select an option

Save rampion/2158429 to your computer and use it in GitHub Desktop.

Revisions

  1. rampion revised this gist Mar 22, 2012. 2 changed files with 48 additions and 13 deletions.
    48 changes: 48 additions & 0 deletions Levenshtein.hs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,48 @@
    module Main where

    import qualified Data.List as L
    import qualified Data.Vector.Unboxed.Safe as V
    import Control.Monad (replicateM)
    import System.Random (randomRIO)
    import Criterion.Main

    med :: String -> String -> Int
    med s1 = V.last . V.ifoldl' scanS1 costs_i . V.fromList
    where vs1 = V.fromList s1
    costs_i = V.enumFromN 1 $ V.length vs1 -- [0.. length s1]
    scanS1 costs_W costSW_i c2 =
    let v = V.zip vs1 costs_W
    v' = V.postscanl' scanVec (costSW_i, costSW_i + 1) v
    scanVec (costSW, costS) (c1, costW) =
    (costW, min (min costS costW + 1)
    (costSW + if c1 == c2 then 0 else 2))
    in snd $ V.unzip v'

    med' :: String -> String -> Int
    med' s1 = V.last . L.foldl' (uncurry . scanS1) costs_i . zip [0..]
    where vs1 = V.fromList s1
    costs_i = V.enumFromN 1 $ V.length vs1 -- [0.. length s1]
    scanS1 costs_W costSW_i c2 =
    let v = V.zip vs1 costs_W
    v' = V.postscanl' scanVec (costSW_i, costSW_i + 1) v
    scanVec (costSW, costS) (c1, costW) =
    (costW, min (min costS costW + 1)
    (costSW + if c1 == c2 then 0 else 2))
    in snd $ V.unzip v'

    main :: IO ()
    main = do
    string1 <- replicateM 200 $ randomRIO ('a','z')
    string2 <- replicateM 200 $ randomRIO ('a','z')
    defaultMain [ bench "vectors" $ whnf (uncurry med) (string1, string2)
    , bench "one list" $ whnf (uncurry med') (string1, string2)
    ]
    {-
    benchmarking vectors
    mean: 642.2392 us, lb 639.2511 us, ub 645.0969 us, ci 0.950
    std dev: 14.95784 us, lb 12.68023 us, ub 18.27670 us, ci 0.950
    benchmarking one list
    mean: 719.9067 us, lb 717.2804 us, ub 722.7214 us, ci 0.950
    std dev: 13.96940 us, lb 12.11219 us, ub 16.36222 us, ci 0.950
    -}
    13 changes: 0 additions & 13 deletions med.hs
    Original file line number Diff line number Diff line change
    @@ -1,13 +0,0 @@
    import qualified Data.Vector.Unboxed.Safe as V

    med :: String -> String -> Int
    med s1 = V.last . V.ifoldl' scanS1 costs_i . V.fromList
    where vs1 = V.fromList s1
    costs_i = V.enumFromN 1 $ V.length vs1 -- [0.. length s1]
    scanS1 costs_W costSW_i c2 =
    let v = V.zip vs1 costs_W
    v' = V.postscanl' scanVec (costSW_i, costSW_i + 1) v
    scanVec (costSW, costS) (c1, costW) =
    (costW, min (min costS costW + 1)
    (costSW + if c1 == c2 then 0 else 2))
    in snd $ V.unzip v'
  2. @jberryman jberryman created this gist Mar 21, 2012.
    13 changes: 13 additions & 0 deletions med.hs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,13 @@
    import qualified Data.Vector.Unboxed.Safe as V

    med :: String -> String -> Int
    med s1 = V.last . V.ifoldl' scanS1 costs_i . V.fromList
    where vs1 = V.fromList s1
    costs_i = V.enumFromN 1 $ V.length vs1 -- [0.. length s1]
    scanS1 costs_W costSW_i c2 =
    let v = V.zip vs1 costs_W
    v' = V.postscanl' scanVec (costSW_i, costSW_i + 1) v
    scanVec (costSW, costS) (c1, costW) =
    (costW, min (min costS costW + 1)
    (costSW + if c1 == c2 then 0 else 2))
    in snd $ V.unzip v'