implementation module Data.SetBy

import StdClass, StdMisc, StdBool, StdFunc, StdInt
import Data.Maybe
from Data.GenLexOrd import :: LexOrd (..)
import Data.Monoid
from Data.Foldable import class Foldable (..)
import qualified StdList
from StdList import instance == [a]

/*
 * Applies the function to every element while keeping the tree shape and the
 * stored sizes unchanged.
 * This function should only be used if the argument function preserves the ordering property of
 * the new set.
 */
mapSetByMonotonic :: !(a -> b) !(SetBy a) -> SetBy b
mapSetByMonotonic _ TipBy = TipBy
mapSetByMonotonic f (BinBy n x l r) = BinBy n (f x) (mapSetByMonotonic f l) (mapSetByMonotonic f r)

/*
 * Sets are size balanced trees.
 * A set of values @a@.
 * TipBy is the empty tree; BinBy holds the subtree size, the element, and the
 * left and right subtrees.
 */
:: SetBy a = TipBy
           | BinBy !Int !a !(SetBy a) !(SetBy a)

// Two sets are equal when their sizes agree and their element lists agree
// position by position. Two elements count as equal when neither
// `comp a b` nor `comp b a` holds.
isEqualBy :: !(a a -> Bool) !(SetBy a) !(SetBy a) -> Bool
isEqualBy comp s1 s2 = size s1 == size s2 && equalEltsBy comp (toAscList s1) (toAscList s2)
where
	equalEltsBy :: !(a a -> Bool) ![a] ![a] -> Bool
	equalEltsBy _ [] [] = True
	equalEltsBy _ [] _ = False
	equalEltsBy _ [_:_] [] = False
	equalEltsBy comp [a:as] [b:bs]
		| comp a b || comp b a = False
		| otherwise = equalEltsBy comp as bs

// Strict lexicographic "less than" on the element lists of both sets:
// the first differing position decides; a proper prefix is smaller.
isOrderedBy :: !(a a -> Bool) !(SetBy a) !(SetBy a) -> Bool
isOrderedBy comp s1 s2 = compare comp (toAscList s1) (toAscList s2)
where
	compare :: !(a a -> Bool) ![a] ![a] -> Bool
	compare _ [] [] = False
	compare _ [] _ = True
	compare _ [_:_] [] = False
	compare comp [a:as] [b:bs]
		| comp a b = True
		| comp b a = False
		| otherwise = compare comp as bs

// Three-way lexicographic comparison on the element lists of both sets.
lexOrdBy :: !(a a -> Bool) !(SetBy a) !(SetBy a) -> LexOrd
lexOrdBy comp s1 s2 = ordby comp (toAscList s1) (toAscList s2)
where
	ordby :: !(a a -> Bool) ![a] ![a] -> LexOrd
	ordby _ [] [] = EQ
	ordby _ [] _ = LT
	ordby _ [_:_] [] = GT
	ordby comp [a:as] [b:bs]
		| comp a b = LT
		| comp b a = GT
		| otherwise = ordby comp as bs

// Folds visit the elements in tree order: foldr consumes the right subtree
// first and combines towards the left; foldl does the opposite.
// NOTE(review): the equations for foldr and foldl used to appear twice under
// the same name, which made the second copy an unreachable alternative. They
// are defined here as the strict members foldr` and foldl` -- this assumes
// class Foldable declares those members; confirm against Data.Foldable.
instance Foldable SetBy where
	foldr f z (BinBy _ x l r) = foldr f (f x (foldr f z r)) l
	foldr _ z _ = z

	foldr` f z (BinBy _ x l r) = foldr` f (f x (foldr` f z r)) l
	foldr` _ z _ = z

	foldl f z (BinBy _ x l r) = foldl f (f (foldl f z l) x) r
	foldl _ z _ = z

	foldl` f z (BinBy _ x l r) = foldl` f (f (foldl` f z l) x) r
	foldl` _ z _ = z

/*--------------------------------------------------------------------
 * Query
 *--------------------------------------------------------------------*/

// Searches the tree using `comp` as the strict ordering: descend left when
// x is before the node value, right when it is after; otherwise x is found.
memberBy :: !(a a -> Bool) !a !(SetBy a) -> Bool
memberBy comp x (BinBy _ y l r)
	| comp x y = memberBy comp x l
	| comp y x = memberBy comp x r
	| otherwise = True
memberBy _ _ _ = False

/*--------------------------------------------------------------------
 * Construction
 *--------------------------------------------------------------------*/

// The empty set.
newSet :: SetBy a
newSet = TipBy

// A set holding exactly one element.
singleton :: !u:a -> w:(SetBy u:a), [w <= u]
singleton x = BinBy 1 x TipBy TipBy

/*--------------------------------------------------------------------
 * Insertion, Deletion
 *--------------------------------------------------------------------*/

// Inserts x. When an element equivalent to x under `comp` is already present
// the original tree is returned unchanged (the stored element is kept).
insertBy :: !(a a -> Bool) !a !.(SetBy a) -> SetBy a
insertBy comp x t=:(BinBy _ y l r)
	| comp x y = balanceL y (insertBy comp x l) r
	| comp y x = balanceR y l (insertBy comp x r)
	| otherwise = t
insertBy _ x _ = singleton x

// Removes the element equivalent to x, if any. After shrinking the left
// subtree only the right side can be too heavy (balanceR), and vice versa.
deleteBy :: !(a a -> Bool) !a !.(SetBy a) -> SetBy a
deleteBy comp x (BinBy _ y l r)
	| comp x y = balanceR y (deleteBy comp x l) r
	| comp y x = balanceL y l (deleteBy comp x r)
	| otherwise = glue l r
deleteBy _ _ tip = tip

/*--------------------------------------------------------------------
 * Subset
 *--------------------------------------------------------------------*/

// True when every element of the first set occurs in the second. The second
// set is split around the root of the first, and both halves are checked
// recursively. An empty first set is a subset of anything.
isSubsetOfXBy :: !(a a -> Bool) !(SetBy a) !(SetBy a) -> Bool
isSubsetOfXBy comp (BinBy _ x l r) t
	| t =: TipBy = False
	#! (lt, found, gt) = splitMemberBy comp x t
	= found && isSubsetOfXBy comp l lt && isSubsetOfXBy comp r gt
isSubsetOfXBy _ _ _ = True

/*--------------------------------------------------------------------
 * Minimal, Maximal
 *--------------------------------------------------------------------*/

// Leftmost element of the tree; aborts on the empty set.
findMin :: !(SetBy a) -> a
findMin (BinBy _ x TipBy _) = x
findMin (BinBy _ _ l _) = findMin l
findMin TipBy = abort "SetBy.findMin: empty set has no minimal element"

// Rightmost element of the tree; aborts on the empty set.
findMax :: !(SetBy a) -> a
findMax (BinBy _ x _ TipBy) = x
findMax (BinBy _ _ _ r) = findMax r
findMax TipBy = abort "SetBy.findMax: empty set has no maximal element"

// Removes the leftmost element; the empty set is returned unchanged.
deleteMin :: !.(SetBy a) -> SetBy a
deleteMin (BinBy _ _ TipBy r) = r
deleteMin (BinBy _ x l r) = balanceR x (deleteMin l) r
deleteMin TipBy = TipBy

// Removes the rightmost element; the empty set is returned unchanged.
deleteMax :: !.(SetBy a) -> SetBy a
deleteMax (BinBy _ _ l TipBy) = l
deleteMax (BinBy _ x l r) = balanceL x l (deleteMax r)
deleteMax TipBy = TipBy

/*--------------------------------------------------------------------
 * Union.
 *--------------------------------------------------------------------*/

// Union of two sets. Singleton arguments are handled by a plain insert; the
// general case splits t2 around the root of t1, unites the halves recursively
// and links them with that root.
// Note: in the singleton-left case the element is inserted into t2, so an
// equivalent element already stored in t2 is the one that is kept.
unionBy :: !(a a -> Bool) !u:(SetBy a) !u:(SetBy a) -> SetBy a
unionBy _ t1 TipBy = t1
unionBy comp t1 (BinBy _ x TipBy TipBy) = insertBy comp x t1
unionBy comp (BinBy _ x TipBy TipBy) t2 = insertBy comp x t2
unionBy _ TipBy t2 = t2
unionBy comp t1=:(BinBy _ x l1 r1) t2 = link x l1l2 r1r2
where
	(l2,r2) = splitS comp x t2
	l1l2 = unionBy comp l1 l2
	r1r2 = unionBy comp r1 r2

// Splits a set around x into the elements before x and the elements after x;
// an element equivalent to x is dropped. Same equations as splitBy, written
// with lazy `let` bindings instead of strict `#!` ones.
splitS :: !(a a -> Bool) !a !(SetBy a) -> (!SetBy a, !SetBy a)
splitS _ _ TipBy = (TipBy,TipBy)
splitS comp x (BinBy _ y l r)
	| comp x y = let (lt,gt) = splitS comp x l in (lt, link y gt r)
	| comp y x = let (lt,gt) = splitS comp x r in (link y l lt, gt)
	| otherwise = (l,r)

/*--------------------------------------------------------------------
 * Difference
 *--------------------------------------------------------------------*/

// Elements of t1 that do not occur in t2. t1 is split around the root of t2
// (which drops that root from t1 when present) and both halves are processed
// recursively. When the sizes show that nothing was removed, the original t1
// is returned instead of a rebuilt tree.
differenceBy :: !(a a -> Bool) !(SetBy a) !(SetBy a) -> SetBy a
differenceBy _ TipBy _ = TipBy
differenceBy comp t1 t2 = case t2 of
	BinBy _ x l2 r2 -> case splitBy comp x t1 of
		(l1, r1)
			| size l1l2 + size r1r2 == size t1 -> t1
			| otherwise -> merge l1l2 r1r2
		where
			l1l2 = differenceBy comp l1 l2
			r1r2 = differenceBy comp r1 r2
	_ -> t1

/*--------------------------------------------------------------------
 * Intersection
 *--------------------------------------------------------------------*/

// Intersection of a non-empty list of sets, folded from the left.
// Aborts when given the empty list.
intersectionsBy :: !(a a -> Bool) ![SetBy a] -> SetBy a
intersectionsBy _ [t] = t
intersectionsBy comp [t:ts] = 'StdList'.foldl (intersectionBy comp) t ts
intersectionsBy _ [] = abort "SetBy.intersectionsBy called with []\n"

// Elements of the first set that also occur in the second set.
intersectionBy :: !(a a -> Bool) !(SetBy a) !(SetBy a) -> SetBy a
intersectionBy _ TipBy _ = TipBy
intersectionBy _ _ TipBy = TipBy
intersectionBy comp t1 t2 = hedgeInt comp NothingS NothingS t1 t2

// Worker for intersectionBy. blo and bhi are the optional bounds of the
// current range (NothingS = unbounded). The first tree is walked node by
// node; the second tree is trimmed to the range of each subtree before
// recursing. The node value is kept only when it is a member of t2.
hedgeInt :: !(a a -> Bool) !(MaybeS a) !(MaybeS a) !(SetBy a) !(SetBy a) -> SetBy a
hedgeInt _ _ _ _ TipBy = TipBy
hedgeInt _ _ _ TipBy _ = TipBy
hedgeInt comp blo bhi (BinBy _ x l r) t2
	#! bmi = JustS x
	#! l = hedgeInt comp blo bmi l (trimBy comp blo bmi t2)
	#! r = hedgeInt comp bmi bhi r (trimBy comp bmi bhi t2)
	= if (memberBy comp x t2) (link x l r) (merge l r)

/*--------------------------------------------------------------------
 * Filter and partition
 *--------------------------------------------------------------------*/

// Keeps the elements that satisfy the predicate.
filter :: !(a -> Bool) !(SetBy a) -> SetBy a
filter p (BinBy _ x l r)
	| p x = link x (filter p l) (filter p r)
	| otherwise = merge (filter p l) (filter p r)
filter _ tip = tip

// Splits the set into (elements satisfying p, elements not satisfying p).
partition :: !(a -> Bool) !(SetBy a) -> (!SetBy a, !SetBy a)
partition p (BinBy _ x l r)
	#! (l1,l2) = partition p l
	#! (r1,r2) = partition p r
	| p x = (link x l1 r1,merge l2 r2)
	| otherwise = (merge l1 r1,link x l2 r2)
partition _ t = (t, t)

/*--------------------------------------------------------------------
 * Lists
 *--------------------------------------------------------------------*/

// Builds a set by inserting the list elements one by one, left to right,
// starting from the empty set.
fromListBy :: !(a a -> Bool) ![a] -> SetBy a
fromListBy comp xs = 'StdList'.foldl (ins comp) newSet xs
where
	// insertBy with the set and element arguments swapped, to fit foldl.
	ins :: !(a a -> Bool) !(SetBy a) !a -> SetBy a
	ins comp t x = insertBy comp x t

/*--------------------------------------------------------------------
  Utility functions that return sub-ranges of the original
  tree. Some functions take a comparison function as argument to
  allow comparisons against infinite values. A function [cmplo x]
  should be read as [compare lo x].

  [trimBy comp lo hi t]
                        A tree that is either empty or whose root value [x]
                        lies strictly between the bounds [lo] and [hi].
                        A bound of NothingS means "unbounded" on that side.

  [splitBy comp k t]    Returns two trees [l] and [r] where all values
                        in [l] are <[k] and all values in [r] are >[k].

  [splitMemberBy comp k t]
                        Just like [splitBy] but also returns whether [k]
                        was found in the tree.
--------------------------------------------------------------------*/

// Strict optional value, used for the optional range bounds.
:: MaybeS a = NothingS | JustS !a

/*--------------------------------------------------------------------
  [trimBy comp lo hi t] trims away all subtrees that surely contain no
  values between the range [lo] to [hi].
The returned tree is either empty or the key of the root is between @lo@ and @hi@. --------------------------------------------------------------------*/ trimBy :: !(a a -> Bool) !(MaybeS a) !(MaybeS a) !(SetBy a) -> SetBy a trimBy _ NothingS NothingS t = t trimBy comp (JustS lx) NothingS t = greater comp lx t where greater comp lo (BinBy _ x _ r) | not (comp lo x) = greater comp lo r greater _ _ t = t trimBy comp NothingS (JustS hx) t = lesser comp hx t where lesser comp hi (BinBy _ x l _) | not (comp x hi) = lesser comp hi l lesser _ _ t = t trimBy comp (JustS lx) (JustS hx) t = middle comp lx hx t where middle comp lo hi (BinBy _ x _ r) | not (comp lo x) = middle comp lo hi r middle comp lo hi (BinBy _ x l _) | not (comp x hi) = middle comp lo hi l middle _ _ _ t = t /*-------------------------------------------------------------------- * Split *--------------------------------------------------------------------*/ splitBy :: !(a a -> Bool) !a !(SetBy a) -> (!SetBy a, !SetBy a) splitBy comp x (BinBy _ y l r) | comp x y #! (lt, gt) = splitBy comp x l = (lt, link y gt r) | comp y x #! (lt,gt) = splitBy comp x r = (link y l lt,gt) | otherwise = (l, r) splitBy _ _ t = (t, t) splitMemberBy :: !(a a -> Bool) !a !(SetBy a) -> (!SetBy a, !Bool, !SetBy a) splitMemberBy comp x (BinBy _ y l r) | comp x y #! (lt, found, gt) = splitMemberBy comp x l = (lt, found, link y gt r) | comp y x #! (lt, found, gt) = splitMemberBy comp x r = (link y l lt, found, gt) | otherwise = (l, True, r) splitMemberBy _ _ t = (t, False, t) /*-------------------------------------------------------------------- Utility functions that maintain the balance properties of the tree. All constructors assume that all values in [l] < [x] and all values in [r] > [x], and that [l] and [r] are valid trees. In order of sophistication: [BinBy sz x l r] The type constructor. [bin x l r] Maintains the correct size, assumes that both [l] and [r] are balanced with respect to each other. 
[balance x l r] Restores the balance and size. Assumes that the original tree was balanced and that [l] or [r] has changed by at most one element. [join x l r] Restores balance and size. Furthermore, we can construct a new tree from two trees. Both operations assume that all values in [l] < all values in [r] and that [l] and [r] are valid: [glue l r] Glues [l] and [r] together. Assumes that [l] and [r] are already balanced with respect to each other. [merge l r] Merges two trees and restores balance. Note: in contrast to Adam's paper, we use (<=) comparisons instead of (<) comparisons in [join], [merge] and [balance]. Quickcheck (on [difference]) showed that this was necessary in order to maintain the invariants. It is quite unsatisfactory that I haven't been able to find out why this is actually the case! Fortunately, it doesn't hurt to be a bit more conservative. --------------------------------------------------------------------*/ /*-------------------------------------------------------------------- * Join *--------------------------------------------------------------------*/ link :: !a !(SetBy a) !(SetBy a) -> SetBy a link x l=:(BinBy sizeL y ly ry) r=:(BinBy sizeR z lz rz) | delta*sizeL < sizeR = balanceL z (link x l lz) rz | delta*sizeR < sizeL = balanceR y ly (link x ry r) | otherwise = bin x l r link x TipBy r = insertMin x r link x l _ = insertMax x l // insertMin and insertMax don't perform potentially expensive comparisons. insertMax :: !a !(SetBy a) -> SetBy a insertMax x (BinBy _ y l r) = balanceR y l (insertMax x r) insertMax x _ = singleton x insertMin :: !a !(SetBy a) -> SetBy a insertMin x (BinBy _ y l r) = balanceL y (insertMin x l) r insertMin x _ = singleton x /*-------------------------------------------------------------------- * [merge l r]: merges two trees. 
 *--------------------------------------------------------------------*/

// Joins two trees without a separating value. When one side is more than
// `delta` times the size of the other, the smaller tree is merged into the
// heavier one and that side is rebalanced; otherwise the trees are glued.
merge :: !(SetBy a) !(SetBy a) -> SetBy a
merge l=:(BinBy sizeL x lx rx) r=:(BinBy sizeR y ly ry)
	| delta*sizeL < sizeR = balanceL y (merge l ly) ry
	| delta*sizeR < sizeL = balanceR x lx (merge rx r)
	| otherwise = glue l r
merge TipBy r = r
merge l _ = l

/*--------------------------------------------------------------------
 * [glue l r]: glues two trees together.
 * Assumes that [l] and [r] are already balanced with respect to each other.
 *--------------------------------------------------------------------*/
glue :: !.(SetBy a) !.(SetBy a) -> SetBy a
glue TipBy r = r
glue l TipBy = l
glue l r
	// The new root is taken from the larger side: its maximum when the left
	// tree is larger, otherwise the minimum of the right tree.
	| size l > size r
		#! (m, l) = deleteFindMax l
		= balanceR m l r
	| otherwise
		#! (m, r) = deleteFindMin r
		= balanceL m l r

// Returns the leftmost element together with the set without it.
// For the empty set the element component is an abort.
deleteFindMin :: !.(SetBy a) -> (!a, !SetBy a)
deleteFindMin (BinBy _ x TipBy r) = (x, r)
deleteFindMin (BinBy _ x l r)
	#! (xm, l) = deleteFindMin l
	= (xm, balanceR x l r)
deleteFindMin TipBy = (abort "SetBy.deleteFindMin: can not return the minimal element of an empty set", TipBy)

// Returns the rightmost element together with the set without it.
// For the empty set the element component is an abort.
deleteFindMax :: !.(SetBy a) -> (!a, !SetBy a)
deleteFindMax (BinBy _ x l TipBy ) = (x, l)
deleteFindMax (BinBy _ x l r)
	#! (xm, r) = deleteFindMax r
	= (xm, balanceL x l r)
deleteFindMax TipBy = (abort "SetBy.deleteFindMax: can not return the maximal element of an empty set", TipBy)

// Total variant of deleteFindMin: Nothing for the empty set.
minView :: !.(SetBy a) -> .(Maybe (!a, !SetBy a))
minView TipBy = Nothing
minView x = Just (deleteFindMin x)

// Total variant of deleteFindMax: Nothing for the empty set.
maxView :: !.(SetBy a) -> .(Maybe (!a, !SetBy a))
maxView TipBy = Nothing
maxView x = Just (deleteFindMax x)

/*--------------------------------------------------------------------
  [balance x l r] balances two trees with value x.
  The sizes of the trees should balance after decreasing the
  size of one of them. (a rotation).

  [delta] is the maximal relative difference between the sizes of
          two trees, it corresponds with the [w] in Adams' paper,
          or equivalently, [1/delta] corresponds with the $\alpha$
          in Nievergelt's paper.
          Adams shows that [delta] should be larger than 3.745 in
          order to guarantee that the rotations can always restore
          balance.

  [ratio] is the ratio between an outer and inner sibling of the
          heavier subtree in an unbalanced setting. It determines
          whether a double or single rotation should be performed
          to restore balance. It corresponds with the inverse
          of $\alpha$ in Adams' article.

  Note that:
  - [delta] should be larger than 4.646 with a [ratio] of 2.
  - [delta] should be larger than 3.745 with a [ratio] of 1.534.

  - A lower [delta] leads to a more 'perfectly' balanced tree.
  - A higher [delta] performs less rebalancing.

  - Balancing is automatic for random data and a balancing
    scheme is only necessary to avoid pathological worst cases.
    Almost any choice will do in practice.

  - Although it seems that a rather large [delta] may perform better
    than a smaller one, measurements have shown that the smallest [delta]
    of 4 is actually the fastest on a wide range of operations. It
    especially improves performance on worst-case scenarios like
    a sequence of ordered insertions.

  Note: in contrast to Adams' paper, we use a ratio of (at least) 2
  to decide whether a single or double rotation is needed. Although
  he actually proves that this ratio is needed to maintain the
  invariants, his implementation uses an (invalid) ratio of 1.
  He is aware of the problem though since he has put a comment in his
  original source code that he doesn't care about generating a
  slightly unbalanced tree since it doesn't seem to matter in practice.
  However (since we use quickcheck :-) we will stick to strictly balanced
  trees.
--------------------------------------------------------------------*/

// delta: one subtree may be at most this many times the size of its sibling.
delta :== 4
// ratio: threshold between the inner and outer grandchild sizes that selects
// a single or a double rotation.
ratio :== 2

// Functions balanceL and balanceR are specialised versions of balance.
// balanceL only checks whether the left subtree is too big,
// balanceR only checks whether the right subtree is too big.
// balanceL is called when left subtree might have been inserted to or when
// right subtree might have been deleted from.
//
// Builds a node from x, l and r, rotating to the right when the left subtree
// is too heavy. Sizes are computed inline from the sizes already stored in
// the subtrees. Naming: ls/rs are the sizes of l and r; lx, ll, lr are the
// value and children of l; lls/lrs and lrx, lrl, lrr are the corresponding
// parts of ll and lr.
balanceL :: !a !(SetBy a) !(SetBy a) -> SetBy a
balanceL x l r = case r of
	// Right subtree is non-empty.
	BinBy rs _ _ _ -> case l of
		BinBy ls lx ll lr
			// Left side too heavy: rotate. The irrefutable matches below
			// assume both children of l are non-empty in this case.
			| ls > delta*rs
				# (BinBy lls _   _   _  ) = ll
				# (BinBy lrs lrx lrl lrr) = lr
				// Single rotation: lx becomes the root.
				| lrs < ratio*lls -> BinBy (1+ls+rs) lx ll (BinBy (1+rs+lrs) x lr r)
				// Double rotation: lrx becomes the root.
				| otherwise -> BinBy (1+ls+rs) lrx (BinBy (1+lls+size lrl) lx ll lrl) (BinBy (1+rs+size lrr) x lrr r)
			// Already balanced: just build the node.
			| otherwise -> BinBy (1+ls+rs) x l r
		_ -> BinBy (1+rs) x TipBy r
	// Right subtree is empty: decide on the shape of the left subtree alone.
	_ -> case l of
		// l has two non-empty children: single or double rotation as above.
		BinBy ls lx ll=:(BinBy lls _ _ _) lr=:(BinBy lrs lrx lrl lrr)
			| lrs < ratio*lls -> BinBy (1+ls) lx ll (BinBy (1+lrs) x lr TipBy)
			| otherwise -> BinBy (1+ls) lrx (BinBy (1+lls+size lrl) lx ll lrl) (BinBy (1+size lrr) x lrr TipBy)
		// l has only a right child: that child's value becomes the root of a
		// three-element tree.
		BinBy _ lx TipBy (BinBy _ lrx _ _) -> BinBy 3 lrx (BinBy 1 lx TipBy TipBy) (BinBy 1 x TipBy TipBy)
		// l has only a left child: lx becomes the root of a three-element tree.
		BinBy _ lx ll=:(BinBy _ _ _ _) TipBy -> BinBy 3 lx ll (BinBy 1 x TipBy TipBy)
		// l is a single element.
		BinBy _ _ _ _ -> BinBy 2 x l TipBy
		// Both subtrees are empty.
		_ -> BinBy 1 x TipBy TipBy

// balanceR is called when right subtree might have been inserted to or when
// left subtree might have been deleted from.
// Builds a node from x, l and r, rotating to the left when the right subtree
// is too heavy. Mirror image of balanceL. Naming: ls/rs are the sizes of l
// and r; rx, rl, rr are the value and children of r; rls/rrs and rlx, rll,
// rlr are the corresponding parts of rl and rr.
balanceR :: !a !(SetBy a) !(SetBy a) -> SetBy a
balanceR x l r = case l of
	// Left subtree is non-empty.
	BinBy ls _ _ _ -> case r of
		BinBy rs rx rl rr
			// Right side too heavy: rotate. The irrefutable matches below
			// assume both children of r are non-empty in this case.
			| rs > delta*ls
				# (BinBy rls rlx rll rlr) = rl
				# (BinBy rrs _   _   _  ) = rr
				// Single rotation: rx becomes the root.
				| rls < ratio*rrs -> BinBy (1+ls+rs) rx (BinBy (1+ls+rls) x l rl) rr
				// Double rotation: rlx becomes the root.
				| otherwise -> BinBy (1+ls+rs) rlx (BinBy (1+ls+size rll) x l rll) (BinBy (1+rrs+size rlr) rx rlr rr)
			// Already balanced: just build the node.
			| otherwise -> BinBy (1+ls+rs) x l r
		_ -> BinBy (1+ls) x l TipBy
	// Left subtree is empty: decide on the shape of the right subtree alone.
	_ -> case r of
		// r has two non-empty children: single or double rotation as above.
		BinBy rs rx rl=:(BinBy rls rlx rll rlr) rr=:(BinBy rrs _ _ _)
			| rls < ratio*rrs -> BinBy (1+rs) rx (BinBy (1+rls) x TipBy rl) rr
			| otherwise -> BinBy (1+rs) rlx (BinBy (1+size rll) x TipBy rll) (BinBy (1+rrs+size rlr) rx rlr rr)
		// r has only a right child: rx becomes the root of a three-element tree.
		BinBy _ rx TipBy rr=:(BinBy _ _ _ _) -> BinBy 3 rx (BinBy 1 x TipBy TipBy) rr
		// r has only a left child: that child's value becomes the root of a
		// three-element tree.
		BinBy _ rx (BinBy _ rlx _ _) TipBy -> BinBy 3 rlx (BinBy 1 x TipBy TipBy) (BinBy 1 rx TipBy TipBy)
		// r is a single element.
		BinBy _ _ _ _ -> BinBy 2 x TipBy r
		// Both subtrees are empty.
		_ -> BinBy 1 x TipBy TipBy

// rotate

// Left rotation around x: single when the inner grandchild of r is smaller
// than `ratio` times the outer one, double otherwise. Aborts when r is empty.
rotateL :: !a !(SetBy a) !(SetBy a) -> SetBy a
rotateL x l r=:(BinBy _ _ ly ry)
	| size ly < ratio*size ry = singleL x l r
	| otherwise = doubleL x l r
rotateL _ _ TipBy = abort "rotateL TipBy"

// Right rotation around x: single when the inner grandchild of l is smaller
// than `ratio` times the outer one, double otherwise. Aborts when l is empty.
rotateR :: !a !(SetBy a) !(SetBy a) -> SetBy a
rotateR x l=:(BinBy _ _ ly ry) r
	| size ry < ratio*size ly = singleR x l r
	| otherwise = doubleR x l r
// The message names rotateR so a failure points at the right function.
rotateR _ TipBy _ = abort "rotateR TipBy"

// basic rotations

// Single left rotation: the root of the right tree becomes the new root.
singleL :: !a !(SetBy a) !(SetBy a) -> SetBy a
singleL x1 t1 (BinBy _ x2 t2 t3) = bin x2 (bin x1 t1 t2) t3
singleL _ _ TipBy = abort "singleL"

// Single right rotation: the root of the left tree becomes the new root.
singleR :: !a !(SetBy a) !(SetBy a) -> SetBy a
singleR x1 (BinBy _ x2 t1 t2) t3 = bin x2 t1 (bin x1 t2 t3)
singleR _ TipBy _ = abort "singleR"

// Double left rotation: the left child of the right tree becomes the new root.
doubleL :: !a !(SetBy a) !(SetBy a) -> SetBy a
doubleL x1 t1 (BinBy _ x2 (BinBy _ x3 t2 t3) t4) = bin x3 (bin x1 t1 t2) (bin x2 t3 t4)
doubleL _ _ _ = abort "doubleL"

// Double right rotation: the right child of the left tree becomes the new root.
doubleR :: !a !(SetBy a) !(SetBy a) -> SetBy a
doubleR x1 (BinBy _ x2 t1 (BinBy _ x3 t2 t3)) t4 = bin x3 (bin x2 t1 t2) (bin x1 t3 t4)
doubleR _ _ _ = abort "doubleR"
/*--------------------------------------------------------------------
 * bin: node constructor that fills in the size field itself, as the
 * sizes of both subtrees plus one for the node's own value.
 *--------------------------------------------------------------------*/
//bin :: !a !(SetBy a) !(SetBy a) -> SetBy a
bin value left right :== BinBy (size left + size right + 1) value left right