{-# LANGUAGE CPP #-}
{-# LANGUAGE BangPatterns #-}
#ifdef __GLASGOW_HASKELL__
{-# LANGUAGE Trustworthy #-}
#endif

-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Containers.ListUtils
-- Copyright   :  (c) Gershom Bazerman 2018
-- License     :  BSD-style
-- Maintainer  :  libraries@haskell.org
-- Portability :  portable
--
-- This module provides efficient containers-based functions on the list type.
--
-- In the documentation, \(n\) is the number of elements in the list while
-- \(d\) is the number of distinct elements in the list. \(W\) is the number
-- of bits in an 'Int'.
-----------------------------------------------------------------------------

module Data.Containers.ListUtils (
       nubOrd,
       nubOrdOn,
       nubInt,
       nubIntOn
       ) where

import Data.Set (Set)
import qualified Data.Set as Set
import qualified Data.IntSet as IntSet
import Data.IntSet (IntSet)
#ifdef __GLASGOW_HASKELL__
import GHC.Exts ( build )
#endif

-- *** Ord-based nubbing ***


-- | \( O(n \log d) \). The @nubOrd@ function removes duplicate elements from a
-- list. In particular, it keeps only the first occurrence of each element. By
-- using a 'Set' internally it has better asymptotics than the standard
-- 'Data.List.nub' function.
--
-- ==== Strictness
--
-- @nubOrd@ is strict in the elements of the list.
--
-- ==== Efficiency note
--
-- When applicable, it is almost always better to use 'nubInt' or 'nubIntOn'
-- instead of this function, although it can be a little worse in certain
-- pathological cases. For example, to nub a list of characters, use
--
-- @ nubIntOn fromEnum xs @
nubOrd :: Ord a => [a] -> [a]
nubOrd :: [a] -> [a]
nubOrd = (a -> a) -> [a] -> [a]
forall b a. Ord b => (a -> b) -> [a] -> [a]
nubOrdOn a -> a
forall a. a -> a
id
{-# INLINE nubOrd #-}

-- | The @nubOrdOn@ function behaves just like 'nubOrd' except it performs
-- comparisons not on the original datatype, but a user-specified projection
-- from that datatype.
--
-- ==== Strictness
--
-- @nubOrdOn@ is strict in the values of the function applied to the
-- elements of the list.
nubOrdOn :: Ord b => (a -> b) -> [a] -> [a]
-- For some reason we need to write an explicit lambda here to allow this
-- to inline when only applied to a function.
nubOrdOn :: (a -> b) -> [a] -> [a]
nubOrdOn a -> b
f = \[a]
xs -> (a -> b) -> Set b -> [a] -> [a]
forall b a. Ord b => (a -> b) -> Set b -> [a] -> [a]
nubOrdOnExcluding a -> b
f Set b
forall a. Set a
Set.empty [a]
xs
{-# INLINE nubOrdOn #-}

-- Splitting nubOrdOn like this means that we don't have to worry about
-- matching specifically on Set.empty in the rewrite-back rule.
nubOrdOnExcluding :: Ord b => (a -> b) -> Set b -> [a] -> [a]
nubOrdOnExcluding :: (a -> b) -> Set b -> [a] -> [a]
nubOrdOnExcluding a -> b
f = Set b -> [a] -> [a]
go
  where
    go :: Set b -> [a] -> [a]
go Set b
_ [] = []
    go Set b
s (a
x:[a]
xs)
      | b
fx b -> Set b -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set b
s = Set b -> [a] -> [a]
go Set b
s [a]
xs
      | Bool
otherwise = a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: Set b -> [a] -> [a]
go (b -> Set b -> Set b
forall a. Ord a => a -> Set a -> Set a
Set.insert b
fx Set b
s) [a]
xs
      where !fx :: b
fx = a -> b
f a
x

#ifdef __GLASGOW_HASKELL__
-- We want this inlinable to specialize to the necessary Ord instance.
{-# INLINABLE [1] nubOrdOnExcluding #-}

{-# RULES
-- Rewrite to a fusible form.
"nubOrdOn" [~1] forall f as s. nubOrdOnExcluding  f s as =
  build (\c n -> foldr (nubOrdOnFB f c) (constNubOn n) as s)

-- Rewrite back to a plain form
"nubOrdOnList" [1] forall f as s.
    foldr (nubOrdOnFB f (:)) (constNubOn []) as s =
       nubOrdOnExcluding f s as
 #-}

nubOrdOnFB :: Ord b
           => (a -> b)
           -> (a -> r -> r)
           -> a
           -> (Set b -> r)
           -> Set b
           -> r
nubOrdOnFB :: (a -> b) -> (a -> r -> r) -> a -> (Set b -> r) -> Set b -> r
nubOrdOnFB a -> b
f a -> r -> r
c a
x Set b -> r
r Set b
s
  | b
fx b -> Set b -> Bool
forall a. Ord a => a -> Set a -> Bool
`Set.member` Set b
s = Set b -> r
r Set b
s
  | Bool
otherwise = a
x a -> r -> r
`c` Set b -> r
r (b -> Set b -> Set b
forall a. Ord a => a -> Set a -> Set a
Set.insert b
fx Set b
s)
  where !fx :: b
fx = a -> b
f a
x
{-# INLINABLE [0] nubOrdOnFB #-}

constNubOn :: a -> b -> a
constNubOn :: a -> b -> a
constNubOn a
x b
_ = a
x
{-# INLINE [0] constNubOn #-}
#endif


-- *** Int-based nubbing ***


-- | \( O(n \min(d,W)) \). The @nubInt@ function removes duplicate 'Int'
-- values from a list. In particular, it keeps only the first occurrence
-- of each element. By using an 'IntSet' internally, it attains better
-- asymptotics than the standard 'Data.List.nub' function.
--
-- See also 'nubIntOn', a more widely applicable generalization.
--
-- ==== Strictness
--
-- @nubInt@ is strict in the elements of the list.
nubInt :: [Int] -> [Int]
nubInt :: [Int] -> [Int]
nubInt = (Int -> Int) -> [Int] -> [Int]
forall a. (a -> Int) -> [a] -> [a]
nubIntOn Int -> Int
forall a. a -> a
id
{-# INLINE nubInt #-}

-- | The @nubIntOn@ function behaves just like 'nubInt' except it performs
-- comparisons not on the original datatype, but a user-specified projection
-- from that datatype. For example, @nubIntOn 'fromEnum'@ can be used to
-- nub characters and typical fixed-with numerical types efficiently.
--
-- ==== Strictness
--
-- @nubIntOn@ is strict in the values of the function applied to the
-- elements of the list.
nubIntOn :: (a -> Int) -> [a] -> [a]
-- For some reason we need to write an explicit lambda here to allow this
-- to inline when only applied to a function.
nubIntOn :: (a -> Int) -> [a] -> [a]
nubIntOn a -> Int
f = \[a]
xs -> (a -> Int) -> IntSet -> [a] -> [a]
forall a. (a -> Int) -> IntSet -> [a] -> [a]
nubIntOnExcluding a -> Int
f IntSet
IntSet.empty [a]
xs
{-# INLINE nubIntOn #-}

-- Splitting nubIntOn like this means that we don't have to worry about
-- matching specifically on IntSet.empty in the rewrite-back rule.
nubIntOnExcluding :: (a -> Int) -> IntSet -> [a] -> [a]
nubIntOnExcluding :: (a -> Int) -> IntSet -> [a] -> [a]
nubIntOnExcluding a -> Int
f = IntSet -> [a] -> [a]
go
  where
    go :: IntSet -> [a] -> [a]
go IntSet
_ [] = []
    go IntSet
s (a
x:[a]
xs)
      | Int
fx Int -> IntSet -> Bool
`IntSet.member` IntSet
s = IntSet -> [a] -> [a]
go IntSet
s [a]
xs
      | Bool
otherwise = a
x a -> [a] -> [a]
forall a. a -> [a] -> [a]
: IntSet -> [a] -> [a]
go (Int -> IntSet -> IntSet
IntSet.insert Int
fx IntSet
s) [a]
xs
      where !fx :: Int
fx = a -> Int
f a
x

#ifdef __GLASGOW_HASKELL__
-- We don't mark this INLINABLE because it doesn't seem obviously useful
-- to inline it anywhere; the elements the function operates on are actually
-- pulled from a list and installed in a list; the situation is very different
-- when fusion occurs. In this case, we let GHC make the call.
{-# NOINLINE [1] nubIntOnExcluding #-}

{-# RULES
"nubIntOn" [~1] forall f as s. nubIntOnExcluding  f s as =
  build (\c n -> foldr (nubIntOnFB f c) (constNubOn n) as s)
"nubIntOnList" [1] forall f as s. foldr (nubIntOnFB f (:)) (constNubOn []) as s =
  nubIntOnExcluding f s as
 #-}

nubIntOnFB :: (a -> Int)
           -> (a -> r -> r)
           -> a
           -> (IntSet -> r)
           -> IntSet
           -> r
nubIntOnFB :: (a -> Int) -> (a -> r -> r) -> a -> (IntSet -> r) -> IntSet -> r
nubIntOnFB a -> Int
f a -> r -> r
c a
x IntSet -> r
r IntSet
s
  | Int
fx Int -> IntSet -> Bool
`IntSet.member` IntSet
s = IntSet -> r
r IntSet
s
  | Bool
otherwise = a
x a -> r -> r
`c` IntSet -> r
r (Int -> IntSet -> IntSet
IntSet.insert Int
fx IntSet
s)
  where !fx :: Int
fx = a -> Int
f a
x
{-# INLINABLE [0] nubIntOnFB #-}
#endif