{-# LANGUAGE BangPatterns, MagicHash #-}

-- |
-- Module      : Data.Text.Internal.Fusion
-- Copyright   : (c) Tom Harper 2008-2009,
--               (c) Bryan O'Sullivan 2009-2010,
--               (c) Duncan Coutts 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- /Warning/: this is an internal module, and does not have a stable
-- API or name. Functions in this module may not check or enforce
-- preconditions expected by public modules. Use at your own risk!
--
-- Text manipulation functions represented as fusible operations over
-- streams.
module Data.Text.Internal.Fusion
    (
    -- * Types
      Stream(..)
    , Step(..)

    -- * Creation and elimination
    , stream
    , unstream
    , reverseStream

    , length

    -- * Transformations
    , reverse

    -- * Construction
    -- ** Scans
    , reverseScanr

    -- ** Accumulating maps
    , mapAccumL

    -- ** Generation and unfolding
    , unfoldrN

    -- * Indexing
    , index
    , findIndex
    , countChar
    ) where

import Prelude (Bool(..), Char, Maybe(..), Monad(..), Int,
                Num(..), Ord(..), ($), (&&),
                fromIntegral, otherwise)
import Data.Bits ((.&.))
import Data.Text.Internal (Text(..))
import Data.Text.Internal.Private (runText)
import Data.Text.Internal.Unsafe.Char (ord, unsafeChr, unsafeWrite)
import Data.Text.Internal.Unsafe.Shift (shiftL, shiftR)
import qualified Data.Text.Array as A
import qualified Data.Text.Internal.Fusion.Common as S
import Data.Text.Internal.Fusion.Types
import Data.Text.Internal.Fusion.Size
import qualified Data.Text.Internal as I
import qualified Data.Text.Internal.Encoding.Utf16 as U16

default(Int)

-- | /O(n)/ Convert a 'Text' into a 'Stream Char'.
stream :: Text -> Stream Char
stream :: Text -> Stream Char
stream (Text Array
arr Int
off Int
len) = (Int -> Step Int Char) -> Int -> Size -> Stream Char
forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next Int
off (Int -> Int -> Size
betweenSize (Int
len Int -> Int -> Int
forall a. UnsafeShift a => a -> Int -> a
`shiftR` Int
1) Int
len)
    where
      !end :: Int
end = Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
len
      next :: Int -> Step Int Char
next !Int
i
          | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
end                   = Step Int Char
forall s a. Step s a
Done
          | Word16
n Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xD800 Bool -> Bool -> Bool
&& Word16
n Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word16
0xDBFF = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Word16 -> Char
U16.chr2 Word16
n Word16
n2) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
          | Bool
otherwise                  = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Char
unsafeChr Word16
n) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
          where
            n :: Word16
n  = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
i
            n2 :: Word16
n2 = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
{-# INLINE [0] stream #-}

-- | /O(n)/ Convert a 'Text' into a 'Stream Char', but iterate
-- backwards.
reverseStream :: Text -> Stream Char
reverseStream :: Text -> Stream Char
reverseStream (Text Array
arr Int
off Int
len) = (Int -> Step Int Char) -> Int -> Size -> Stream Char
forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Int -> Step Int Char
next (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (Int -> Int -> Size
betweenSize (Int
len Int -> Int -> Int
forall a. UnsafeShift a => a -> Int -> a
`shiftR` Int
1) Int
len)
    where
      {-# INLINE next #-}
      next :: Int -> Step Int Char
next !Int
i
          | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
off                    = Step Int Char
forall s a. Step s a
Done
          | Word16
n Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xDC00 Bool -> Bool -> Bool
&& Word16
n Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word16
0xDFFF = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Word16 -> Char
U16.chr2 Word16
n2 Word16
n) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
2)
          | Bool
otherwise                  = Char -> Int -> Step Int Char
forall s a. a -> s -> Step s a
Yield (Word16 -> Char
unsafeChr Word16
n) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
          where
            n :: Word16
n  = Array -> Int -> Word16
A.unsafeIndex Array
arr Int
i
            n2 :: Word16
n2 = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
{-# INLINE [0] reverseStream #-}

-- | /O(n)/ Convert a 'Stream Char' into a 'Text'.
unstream :: Stream Char -> Text
unstream :: Stream Char -> Text
unstream (Stream s -> Step s Char
next0 s
s0 Size
len) = (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
runText ((forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text)
-> (forall s. (MArray s -> Int -> ST s Text) -> ST s Text) -> Text
forall a b. (a -> b) -> a -> b
$ \MArray s -> Int -> ST s Text
done -> do
  -- Before encoding each char we perform a buffer realloc check assuming
  -- worst case encoding size of two 16-bit units for the char. Just add an
  -- extra space to the buffer so that we do not end up reallocating even when
  -- all the chars are encoded as single unit.
  let mlen :: Int
mlen = Int -> Size -> Int
upperBound Int
4 Size
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
  MArray s
arr0 <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
mlen
  let outer :: MArray s -> Int -> s -> Int -> ST s Text
outer !MArray s
arr !Int
maxi = s -> Int -> ST s Text
encode
       where
        -- keep the common case loop as small as possible
        encode :: s -> Int -> ST s Text
encode !s
si !Int
di =
            case s -> Step s Char
next0 s
si of
                Step s Char
Done        -> MArray s -> Int -> ST s Text
done MArray s
arr Int
di
                Skip s
si'    -> s -> Int -> ST s Text
encode s
si' Int
di
                Yield Char
c s
si'
                    -- simply check for the worst case
                    | Int
maxi Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 -> s -> Int -> ST s Text
realloc s
si Int
di
                    | Bool
otherwise -> do
                            Int
n <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
di Char
c
                            s -> Int -> ST s Text
encode s
si' (Int
di Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)

        -- keep uncommon case separate from the common case code
        {-# NOINLINE realloc #-}
        realloc :: s -> Int -> ST s Text
realloc !s
si !Int
di = do
            let newlen :: Int
newlen = (Int
maxi Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2
            MArray s
arr' <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
newlen
            MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MArray s
arr' Int
0 MArray s
arr Int
0 Int
di
            MArray s -> Int -> s -> Int -> ST s Text
outer MArray s
arr' (Int
newlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) s
si Int
di

  MArray s -> Int -> s -> Int -> ST s Text
outer MArray s
arr0 (Int
mlen Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) s
s0 Int
0
{-# INLINE [0] unstream #-}
{-# RULES "STREAM stream/unstream fusion" forall s. stream (unstream s) = s #-}


-- ----------------------------------------------------------------------------
-- * Basic stream functions

length :: Stream Char -> Int
length :: Stream Char -> Int
length = Stream Char -> Int
forall a. Integral a => Stream Char -> a
S.lengthI
{-# INLINE[0] length #-}

-- | /O(n)/ Reverse the characters of a string.
reverse :: Stream Char -> Text
reverse :: Stream Char -> Text
reverse (Stream s -> Step s Char
next s
s Size
len0)
    | Size -> Bool
isEmpty Size
len0 = Text
I.empty
    | Bool
otherwise    = Array -> Int -> Int -> Text
I.text Array
arr Int
off' Int
len'
  where
    len0' :: Int
len0' = Int -> Size -> Int
upperBound Int
4 (Size -> Size -> Size
larger Size
len0 Size
4)
    (Array
arr, (Int
off', Int
len')) = (forall s. ST s (MArray s, (Int, Int))) -> (Array, (Int, Int))
forall a. (forall s. ST s (MArray s, a)) -> (Array, a)
A.run2 (Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
len0' ST s (MArray s)
-> (MArray s -> ST s (MArray s, (Int, Int)))
-> ST s (MArray s, (Int, Int))
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
forall s.
s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
loop s
s (Int
len0'Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int
len0')
    loop :: s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
loop !s
s0 !Int
i !Int
len MArray s
marr =
        case s -> Step s Char
next s
s0 of
          Step s Char
Done -> (MArray s, (Int, Int)) -> ST s (MArray s, (Int, Int))
forall (m :: * -> *) a. Monad m => a -> m a
return (MArray s
marr, (Int
j, Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
j))
              where j :: Int
j = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
          Skip s
s1    -> s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
loop s
s1 Int
i Int
len MArray s
marr
          Yield Char
x s
s1 | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
least -> {-# SCC "reverse/resize" #-} do
                       let newLen :: Int
newLen = Int
len Int -> Int -> Int
forall a. UnsafeShift a => a -> Int -> a
`shiftL` Int
1
                       MArray s
marr' <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
newLen
                       MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MArray s
marr' (Int
newLenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
len) MArray s
marr Int
0 Int
len
                       s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
write s
s1 (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i) Int
newLen MArray s
marr'
                     | Bool
otherwise -> s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
write s
s1 Int
i Int
len MArray s
marr
            where n :: Int
n = Char -> Int
ord Char
x
                  least :: Int
least | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0x10000 = Int
0
                        | Bool
otherwise   = Int
1
                  m :: Int
m = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
0x10000
                  lo :: Word16
lo = Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word16) -> Int -> Word16
forall a b. (a -> b) -> a -> b
$ (Int
m Int -> Int -> Int
forall a. UnsafeShift a => a -> Int -> a
`shiftR` Int
10) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xD800
                  hi :: Word16
hi = Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word16) -> Int -> Word16
forall a b. (a -> b) -> a -> b
$ (Int
m Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3FF) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xDC00
                  write :: s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
write s
t Int
j Int
l MArray s
mar
                      | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0x10000 = do
                          MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
mar Int
j (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n)
                          s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
loop s
t (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Int
l MArray s
mar
                      | Bool
otherwise = do
                          MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
mar (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) Word16
lo
                          MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
mar Int
j Word16
hi
                          s -> Int -> Int -> MArray s -> ST s (MArray s, (Int, Int))
loop s
t (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2) Int
l MArray s
mar
{-# INLINE [0] reverse #-}

-- | /O(n)/ Perform the equivalent of 'scanr' over a list, only with
-- the input and result reversed.
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
reverseScanr :: (Char -> Char -> Char) -> Char -> Stream Char -> Stream Char
reverseScanr Char -> Char -> Char
f Char
z0 (Stream s -> Step s Char
next0 s
s0 Size
len) = (Scan s -> Step (Scan s) Char) -> Scan s -> Size -> Stream Char
forall a s. (s -> Step s a) -> s -> Size -> Stream a
Stream Scan s -> Step (Scan s) Char
next (Char -> s -> Scan s
forall s. Char -> s -> Scan s
Scan1 Char
z0 s
s0) (Size
lenSize -> Size -> Size
forall a. Num a => a -> a -> a
+Size
1) -- HINT maybe too low
  where
    {-# INLINE next #-}
    next :: Scan s -> Step (Scan s) Char
next (Scan1 Char
z s
s) = Char -> Scan s -> Step (Scan s) Char
forall s a. a -> s -> Step s a
Yield Char
z (Char -> s -> Scan s
forall s. Char -> s -> Scan s
Scan2 Char
z s
s)
    next (Scan2 Char
z s
s) = case s -> Step s Char
next0 s
s of
                         Yield Char
x s
s' -> let !x' :: Char
x' = Char -> Char -> Char
f Char
x Char
z
                                       in Char -> Scan s -> Step (Scan s) Char
forall s a. a -> s -> Step s a
Yield Char
x' (Char -> s -> Scan s
forall s. Char -> s -> Scan s
Scan2 Char
x' s
s')
                         Skip s
s'    -> Scan s -> Step (Scan s) Char
forall s a. s -> Step s a
Skip (Char -> s -> Scan s
forall s. Char -> s -> Scan s
Scan2 Char
z s
s')
                         Step s Char
Done       -> Step (Scan s) Char
forall s a. Step s a
Done
{-# INLINE reverseScanr #-}

-- | /O(n)/ Like 'unfoldr', 'unfoldrN' builds a stream from a seed
-- value. However, the length of the result is limited by the
-- first argument to 'unfoldrN'. This function is more efficient than
-- 'unfoldr' when the length of the result is known.
unfoldrN :: Int -> (a -> Maybe (Char,a)) -> a -> Stream Char
unfoldrN :: Int -> (a -> Maybe (Char, a)) -> a -> Stream Char
unfoldrN Int
n = Int -> (a -> Maybe (Char, a)) -> a -> Stream Char
forall a b.
Integral a =>
a -> (b -> Maybe (Char, b)) -> b -> Stream Char
S.unfoldrNI Int
n
{-# INLINE [0] unfoldrN #-}

-------------------------------------------------------------------------------
-- ** Indexing streams

-- | /O(n)/ stream index (subscript) operator, starting from 0.
index :: Stream Char -> Int -> Char
index :: Stream Char -> Int -> Char
index = Stream Char -> Int -> Char
forall a. Integral a => Stream Char -> a -> Char
S.indexI
{-# INLINE [0] index #-}

-- | The 'findIndex' function takes a predicate and a stream and
-- returns the index of the first element in the stream
-- satisfying the predicate.
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
findIndex :: (Char -> Bool) -> Stream Char -> Maybe Int
findIndex = (Char -> Bool) -> Stream Char -> Maybe Int
forall a. Integral a => (Char -> Bool) -> Stream Char -> Maybe a
S.findIndexI
{-# INLINE [0] findIndex #-}

-- | /O(n)/ The 'count' function returns the number of times the query
-- element appears in the given stream.
countChar :: Char -> Stream Char -> Int
countChar :: Char -> Stream Char -> Int
countChar = Char -> Stream Char -> Int
forall a. Integral a => Char -> Stream Char -> a
S.countCharI
{-# INLINE [0] countChar #-}

-- | /O(n)/ Like a combination of 'map' and 'foldl''. Applies a
-- function to each element of a 'Text', passing an accumulating
-- parameter from left to right, and returns a final 'Text'.
mapAccumL :: (a -> Char -> (a,Char)) -> a -> Stream Char -> (a, Text)
mapAccumL :: (a -> Char -> (a, Char)) -> a -> Stream Char -> (a, Text)
mapAccumL a -> Char -> (a, Char)
f a
z0 (Stream s -> Step s Char
next0 s
s0 Size
len) = (a
nz, Array -> Int -> Int -> Text
I.text Array
na Int
0 Int
nl)
  where
    (Array
na,(a
nz,Int
nl)) = (forall s. ST s (MArray s, (a, Int))) -> (Array, (a, Int))
forall a. (forall s. ST s (MArray s, a)) -> (Array, a)
A.run2 (Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
mlen ST s (MArray s)
-> (MArray s -> ST s (MArray s, (a, Int)))
-> ST s (MArray s, (a, Int))
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \MArray s
arr -> MArray s -> Int -> a -> s -> Int -> ST s (MArray s, (a, Int))
forall s.
MArray s -> Int -> a -> s -> Int -> ST s (MArray s, (a, Int))
outer MArray s
arr Int
mlen a
z0 s
s0 Int
0)
      where mlen :: Int
mlen = Int -> Size -> Int
upperBound Int
4 Size
len
    outer :: MArray s -> Int -> a -> s -> Int -> ST s (MArray s, (a, Int))
outer MArray s
arr Int
top = a -> s -> Int -> ST s (MArray s, (a, Int))
loop
      where
        loop :: a -> s -> Int -> ST s (MArray s, (a, Int))
loop !a
z !s
s !Int
i =
            case s -> Step s Char
next0 s
s of
              Step s Char
Done          -> (MArray s, (a, Int)) -> ST s (MArray s, (a, Int))
forall (m :: * -> *) a. Monad m => a -> m a
return (MArray s
arr, (a
z,Int
i))
              Skip s
s'       -> a -> s -> Int -> ST s (MArray s, (a, Int))
loop a
z s
s' Int
i
              Yield Char
x s
s'
                | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
top  -> {-# SCC "mapAccumL/resize" #-} do
                               let top' :: Int
top' = (Int
top Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Int -> Int -> Int
forall a. UnsafeShift a => a -> Int -> a
`shiftL` Int
1
                               MArray s
arr' <- Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
top'
                               MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
forall s. MArray s -> Int -> MArray s -> Int -> Int -> ST s ()
A.copyM MArray s
arr' Int
0 MArray s
arr Int
0 Int
top
                               MArray s -> Int -> a -> s -> Int -> ST s (MArray s, (a, Int))
outer MArray s
arr' Int
top' a
z s
s Int
i
                | Bool
otherwise -> do Int
d <- MArray s -> Int -> Char -> ST s Int
forall s. MArray s -> Int -> Char -> ST s Int
unsafeWrite MArray s
arr Int
i Char
c
                                  a -> s -> Int -> ST s (MArray s, (a, Int))
loop a
z' s
s' (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
d)
                where (a
z',Char
c) = a -> Char -> (a, Char)
f a
z Char
x
                      j :: Int
j | Char -> Int
ord Char
c Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0x10000 = Int
i
                        | Bool
otherwise       = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1
{-# INLINE [0] mapAccumL #-}