{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving #-}
-- |
-- Module      : Data.Text.Foreign
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : GHC
--
-- Support for using 'Text' data with native code via the Haskell
-- foreign function interface.

module Data.Text.Foreign
    (
    -- * Interoperability with native code
    -- $interop
      I16
    -- * Safe conversion functions
    , fromPtr
    , useAsPtr
    , asForeignPtr
    -- ** Encoding as UTF-8
    , peekCStringLen
    , withCStringLen
    -- * Unsafe conversion code
    , lengthWord16
    , unsafeCopyToPtr
    -- * Low-level manipulation
    -- $lowlevel
    , dropWord16
    , takeWord16
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
#endif
#if __GLASGOW_HASKELL__ >= 702
import Control.Monad.ST.Unsafe (unsafeIOToST)
#else
import Control.Monad.ST (unsafeIOToST)
#endif
import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Text.Internal (Text(..), empty)
import Data.Text.Unsafe (lengthWord16)
import Data.Word (Word16)
import Foreign.C.String (CStringLen)
import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray, withForeignPtr)
import Foreign.Marshal.Alloc (allocaBytes)
import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.Storable (peek, poke)
import qualified Data.Text.Array as A

-- $interop
--
-- The 'Text' type is implemented using arrays that are not guaranteed
-- to have a fixed address in the Haskell heap. All communication with
-- native code must thus occur by copying data back and forth.
--
-- The 'Text' type's internal representation is UTF-16, using the
-- platform's native endianness.  This makes copied data suitable for
-- use with native libraries that use a similar representation, such
-- as ICU.  To interoperate with native libraries that use different
-- internal representations, such as UTF-8 or UTF-32, consider using
-- the functions in the 'Data.Text.Encoding' module.

-- | A type representing a number of UTF-16 code units.
newtype I16 = I16 Int
    deriving (I16
I16 -> I16 -> Bounded I16
forall a. a -> a -> Bounded a
maxBound :: I16
$cmaxBound :: I16
minBound :: I16
$cminBound :: I16
Bounded, Int -> I16
I16 -> Int
I16 -> [I16]
I16 -> I16
I16 -> I16 -> [I16]
I16 -> I16 -> I16 -> [I16]
(I16 -> I16)
-> (I16 -> I16)
-> (Int -> I16)
-> (I16 -> Int)
-> (I16 -> [I16])
-> (I16 -> I16 -> [I16])
-> (I16 -> I16 -> [I16])
-> (I16 -> I16 -> I16 -> [I16])
-> Enum I16
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: I16 -> I16 -> I16 -> [I16]
$cenumFromThenTo :: I16 -> I16 -> I16 -> [I16]
enumFromTo :: I16 -> I16 -> [I16]
$cenumFromTo :: I16 -> I16 -> [I16]
enumFromThen :: I16 -> I16 -> [I16]
$cenumFromThen :: I16 -> I16 -> [I16]
enumFrom :: I16 -> [I16]
$cenumFrom :: I16 -> [I16]
fromEnum :: I16 -> Int
$cfromEnum :: I16 -> Int
toEnum :: Int -> I16
$ctoEnum :: Int -> I16
pred :: I16 -> I16
$cpred :: I16 -> I16
succ :: I16 -> I16
$csucc :: I16 -> I16
Enum, I16 -> I16 -> Bool
(I16 -> I16 -> Bool) -> (I16 -> I16 -> Bool) -> Eq I16
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: I16 -> I16 -> Bool
$c/= :: I16 -> I16 -> Bool
== :: I16 -> I16 -> Bool
$c== :: I16 -> I16 -> Bool
Eq, Enum I16
Real I16
Real I16
-> Enum I16
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> (I16, I16))
-> (I16 -> I16 -> (I16, I16))
-> (I16 -> Integer)
-> Integral I16
I16 -> Integer
I16 -> I16 -> (I16, I16)
I16 -> I16 -> I16
forall a.
Real a
-> Enum a
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> (a, a))
-> (a -> a -> (a, a))
-> (a -> Integer)
-> Integral a
toInteger :: I16 -> Integer
$ctoInteger :: I16 -> Integer
divMod :: I16 -> I16 -> (I16, I16)
$cdivMod :: I16 -> I16 -> (I16, I16)
quotRem :: I16 -> I16 -> (I16, I16)
$cquotRem :: I16 -> I16 -> (I16, I16)
mod :: I16 -> I16 -> I16
$cmod :: I16 -> I16 -> I16
div :: I16 -> I16 -> I16
$cdiv :: I16 -> I16 -> I16
rem :: I16 -> I16 -> I16
$crem :: I16 -> I16 -> I16
quot :: I16 -> I16 -> I16
$cquot :: I16 -> I16 -> I16
$cp2Integral :: Enum I16
$cp1Integral :: Real I16
Integral, Integer -> I16
I16 -> I16
I16 -> I16 -> I16
(I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> (I16 -> I16)
-> (I16 -> I16)
-> (I16 -> I16)
-> (Integer -> I16)
-> Num I16
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
fromInteger :: Integer -> I16
$cfromInteger :: Integer -> I16
signum :: I16 -> I16
$csignum :: I16 -> I16
abs :: I16 -> I16
$cabs :: I16 -> I16
negate :: I16 -> I16
$cnegate :: I16 -> I16
* :: I16 -> I16 -> I16
$c* :: I16 -> I16 -> I16
- :: I16 -> I16 -> I16
$c- :: I16 -> I16 -> I16
+ :: I16 -> I16 -> I16
$c+ :: I16 -> I16 -> I16
Num, Eq I16
Eq I16
-> (I16 -> I16 -> Ordering)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> Bool)
-> (I16 -> I16 -> I16)
-> (I16 -> I16 -> I16)
-> Ord I16
I16 -> I16 -> Bool
I16 -> I16 -> Ordering
I16 -> I16 -> I16
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: I16 -> I16 -> I16
$cmin :: I16 -> I16 -> I16
max :: I16 -> I16 -> I16
$cmax :: I16 -> I16 -> I16
>= :: I16 -> I16 -> Bool
$c>= :: I16 -> I16 -> Bool
> :: I16 -> I16 -> Bool
$c> :: I16 -> I16 -> Bool
<= :: I16 -> I16 -> Bool
$c<= :: I16 -> I16 -> Bool
< :: I16 -> I16 -> Bool
$c< :: I16 -> I16 -> Bool
compare :: I16 -> I16 -> Ordering
$ccompare :: I16 -> I16 -> Ordering
$cp1Ord :: Eq I16
Ord, ReadPrec [I16]
ReadPrec I16
Int -> ReadS I16
ReadS [I16]
(Int -> ReadS I16)
-> ReadS [I16] -> ReadPrec I16 -> ReadPrec [I16] -> Read I16
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [I16]
$creadListPrec :: ReadPrec [I16]
readPrec :: ReadPrec I16
$creadPrec :: ReadPrec I16
readList :: ReadS [I16]
$creadList :: ReadS [I16]
readsPrec :: Int -> ReadS I16
$creadsPrec :: Int -> ReadS I16
Read, Num I16
Ord I16
Num I16 -> Ord I16 -> (I16 -> Rational) -> Real I16
I16 -> Rational
forall a. Num a -> Ord a -> (a -> Rational) -> Real a
toRational :: I16 -> Rational
$ctoRational :: I16 -> Rational
$cp2Real :: Ord I16
$cp1Real :: Num I16
Real, Int -> I16 -> ShowS
[I16] -> ShowS
I16 -> String
(Int -> I16 -> ShowS)
-> (I16 -> String) -> ([I16] -> ShowS) -> Show I16
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [I16] -> ShowS
$cshowList :: [I16] -> ShowS
show :: I16 -> String
$cshow :: I16 -> String
showsPrec :: Int -> I16 -> ShowS
$cshowsPrec :: Int -> I16 -> ShowS
Show)

-- | /O(n)/ Create a new 'Text' from a 'Ptr' 'Word16' by copying the
-- contents of the array.
fromPtr :: Ptr Word16           -- ^ source array
        -> I16                  -- ^ length of source array (in 'Word16' units)
        -> IO Text
fromPtr :: Ptr Word16 -> I16 -> IO Text
fromPtr Ptr Word16
_   (I16 Int
0)   = Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
empty
fromPtr Ptr Word16
ptr (I16 Int
len) =
#if defined(ASSERTS)
    assert (len > 0) $
#endif
    Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> IO Text) -> Text -> IO Text
forall a b. (a -> b) -> a -> b
$! Array -> Int -> Int -> Text
Text Array
arr Int
0 Int
len
  where
    arr :: Array
arr = (forall s. ST s (MArray s)) -> Array
A.run (Int -> ST s (MArray s)
forall s. Int -> ST s (MArray s)
A.new Int
len ST s (MArray s) -> (MArray s -> ST s (MArray s)) -> ST s (MArray s)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MArray s -> ST s (MArray s)
forall s. MArray s -> ST s (MArray s)
copy)
    copy :: MArray s -> ST s (MArray s)
copy MArray s
marr = Ptr Word16 -> Int -> ST s (MArray s)
loop Ptr Word16
ptr Int
0
      where
        loop :: Ptr Word16 -> Int -> ST s (MArray s)
loop !Ptr Word16
p !Int
i | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len = MArray s -> ST s (MArray s)
forall (m :: * -> *) a. Monad m => a -> m a
return MArray s
marr
                   | Bool
otherwise = do
          MArray s -> Int -> Word16 -> ST s ()
forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
marr Int
i (Word16 -> ST s ()) -> ST s Word16 -> ST s ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< IO Word16 -> ST s Word16
forall a s. IO a -> ST s a
unsafeIOToST (Ptr Word16 -> IO Word16
forall a. Storable a => Ptr a -> IO a
peek Ptr Word16
p)
          Ptr Word16 -> Int -> ST s (MArray s)
loop (Ptr Word16
p Ptr Word16 -> Int -> Ptr Word16
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
2) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

-- $lowlevel
--
-- Foreign functions that use UTF-16 internally may return indices in
-- units of 'Word16' instead of characters.  These functions may
-- safely be used with such indices, as they will adjust offsets if
-- necessary to preserve the validity of a Unicode string.

-- | /O(1)/ Return the prefix of the 'Text' of @n@ 'Word16' units in
-- length.
--
-- If @n@ would cause the 'Text' to end inside a surrogate pair, the
-- end of the prefix will be advanced by one additional 'Word16' unit
-- to maintain its validity.
takeWord16 :: I16 -> Text -> Text
takeWord16 :: I16 -> Text -> Text
takeWord16 (I16 Int
n) t :: Text
t@(Text Array
arr Int
off Int
len)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0               = Text
empty
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len Bool -> Bool -> Bool
|| Int
m Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = Text
t
    | Bool
otherwise            = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
m
  where
    m :: Int
m | Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Int
n
      | Bool
otherwise                = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
    w :: Word16
w = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)

-- | /O(1)/ Return the suffix of the 'Text', with @n@ 'Word16' units
-- dropped from its beginning.
--
-- If @n@ would cause the 'Text' to begin inside a surrogate pair, the
-- beginning of the suffix will be advanced by one additional 'Word16'
-- unit to maintain its validity.
dropWord16 :: I16 -> Text -> Text
dropWord16 :: I16 -> Text -> Text
dropWord16 (I16 Int
n) t :: Text
t@(Text Array
arr Int
off Int
len)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0               = Text
t
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len Bool -> Bool -> Bool
|| Int
m Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = Text
empty
    | Bool
otherwise            = Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
m) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m)
  where
    m :: Int
m | Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Int
n
      | Bool
otherwise                = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1
    w :: Word16
w = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)

-- | /O(n)/ Copy a 'Text' to an array.  The array is assumed to be big
-- enough to hold the contents of the entire 'Text'.
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr (Text Array
arr Int
off Int
len) Ptr Word16
ptr = Ptr Word16 -> Int -> IO ()
loop Ptr Word16
ptr Int
off
  where
    end :: Int
end = Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
len
    loop :: Ptr Word16 -> Int -> IO ()
loop !Ptr Word16
p !Int
i | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
end  = () -> IO ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
               | Bool
otherwise = do
      Ptr Word16 -> Word16 -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr Word16
p (Array -> Int -> Word16
A.unsafeIndex Array
arr Int
i)
      Ptr Word16 -> Int -> IO ()
loop (Ptr Word16
p Ptr Word16 -> Int -> Ptr Word16
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
2) (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)

-- | /O(n)/ Perform an action on a temporary, mutable copy of a
-- 'Text'.  The copy is freed as soon as the action returns.
useAsPtr :: Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr :: Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) Ptr Word16 -> I16 -> IO a
action =
    Int -> (Ptr Word16 -> IO a) -> IO a
forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
2) ((Ptr Word16 -> IO a) -> IO a) -> (Ptr Word16 -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \Ptr Word16
buf -> do
      Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr Text
t Ptr Word16
buf
      Ptr Word16 -> I16 -> IO a
action (Ptr Word16 -> Ptr Word16
forall a b. Ptr a -> Ptr b
castPtr Ptr Word16
buf) (Int -> I16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)

-- | /O(n)/ Make a mutable copy of a 'Text'.
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
asForeignPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) = do
  ForeignPtr Word16
fp <- Int -> IO (ForeignPtr Word16)
forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray Int
len
  ForeignPtr Word16 -> (Ptr Word16 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word16
fp ((Ptr Word16 -> IO ()) -> IO ()) -> (Ptr Word16 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr Text
t
  (ForeignPtr Word16, I16) -> IO (ForeignPtr Word16, I16)
forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word16
fp, Int -> I16
I16 Int
len)

-- | /O(n)/ Decode a C string with explicit length, which is assumed
-- to have been encoded as UTF-8. If decoding fails, a
-- 'UnicodeException' is thrown.
--
-- @since 1.0.0.0
peekCStringLen :: CStringLen -> IO Text
peekCStringLen :: CStringLen -> IO Text
peekCStringLen CStringLen
cs = do
  ByteString
bs <- CStringLen -> IO ByteString
unsafePackCStringLen CStringLen
cs
  Text -> IO Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> IO Text) -> Text -> IO Text
forall a b. (a -> b) -> a -> b
$! ByteString -> Text
decodeUtf8 ByteString
bs

-- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
-- storage, with explicit length information. The encoded string may
-- contain NUL bytes, and is not followed by a trailing NUL byte.
--
-- The temporary storage is freed when the subcomputation terminates
-- (either normally or via an exception), so the pointer to the
-- temporary storage must /not/ be used after this function returns.
--
-- @since 1.0.0.0
withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
withCStringLen Text
t CStringLen -> IO a
act = ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen (Text -> ByteString
encodeUtf8 Text
t) CStringLen -> IO a
act