{-# LANGUAGE BangPatterns, MagicHash, UnboxedTuples, GeneralizedNewtypeDeriving, DerivingStrategies, CPP #-}
{-# OPTIONS_GHC -O2 -funbox-strict-fields #-}
-- gross hack: we manuvered ourselves into a position where we can't boot GHC with a LLVM based GHC anymore.
-- LLVM based GHC's fail to compile memcmp ffi calls.  These end up as memcmp$def in the llvm ir, however we
-- don't have any prototypes and subsequently the llvm toolchain chokes on them.  Since 7fdcce6d, we use
-- ShortText for the package database.  This however introduces this very module; which through inlining ends
-- up bringing memcmp_ByteArray from bytestring:Data.ByteString.Short.Internal into scope, which results in
-- the memcmp call we choke on.
--
-- The solution thusly is to force late binding via the linker instead of inlining when comping with the
-- bootstrap compiler.  This will produce a slower (slightly less optimised) stage1 compiler only.
--
-- See issue 18857. hsyl20 deserves credit for coming up with the idea for the soltuion.
--
-- This can be removed when we exit the boot compiler window. Thus once we drop GHC-9.2 as boot compiler,
-- we can drop this code as well.
#if GHC_STAGE < 1
{-# OPTIONS_GHC -fignore-interface-pragmas #-}
#endif
-- |
-- An Unicode string for internal GHC use. Meant to replace String
-- in places where being a lazy linked is not very useful and a more
-- memory efficient data structure is desirable.

-- Very similar to FastString, but not hash-consed and with some extra instances and
-- functions for serialisation and I/O. Should be imported qualified.

module GHC.Data.ShortText (
        -- * ShortText
        ShortText(..),
        -- ** Conversion to and from String
        pack,
        unpack,
        -- ** Operations
        codepointLength,
        byteLength,
        GHC.Data.ShortText.null,
        splitFilePath,
        GHC.Data.ShortText.head,
        stripPrefix
  ) where

import Prelude

import Control.Monad (guard)
import Control.DeepSeq as DeepSeq
import Data.Binary
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Short.Internal as SBS
import GHC.Exts
import GHC.IO
import GHC.Utils.Encoding
import System.FilePath (isPathSeparator)

{-| A 'ShortText' is a modified UTF-8 encoded string meant for short strings like
file paths, module descriptions, etc.
-}
newtype ShortText = ShortText { ShortText -> ShortByteString
contents :: SBS.ShortByteString
                              }
                              deriving stock (Int -> ShortText -> ShowS
[ShortText] -> ShowS
ShortText -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ShortText] -> ShowS
$cshowList :: [ShortText] -> ShowS
show :: ShortText -> String
$cshow :: ShortText -> String
showsPrec :: Int -> ShortText -> ShowS
$cshowsPrec :: Int -> ShortText -> ShowS
Show)
                              deriving newtype (ShortText -> ShortText -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ShortText -> ShortText -> Bool
$c/= :: ShortText -> ShortText -> Bool
== :: ShortText -> ShortText -> Bool
$c== :: ShortText -> ShortText -> Bool
Eq, Eq ShortText
ShortText -> ShortText -> Bool
ShortText -> ShortText -> Ordering
ShortText -> ShortText -> ShortText
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ShortText -> ShortText -> ShortText
$cmin :: ShortText -> ShortText -> ShortText
max :: ShortText -> ShortText -> ShortText
$cmax :: ShortText -> ShortText -> ShortText
>= :: ShortText -> ShortText -> Bool
$c>= :: ShortText -> ShortText -> Bool
> :: ShortText -> ShortText -> Bool
$c> :: ShortText -> ShortText -> Bool
<= :: ShortText -> ShortText -> Bool
$c<= :: ShortText -> ShortText -> Bool
< :: ShortText -> ShortText -> Bool
$c< :: ShortText -> ShortText -> Bool
compare :: ShortText -> ShortText -> Ordering
$ccompare :: ShortText -> ShortText -> Ordering
Ord, Get ShortText
[ShortText] -> Put
ShortText -> Put
forall t. (t -> Put) -> Get t -> ([t] -> Put) -> Binary t
putList :: [ShortText] -> Put
$cputList :: [ShortText] -> Put
get :: Get ShortText
$cget :: Get ShortText
put :: ShortText -> Put
$cput :: ShortText -> Put
Binary, NonEmpty ShortText -> ShortText
ShortText -> ShortText -> ShortText
forall b. Integral b => b -> ShortText -> ShortText
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: forall b. Integral b => b -> ShortText -> ShortText
$cstimes :: forall b. Integral b => b -> ShortText -> ShortText
sconcat :: NonEmpty ShortText -> ShortText
$csconcat :: NonEmpty ShortText -> ShortText
<> :: ShortText -> ShortText -> ShortText
$c<> :: ShortText -> ShortText -> ShortText
Semigroup, Semigroup ShortText
ShortText
[ShortText] -> ShortText
ShortText -> ShortText -> ShortText
forall a.
Semigroup a -> a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [ShortText] -> ShortText
$cmconcat :: [ShortText] -> ShortText
mappend :: ShortText -> ShortText -> ShortText
$cmappend :: ShortText -> ShortText -> ShortText
mempty :: ShortText
$cmempty :: ShortText
Monoid, ShortText -> ()
forall a. (a -> ()) -> NFData a
rnf :: ShortText -> ()
$crnf :: ShortText -> ()
NFData)

-- We don't want to derive this one from ShortByteString since that one won't handle
-- UTF-8 characters correctly.
instance IsString ShortText where
  fromString :: String -> ShortText
fromString = String -> ShortText
pack

-- | /O(n)/ Returns the length of the 'ShortText' in characters.
codepointLength :: ShortText -> Int
codepointLength :: ShortText -> Int
codepointLength ShortText
st = forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$ ShortByteString -> IO Int
countUTF8Chars (ShortText -> ShortByteString
contents ShortText
st)
-- | /O(1)/ Returns the length of the 'ShortText' in bytes.
byteLength :: ShortText -> Int
byteLength :: ShortText -> Int
byteLength ShortText
st = ShortByteString -> Int
SBS.length forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(n)/ Convert a 'String' into a 'ShortText'.
pack :: String -> ShortText
pack :: String -> ShortText
pack String
s = forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$ ShortByteString -> ShortText
ShortText forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> IO ShortByteString
utf8EncodeShortByteString String
s

-- | /O(n)/ Convert a 'ShortText' into a 'String'.
unpack :: ShortText -> String
unpack :: ShortText -> String
unpack ShortText
st = ShortByteString -> String
utf8DecodeShortByteString forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(1)/ Test whether the 'ShortText' is the empty string.
null :: ShortText -> Bool
null :: ShortText -> Bool
null ShortText
st = ShortByteString -> Bool
SBS.null forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(n)/ Split a 'ShortText' representing a file path into its components by separating
-- on the file separator characters for this platform.
splitFilePath :: ShortText -> [ShortText]
-- This seems dangerous, but since the path separators are in the ASCII set they map down
-- to a single byte when encoded in UTF-8 and so this should work even when casting to ByteString.
-- We DeepSeq.force the resulting list so that we can be sure that no references to the
-- bytestring in `st'` remain in unevaluated thunks, which might prevent `st'` from being
-- collected by the GC.
splitFilePath :: ShortText -> [ShortText]
splitFilePath ShortText
st = forall a. NFData a => a -> a
DeepSeq.force forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (ShortByteString -> ShortText
ShortText forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ShortByteString
SBS.toShort) forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> ByteString -> [ByteString]
B8.splitWith Char -> Bool
isPathSeparator ByteString
st'
  where st' :: ByteString
st' = ShortByteString -> ByteString
SBS.fromShort forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st

-- | /O(1)/ Returns the first UTF-8 codepoint in the 'ShortText'. Depending on the string in
-- question, this may or may not be the actual first character in the string due to Unicode
-- non-printable characters.
head :: ShortText -> Char
head :: ShortText -> Char
head ShortText
st
  | ShortByteString -> Bool
SBS.null forall a b. (a -> b) -> a -> b
$ ShortText -> ShortByteString
contents ShortText
st = forall a. HasCallStack => String -> a
error String
"head: Empty ShortText"
  | Bool
otherwise              = forall a. [a] -> a
Prelude.head forall a b. (a -> b) -> a -> b
$ ShortText -> String
unpack ShortText
st

-- | /O(n)/ The 'stripPrefix' function takes two 'ShortText's and returns 'Just' the remainder of
-- the second iff the first is its prefix, and otherwise Nothing.
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix :: ShortText -> ShortText -> Maybe ShortText
stripPrefix ShortText
prefix ShortText
st = do
  let !(SBS.SBS ByteArray#
prefixBA) = ShortText -> ShortByteString
contents ShortText
prefix
  let !(SBS.SBS ByteArray#
stBA)     = ShortText -> ShortByteString
contents ShortText
st
  let prefixLength :: Int#
prefixLength        = ByteArray# -> Int#
sizeofByteArray# ByteArray#
prefixBA
  let stLength :: Int#
stLength            = ByteArray# -> Int#
sizeofByteArray# ByteArray#
stBA
  -- If the length of 'st' is not >= than the length of 'prefix', it is impossible for 'prefix'
  -- to be the prefix of `st`.
  forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ (Int# -> Int
I# Int#
stLength) forall a. Ord a => a -> a -> Bool
>= (Int# -> Int
I# Int#
prefixLength)
  -- 'prefix' is a prefix of 'st' if the first <length of prefix> bytes of 'st'
  -- are equal to 'prefix'
  forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ Int# -> Int
I# (ByteArray# -> Int# -> ByteArray# -> Int# -> Int# -> Int#
compareByteArrays# ByteArray#
prefixBA Int#
0# ByteArray#
stBA Int#
0# Int#
prefixLength) forall a. Eq a => a -> a -> Bool
== Int
0
  -- Allocate a new ByteArray# and copy the remainder of the 'st' into it
  forall a. IO a -> a
unsafeDupablePerformIO forall a b. (a -> b) -> a -> b
$ do
    let newBAsize :: Int#
newBAsize = (Int#
stLength Int# -> Int# -> Int#
-# Int#
prefixLength)
    ShortByteString
newSBS <- forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s0 ->
      let !(# State# RealWorld
s1, MutableByteArray# RealWorld
ba #)  = forall d. Int# -> State# d -> (# State# d, MutableByteArray# d #)
newByteArray# Int#
newBAsize State# RealWorld
s0
          s2 :: State# RealWorld
s2             = forall d.
ByteArray#
-> Int#
-> MutableByteArray# d
-> Int#
-> Int#
-> State# d
-> State# d
copyByteArray# ByteArray#
stBA Int#
prefixLength MutableByteArray# RealWorld
ba Int#
0# Int#
newBAsize State# RealWorld
s1
          !(# State# RealWorld
s3, ByteArray#
fba #) = forall d.
MutableByteArray# d -> State# d -> (# State# d, ByteArray# #)
unsafeFreezeByteArray# MutableByteArray# RealWorld
ba State# RealWorld
s2
      in  (# State# RealWorld
s3, ByteArray# -> ShortByteString
SBS.SBS ByteArray#
fba #)
    forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> ShortText
ShortText forall a b. (a -> b) -> a -> b
$ ShortByteString
newSBS