{-# LANGUAGE BangPatterns, ScopedTypeVariables #-}

-- |
-- Module      : Data.Text.Internal.Search
-- Copyright   : (c) Bryan O'Sullivan 2009
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- Fast substring search for 'Text', based on work by Boyer, Moore,
-- Horspool, Sunday, and Lundh.
--
-- References:
--
-- * R. S. Boyer, J. S. Moore: A Fast String Searching Algorithm.
--   Communications of the ACM, 20, 10, 762-772 (1977)
--
-- * R. N. Horspool: Practical Fast Searching in Strings.  Software -
--   Practice and Experience 10, 501-506 (1980)
--
-- * D. M. Sunday: A Very Fast Substring Search Algorithm.
--   Communications of the ACM, 33, 8, 132-142 (1990)
--
-- * F. Lundh: The Fast Search Algorithm.
--   <http://effbot.org/zone/stringlib.htm> (2006)

module Data.Text.Internal.Search
    (
      indices
    ) where

import qualified Data.Text.Array as A
import Data.Word (Word64, Word16)
import Data.Text.Internal (Text(..))
import Data.Bits ((.|.), (.&.))
import Data.Text.Internal.Unsafe.Shift (shiftL)

data T = {-# UNPACK #-} !Word64 :* {-# UNPACK #-} !Int

-- | /O(n+m)/ Find the offsets of all non-overlapping indices of
-- @needle@ within @haystack@.  The offsets returned represent
-- uncorrected indices in the low-level \"needle\" array, to which its
-- offset must be added.
--
-- In (unlikely) bad cases, this algorithm's complexity degrades
-- towards /O(n*m)/.
indices :: Text                -- ^ Substring to search for (@needle@)
        -> Text                -- ^ Text to search in (@haystack@)
        -> [Int]
indices :: Text -> Text -> [Int]
indices _needle :: Text
_needle@(Text Array
narr Int
noff Int
nlen) _haystack :: Text
_haystack@(Text Array
harr Int
hoff Int
hlen)
    | Int
nlen forall a. Eq a => a -> a -> Bool
== Int
1              = Word16 -> [Int]
scanOne (Int -> Word16
nindex Int
0)
    | Int
nlen forall a. Ord a => a -> a -> Bool
<= Int
0 Bool -> Bool -> Bool
|| Int
ldiff forall a. Ord a => a -> a -> Bool
< Int
0 = []
    | Bool
otherwise              = Int -> [Int]
scan Int
0
  where
    ldiff :: Int
ldiff    = Int
hlen forall a. Num a => a -> a -> a
- Int
nlen
    nlast :: Int
nlast    = Int
nlen forall a. Num a => a -> a -> a
- Int
1
    z :: Word16
z        = Int -> Word16
nindex Int
nlast
    nindex :: Int -> Word16
nindex Int
k = Array -> Int -> Word16
A.unsafeIndex Array
narr (Int
noffforall a. Num a => a -> a -> a
+Int
k)
    hindex :: Int -> Word16
hindex Int
k = Array -> Int -> Word16
A.unsafeIndex Array
harr (Int
hoffforall a. Num a => a -> a -> a
+Int
k)
    hindex' :: Int -> Word16
hindex' Int
k | Int
k forall a. Eq a => a -> a -> Bool
== Int
hlen  = Word16
0
              | Bool
otherwise = Array -> Int -> Word16
A.unsafeIndex Array
harr (Int
hoffforall a. Num a => a -> a -> a
+Int
k)
    buildTable :: Int -> Word64 -> Int -> T
buildTable !Int
i !Word64
msk !Int
skp
        | Int
i forall a. Ord a => a -> a -> Bool
>= Int
nlast           = (Word64
msk forall a. Bits a => a -> a -> a
.|. Word16 -> Word64
swizzle Word16
z) Word64 -> Int -> T
:* Int
skp
        | Bool
otherwise            = Int -> Word64 -> Int -> T
buildTable (Int
iforall a. Num a => a -> a -> a
+Int
1) (Word64
msk forall a. Bits a => a -> a -> a
.|. Word16 -> Word64
swizzle Word16
c) Int
skp'
        where c :: Word16
c                = Int -> Word16
nindex Int
i
              skp' :: Int
skp' | Word16
c forall a. Eq a => a -> a -> Bool
== Word16
z    = Int
nlen forall a. Num a => a -> a -> a
- Int
i forall a. Num a => a -> a -> a
- Int
2
                   | Bool
otherwise = Int
skp

    swizzle :: Word16 -> Word64
    swizzle :: Word16 -> Word64
swizzle Word16
k = Word64
1 forall a. UnsafeShift a => a -> Int -> a
`shiftL` (Word16 -> Int
word16ToInt Word16
k forall a. Bits a => a -> a -> a
.&. Int
0x3f)

    scan :: Int -> [Int]
scan !Int
i
        | Int
i forall a. Ord a => a -> a -> Bool
> Int
ldiff                  = []
        | Word16
c forall a. Eq a => a -> a -> Bool
== Word16
z Bool -> Bool -> Bool
&& Int -> Bool
candidateMatch Int
0 = Int
i forall a. a -> [a] -> [a]
: Int -> [Int]
scan (Int
i forall a. Num a => a -> a -> a
+ Int
nlen)
        | Bool
otherwise                  = Int -> [Int]
scan (Int
i forall a. Num a => a -> a -> a
+ Int
delta)
        where c :: Word16
c = Int -> Word16
hindex (Int
i forall a. Num a => a -> a -> a
+ Int
nlast)
              candidateMatch :: Int -> Bool
candidateMatch !Int
j
                    | Int
j forall a. Ord a => a -> a -> Bool
>= Int
nlast               = Bool
True
                    | Int -> Word16
hindex (Int
iforall a. Num a => a -> a -> a
+Int
j) forall a. Eq a => a -> a -> Bool
/= Int -> Word16
nindex Int
j = Bool
False
                    | Bool
otherwise                = Int -> Bool
candidateMatch (Int
jforall a. Num a => a -> a -> a
+Int
1)
              delta :: Int
delta | Bool
nextInPattern = Int
nlen forall a. Num a => a -> a -> a
+ Int
1
                    | Word16
c forall a. Eq a => a -> a -> Bool
== Word16
z        = Int
skip forall a. Num a => a -> a -> a
+ Int
1
                    | Bool
otherwise     = Int
1
                where nextInPattern :: Bool
nextInPattern = Word64
mask forall a. Bits a => a -> a -> a
.&. Word16 -> Word64
swizzle (Int -> Word16
hindex' (Int
iforall a. Num a => a -> a -> a
+Int
nlen)) forall a. Eq a => a -> a -> Bool
== Word64
0
              !(Word64
mask :* Int
skip)       = Int -> Word64 -> Int -> T
buildTable Int
0 Word64
0 (Int
nlenforall a. Num a => a -> a -> a
-Int
2)
    scanOne :: Word16 -> [Int]
scanOne Word16
c = Int -> [Int]
loop Int
0
        where loop :: Int -> [Int]
loop !Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
hlen     = []
                      | Int -> Word16
hindex Int
i forall a. Eq a => a -> a -> Bool
== Word16
c = Int
i forall a. a -> [a] -> [a]
: Int -> [Int]
loop (Int
iforall a. Num a => a -> a -> a
+Int
1)
                      | Bool
otherwise     = Int -> [Int]
loop (Int
iforall a. Num a => a -> a -> a
+Int
1)
{-# INLINE indices #-}

word16ToInt :: Word16 -> Int
word16ToInt :: Word16 -> Int
word16ToInt = forall a b. (Integral a, Num b) => a -> b
fromIntegral