module Distribution.Utils.String
(
decodeStringUtf8
, encodeStringUtf8
) where
import Data.Word
import Data.Bits
import Data.Char (chr,ord)
-- | Decode a 'String' from a list of UTF-8 encoded octets.
--
-- Decoding is total: malformed input never raises an error but is replaced
-- by the replacement character (@U+FFFD@). This covers stray continuation
-- bytes, truncated sequences, overlong encodings, code points in the
-- surrogate range @U+D800@-@U+DFFF@, values above @U+10FFFF@ and the lead
-- bytes @0xFE@ and @0xFF@.
--
-- See also 'encodeStringUtf8'.
decodeStringUtf8 :: [Word8] -> String
decodeStringUtf8 = go
  where
    -- Dispatch on the lead byte of the next sequence.
    go :: [Word8] -> String
    go [] = []
    go (c : cs)
      -- Single-byte (ASCII) code point.
      | c <= 0x7F = chr (fromIntegral c) : go cs
      -- A continuation byte where a lead byte was expected.
      | c <= 0xBF = replacementChar : go cs
      | c <= 0xDF = twoBytes c cs
      | c <= 0xEF = moreBytes 3 0x800 cs (fromIntegral $ c .&. 0xF)
      | c <= 0xF7 = moreBytes 4 0x10000 cs (fromIntegral $ c .&. 0x7)
      -- Five and six byte forms are still consumed as a unit, but they can
      -- never satisfy both the minimum value and the U+10FFFF bound checked
      -- in 'moreBytes', so they always decode to the replacement character.
      | c <= 0xFB = moreBytes 5 0x200000 cs (fromIntegral $ c .&. 0x3)
      | c <= 0xFD = moreBytes 6 0x4000000 cs (fromIntegral $ c .&. 0x1)
      -- 0xFE and 0xFF.
      | otherwise = replacementChar : go cs

    -- Decode a two-byte sequence, given its lead byte and the remaining
    -- input.
    twoBytes :: Word8 -> [Word8] -> String
    twoBytes c0 (c1 : cs')
      | c1 .&. 0xC0 == 0x80 =
          let d :: Int
              d = (fromIntegral (c0 .&. 0x1F) `shiftL` 6)
                    .|. fromIntegral (c1 .&. 0x3F)
           in if d >= 0x80
                then chr d : go cs'
                -- Overlong encoding of a value below 0x80.
                else replacementChar : go cs'
    -- Missing or invalid continuation byte. Here cs' is the whole remaining
    -- input, so an offending byte is decoded again on its own.
    twoBytes _ cs' = replacementChar : go cs'

    -- Decode the tail of a sequence of three or more bytes.
    --
    -- Arguments: the number of bytes still expected plus one (it counts
    -- down to 1), the smallest code point that legitimately needs this
    -- sequence length, the remaining input, and the bits accumulated so
    -- far.
    moreBytes :: Int -> Int -> [Word8] -> Int -> String
    moreBytes 1 overlong cs' acc
      | overlong <= acc
      , acc <= 0x10FFFF
      , acc < 0xD800 || 0xDFFF < acc =
          chr acc : go cs'
      -- Overlong, out of range, or a surrogate.
      | otherwise =
          replacementChar : go cs'
    moreBytes byteCount overlong (cn : cs') acc
      | cn .&. 0xC0 == 0x80 =
          moreBytes
            (byteCount - 1)
            overlong
            cs'
            ((acc `shiftL` 6) .|. (fromIntegral cn .&. 0x3F))
    -- Input ended or the next byte is not a continuation byte. Here cs' is
    -- the whole remaining input, so an offending byte is decoded again on
    -- its own.
    moreBytes _ _ cs' _ =
      replacementChar : go cs'

    -- U+FFFD, substituted for every malformed sequence.
    replacementChar :: Char
    replacementChar = '\xfffd'
-- | Encode a 'String' as a list of UTF-8 octets.
--
-- Code points in the surrogate range @U+D800@-@U+DFFF@ are encoded as the
-- replacement character (@U+FFFD@, bytes @0xEF 0xBF 0xBD@).
--
-- See also 'decodeStringUtf8'.
encodeStringUtf8 :: String -> [Word8]
encodeStringUtf8 [] = []
encodeStringUtf8 (c : cs)
  -- One byte: ASCII.
  | c <= '\x07F' = w8 : rest
  -- Two bytes.
  | c <= '\x7FF' =
      (0xC0 .|. w8ShiftR 6)
        : continuation 0
        : rest
  -- Three bytes, below the surrogate range.
  | c <= '\xD7FF' =
      (0xE0 .|. w8ShiftR 12)
        : continuation 6
        : continuation 0
        : rest
  -- Surrogates: emit the encoding of U+FFFD instead.
  | c <= '\xDFFF' = 0xEF : 0xBF : 0xBD : rest
  -- Three bytes, above the surrogate range.
  | c <= '\xFFFF' =
      (0xE0 .|. w8ShiftR 12)
        : continuation 6
        : continuation 0
        : rest
  -- Four bytes.
  | otherwise =
      (0xf0 .|. w8ShiftR 18)
        : continuation 12
        : continuation 6
        : continuation 0
        : rest
  where
    -- Encoding of the remaining characters.
    rest :: [Word8]
    rest = encodeStringUtf8 cs

    -- Low eight bits of the code point.
    w8 :: Word8
    w8 = fromIntegral (ord c) :: Word8

    -- The code point shifted right by the given number of bits, truncated
    -- to its low eight bits.
    w8ShiftR :: Int -> Word8
    w8ShiftR = fromIntegral . shiftR (ord c)

    -- Continuation byte carrying the six code point bits that start at the
    -- given bit offset; offset 0 yields the same bits as 'w8'.
    continuation :: Int -> Word8
    continuation n = 0x80 .|. (w8ShiftR n .&. 0x3F)