Data.String.UTF8

utf8-string-0.3.4: Support for reading and writing UTF8 Strings

Source code

Contents

Index

Data.String.UTF8

Contents

Representation
Character based operations
Representation based operations

Synopsis

data UTF8 string

class (Num s, Ord s) => UTF8Bytes b s | b -> s

fromString :: UTF8Bytes string index => String -> UTF8 string

toString :: UTF8Bytes string index => UTF8 string -> String

fromRep :: string -> UTF8 string

toRep :: UTF8 string -> string

replacement_char :: Char

uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string)

splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)

take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)

break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)

foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a

foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a

length :: UTF8Bytes string index => UTF8 string -> index

lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string]

lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string]

null :: UTF8Bytes string index => UTF8 string -> Bool

decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index)

byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)

byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

Representation

data UTF8 string

Source

The type of strings that are represented using the UTF8 encoding. The parameter is the type of the container for the representation.

Instances

Eq string => Eq (UTF8 string)

Ord string => Ord (UTF8 string)

UTF8Bytes string index => Show (UTF8 string)

class (Num s, Ord s) => UTF8Bytes b s | b -> s

Source

Instances

UTF8Bytes ByteString Int64

UTF8Bytes ByteString Int

UTF8Bytes [Word8] Int

fromString :: UTF8Bytes string index => String -> UTF8 string

Source

Converts a Haskell string into a UTF8 encoded string. Complexity: linear.

toString :: UTF8Bytes string index => UTF8 string -> String

Source

Convert a UTF8 encoded string into a Haskell string. Invalid characters are replaced by replacement_char. Complexity: linear.

fromRep :: string -> UTF8 string

Source

toRep :: UTF8 string -> string

Source

replacement_char :: Char

Source

This character is used to mark errors in a UTF8 encoded string.

Character based operations

uncons :: UTF8Bytes string index => UTF8 string -> Maybe (Char, UTF8 string)

Source

Get the first character of a byte string, if any. Invalid characters are replaced by replacement_char.

splitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)

Source

Split after a given number of characters. Negative values are treated as if they are 0. See also byteSplitAt.

take :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

Source

take n s returns the first n characters of s. If s has fewer than n characters, then we return the whole of s.

drop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

Source

drop n s returns s without its first n characters. If s has fewer than n characters, then we return an empty string.

span :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)

Source

Split a string into two parts: the first is the longest prefix that contains only characters that satisfy the predicate; the second part is the rest of the string. Invalid characters are passed as replacement_char ('\xFFFD') to the predicate.

break :: UTF8Bytes string index => (Char -> Bool) -> UTF8 string -> (UTF8 string, UTF8 string)

Source

Split a string into two parts: the first is the longest prefix that contains only characters that do not satisfy the predicate; the second part is the rest of the string. Invalid characters are passed as replacement_char to the predicate.

foldl :: UTF8Bytes string index => (a -> Char -> a) -> a -> UTF8 string -> a

Source

Traverse a bytestring (left biased). This function is strict in the accumulator.

foldr :: UTF8Bytes string index => (Char -> a -> a) -> a -> UTF8 string -> a

Source

Traverse a bytestring (right biased).

length :: UTF8Bytes string index => UTF8 string -> index

Source

Counts the number of characters encoded in the bytestring. Note that this includes replacement characters. The function is linear in the number of bytes in the representation.

lines :: UTF8Bytes string index => UTF8 string -> [UTF8 string]

Source

Split a string into a list of lines. Lines are terminated by '\n' or the end of the string. An empty line may not be terminated by the end of the string. See also lines'.

lines' :: UTF8Bytes string index => UTF8 string -> [UTF8 string]

Source

Split a string into a list of lines. Lines are terminated by '\n' or the end of the string. An empty line may not be terminated by the end of the string. This function preserves the terminators. See also lines.

Representation based operations

null :: UTF8Bytes string index => UTF8 string -> Bool

Source

Checks if there are no more bytes in the underlying representation.

decode :: UTF8Bytes string index => UTF8 string -> Maybe (Char, index)

Source

Extract the first character from the underlying representation, if one is available. It also returns the number of bytes used in the representation of the character. See also uncons, byteDrop.

byteSplitAt :: UTF8Bytes string index => index -> UTF8 string -> (UTF8 string, UTF8 string)

Source

Split after a given number of bytes in the underlying representation. See also splitAt.

byteTake :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

Source

Take only the given number of bytes from the underlying representation. See also take.

byteDrop :: UTF8Bytes string index => index -> UTF8 string -> UTF8 string

Source

Drop the given number of bytes from the underlying representation. See also drop.

Produced by Haddock version 2.6.0