{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Char
-- Copyright   :  (c) The University of Glasgow 2001
-- License     :  BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer  :  libraries@haskell.org
-- Stability   :  stable
-- Portability :  portable
--
-- The Char type and associated operations.
--
-----------------------------------------------------------------------------

module Data.Char
    (
      Char

    -- * Character classification
    -- | Unicode characters are divided into letters, numbers, marks,
    -- punctuation, symbols, separators (including spaces) and others
    -- (including control characters).
    , isControl, isSpace
    , isLower, isLowerCase, isUpper, isUpperCase, isAlpha, isAlphaNum, isPrint
    , isDigit, isOctDigit, isHexDigit
    , isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator

    -- ** Subranges
    , isAscii, isLatin1
    , isAsciiUpper, isAsciiLower

    -- ** Unicode general categories
    , GeneralCategory(..), generalCategory

    -- * Case conversion
    , toUpper, toLower, toTitle

    -- * Single digit characters
    , digitToInt
    , intToDigit

    -- * Numeric representations
    , ord
    , chr

    -- * String representations
    , showLitChar
    , lexLitChar
    , readLitChar
    ) where

import GHC.Internal.Base
import GHC.Internal.Char
import GHC.Internal.Real (fromIntegral)
import GHC.Internal.Show
import GHC.Internal.Read (readLitChar, lexLitChar)
import GHC.Internal.Unicode
import GHC.Internal.Num

-- $setup
-- Allow the use of Prelude in doctests.
-- >>> import Prelude

-- | Convert a single digit 'Char' to the corresponding 'Int'.  This
-- function fails unless its argument satisfies 'isHexDigit', but
-- recognises both upper- and lower-case hexadecimal digits (that
-- is, @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@).
--
-- ==== __Examples__
--
-- Characters @\'0\'@ through @\'9\'@ are converted properly to
-- @0..9@:
--
-- >>> map digitToInt ['0'..'9']
-- [0,1,2,3,4,5,6,7,8,9]
--
-- Both upper- and lower-case @\'A\'@ through @\'F\'@ are converted
-- as well, to @10..15@.
--
-- >>> map digitToInt ['a'..'f']
-- [10,11,12,13,14,15]
-- >>> map digitToInt ['A'..'F']
-- [10,11,12,13,14,15]
--
-- Anything else throws an exception:
--
-- >>> digitToInt 'G'
-- *** Exception: Char.digitToInt: not a digit 'G'
-- >>> digitToInt '♥'
-- *** Exception: Char.digitToInt: not a digit '\9829'
--
digitToInt :: Char -> Int
digitToInt :: Char -> Int
digitToInt Char
c
  | (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
dec::Word) Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
9 = Int
dec
  | (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
hexl::Word) Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
5 = Int
hexl Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
10
  | (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
hexu::Word) Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
5 = Int
hexu Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
10
  | Bool
otherwise = [Char] -> Int
forall a. [Char] -> a
errorWithoutStackTrace ([Char]
"Char.digitToInt: not a digit " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Char -> [Char]
forall a. Show a => a -> [Char]
show Char
c) -- sigh
  where
    dec :: Int
dec = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'0'
    hexl :: Int
hexl = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'a'
    hexu :: Int
hexu = Char -> Int
ord Char
c Int -> Int -> Int
forall a. Num a => a -> a -> a
- Char -> Int
ord Char
'A'

-- derived character classifiers

-- | Selects alphabetic Unicode characters (lower-case, upper-case and
-- title-case letters, plus letters of caseless scripts and
-- modifiers letters). This function is equivalent to
-- 'Data.Char.isAlpha'.
--
-- This function returns 'True' if its argument has one of the
-- following 'GeneralCategory's, or 'False' otherwise:
--
-- * 'UppercaseLetter'
-- * 'LowercaseLetter'
-- * 'TitlecaseLetter'
-- * 'ModifierLetter'
-- * 'OtherLetter'
--
-- These classes are defined in the
-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
-- part of the Unicode standard. The same document defines what is
-- and is not a \"Letter\".
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> isLetter 'a'
-- True
-- >>> isLetter 'A'
-- True
-- >>> isLetter 'λ'
-- True
-- >>> isLetter '0'
-- False
-- >>> isLetter '%'
-- False
-- >>> isLetter '♥'
-- False
-- >>> isLetter '\31'
-- False
--
-- Ensure that 'isLetter' and 'isAlpha' are equivalent.
--
-- >>> let chars = [(chr 0)..]
-- >>> let letters = map isLetter chars
-- >>> let alphas = map isAlpha chars
-- >>> letters == alphas
-- True
--
isLetter :: Char -> Bool
isLetter :: Char -> Bool
isLetter Char
c = case Char -> GeneralCategory
generalCategory Char
c of
        GeneralCategory
UppercaseLetter         -> Bool
True
        GeneralCategory
LowercaseLetter         -> Bool
True
        GeneralCategory
TitlecaseLetter         -> Bool
True
        GeneralCategory
ModifierLetter          -> Bool
True
        GeneralCategory
OtherLetter             -> Bool
True
        GeneralCategory
_                       -> Bool
False

-- | Selects Unicode mark characters, for example accents and the
-- like, which combine with preceding characters.
--
-- This function returns 'True' if its argument has one of the
-- following 'GeneralCategory's, or 'False' otherwise:
--
-- * 'NonSpacingMark'
-- * 'SpacingCombiningMark'
-- * 'EnclosingMark'
--
-- These classes are defined in the
-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
-- part of the Unicode standard. The same document defines what is
-- and is not a \"Mark\".
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> isMark 'a'
-- False
-- >>> isMark '0'
-- False
--
-- Combining marks such as accent characters usually need to follow
-- another character before they become printable:
--
-- >>> map isMark "ò"
-- [False,True]
--
-- Puns are not necessarily supported:
--
-- >>> isMark '✓'
-- False
--
isMark :: Char -> Bool
isMark :: Char -> Bool
isMark Char
c = case Char -> GeneralCategory
generalCategory Char
c of
        GeneralCategory
NonSpacingMark          -> Bool
True
        GeneralCategory
SpacingCombiningMark    -> Bool
True
        GeneralCategory
EnclosingMark           -> Bool
True
        GeneralCategory
_                       -> Bool
False

-- | Selects Unicode numeric characters, including digits from various
-- scripts, Roman numerals, et cetera.
--
-- This function returns 'True' if its argument has one of the
-- following 'GeneralCategory's, or 'False' otherwise:
--
-- * 'DecimalNumber'
-- * 'LetterNumber'
-- * 'OtherNumber'
--
-- These classes are defined in the
-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
-- part of the Unicode standard. The same document defines what is
-- and is not a \"Number\".
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> isNumber 'a'
-- False
-- >>> isNumber '%'
-- False
-- >>> isNumber '3'
-- True
--
-- ASCII @\'0\'@ through @\'9\'@ are all numbers:
--
-- >>> and $ map isNumber ['0'..'9']
-- True
--
-- Unicode Roman numerals are \"numbers\" as well:
--
-- >>> isNumber 'Ⅸ'
-- True
--
isNumber :: Char -> Bool
isNumber :: Char -> Bool
isNumber Char
c = case Char -> GeneralCategory
generalCategory Char
c of
        GeneralCategory
DecimalNumber           -> Bool
True
        GeneralCategory
LetterNumber            -> Bool
True
        GeneralCategory
OtherNumber             -> Bool
True
        GeneralCategory
_                       -> Bool
False

-- | Selects Unicode space and separator characters.
--
-- This function returns 'True' if its argument has one of the
-- following 'GeneralCategory's, or 'False' otherwise:
--
-- * 'Space'
-- * 'LineSeparator'
-- * 'ParagraphSeparator'
--
-- These classes are defined in the
-- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>,
-- part of the Unicode standard. The same document defines what is
-- and is not a \"Separator\".
--
-- ==== __Examples__
--
-- Basic usage:
--
-- >>> isSeparator 'a'
-- False
-- >>> isSeparator '6'
-- False
-- >>> isSeparator ' '
-- True
--
-- Warning: newlines and tab characters are not considered
-- separators.
--
-- >>> isSeparator '\n'
-- False
-- >>> isSeparator '\t'
-- False
--
-- But some more exotic characters are (like HTML's @&nbsp;@):
--
-- >>> isSeparator '\160'
-- True
--
isSeparator :: Char -> Bool
isSeparator :: Char -> Bool
isSeparator Char
c = case Char -> GeneralCategory
generalCategory Char
c of
        GeneralCategory
Space                   -> Bool
True
        GeneralCategory
LineSeparator           -> Bool
True
        GeneralCategory
ParagraphSeparator      -> Bool
True
        GeneralCategory
_                       -> Bool
False