123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140 |
- module Data.Attoparsec.ByteString.Char8.Extras (
- -- * Tools for character and Word8 tests
- isCHorizontalSpace,
- skipHorizontalSpace,
- asW8,
- -- * Parsing for printable types
- parseShow,
- parseShowCI,
- parsePrintable,
- parsePrintableCI,
- parseEnum,
- parseEnumCI,
- -- * Other parsing utilities
- decodeBase64,
- parserFold,
- quotedString
- ) where
- import Data.Word8 as W
- import qualified Data.Char as C
- import Data.Attoparsec.ByteString.Char8
- import qualified Data.Attoparsec.ByteString.Char8 as A
- import Data.ByteString (ByteString)
- import Control.Applicative ((<|>))
- import qualified Data.ByteString.Base64 as B64
- import qualified Codec.Binary.UTF8.String as UTF8
- import qualified Data.ByteString as BS
- utf8bs :: String -> ByteString
- utf8bs = BS.pack . UTF8.encode
- -- | A version of attoparsec's isHorizontalSpace that takes a Char as argument.
- isCHorizontalSpace :: Char -> Bool
- isCHorizontalSpace = A.isHorizontalSpace . asW8
- -- | Skips through the characters that satisfy attoparsec's isHorizontalSpace.
- -- Never fails.
- skipHorizontalSpace :: Parser ()
- skipHorizontalSpace = A.skipWhile (A.isHorizontalSpace . asW8)
- -- | Converts a character into it's Word8 value, truncating any extra bytes.
- asW8 :: Char -> Word8
- asW8 = fromIntegral . C.ord
- {- |
- parserFold f x0
- Acquires functions by parsing the data with f, and applies it to the result of the previous function,
- until f fails.
- -}
- parserFold :: Parser (a -> a) -> a -> Parser a
- parserFold f a = (
- do
- r <- f
- let a' = r a
- parserFold f a'
- ) <|> return a
- {- |
- Given a list of options, returns the first one that is show like the input.
- Does not consume input on the case of failure.
- -}
- parseShow :: Show a => [a] -> Parser a
- parseShow = foldr
- (\ s -> (<|>) ((string . utf8bs . show $ s) *> return s))
- (fail "Invalid string")
- -- | Like parseShow, but case insensitive.
- parseShowCI :: Show a => [a] -> Parser a
- parseShowCI = foldr
- (\ s -> (<|>) ((A.stringCI . utf8bs . show $ s) *> return s))
- (fail "Invalid string")
- {- |
- Given a list of options and a printing function, returns the first option that is
- printed like the input.
- -}
- parsePrintable :: (a -> String) -> [a] -> Parser a
- parsePrintable _ [] = fail "Invalid string"
- parsePrintable f (s:ss) = ((string . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
- -- | Like parsePrintable, but case insensitive
- parsePrintableCI :: (a -> String) -> [a] -> Parser a
- parsePrintableCI _ [] = fail "Invalid string"
- parsePrintableCI f (s:ss) = ((A.stringCI . utf8bs . f $ s) *> return s) <|> parsePrintableCI f ss
- -- | Acts like parseShow, trying every possible value
- parseEnum :: (Enum a, Bounded a, Show a) => Parser a
- parseEnum = let
- l = [minBound .. maxBound]
- in parseShow l
- -- | Acts like parseShowCI, trying every possible value
- parseEnumCI :: (Enum a, Bounded a, Show a) => Parser a
- parseEnumCI = let
- l = [minBound .. maxBound]
- in parseShowCI l
- -- | Consumes base64 encoded text, returning its binary decoded value.
- decodeBase64 :: Parser ByteString
- decodeBase64 = do
- b64 <- A.takeWhile isBase64Char
- case B64.decode b64 of
- Left _ -> fail "Invalid base64 character sequence"
- Right v -> return v
- where
- isBase64Char :: Char -> Bool
- isBase64Char c = C.isAlphaNum c || c == '+' || c == '/'
- data QuoteScannerState = Quote | Escape
- {- |
- quotedStr escape forbiden_plain forbiden_quote
- Parses a possibly quoted string, where the characters in
- forbiden_plain are forbiden in unquoted text, and the
- characters in forbiden_quote are forbiden in quoted text.
- No forbiden characters are assumed (not even space). Thus,
- if no forbiden character is supplied, the parser will not
- terminate.
- Any character may be escaped with the escape character.
- -}
- quotedString :: C.Char -> [C.Char] -> [C.Char] -> A.Parser ByteString
- quotedString e fp fq = A.scan [] quoteScanner
- where
- quoteScanner :: [QuoteScannerState] -> Char -> Maybe [QuoteScannerState]
- quoteScanner [] c
- | c `elem` "'\"" = Just [Quote]
- | c == e = Just [Escape]
- | c `elem` fp = Nothing
- | otherwise = Just []
- quoteScanner fs@(Quote:ss) c
- | c `elem` "'\"" = Just ss
- | c == e = Just (Escape:fs)
- | c `elem` fq = Nothing
- | otherwise = Just fs
- quoteScanner (Escape:ss) _ = Just ss
|