module Data.Attoparsec.ByteString.Char8.Extras ( -- * Tools for character and Word8 tests isCHorizontalSpace, skipHorizontalSpace, asW8, -- * Parsing for printable types parseShow, parseShowCI, parsePrintable, parsePrintableCI, parseEnum, parseEnumCI, -- * Other parsing utilities decodeBase64, parserFold, quotedString ) where import Data.Word8 as W import qualified Data.Char as C import Data.Attoparsec.ByteString.Char8 import qualified Data.Attoparsec.ByteString.Char8 as A import Data.ByteString (ByteString) import Control.Applicative ((<|>)) import qualified Data.ByteString.Base64 as B64 import qualified Codec.Binary.UTF8.String as UTF8 import qualified Data.ByteString as BS utf8bs :: String -> ByteString utf8bs = BS.pack . UTF8.encode -- | A version of attoparsec's isHorizontalSpace that takes a Char as argument. isCHorizontalSpace :: Char -> Bool isCHorizontalSpace = A.isHorizontalSpace . asW8 -- | Skips through the characters that satisfy attoparsec's isHorizontalSpace. -- Never fails. skipHorizontalSpace :: Parser () skipHorizontalSpace = A.skipWhile (A.isHorizontalSpace . asW8) -- | Converts a character into it's Word8 value, truncating any extra bytes. asW8 :: Char -> Word8 asW8 = fromIntegral . C.ord {- | parserFold f x0 Acquires functions by parsing the data with f, and applies it to the result of the previous function, until f fails. -} parserFold :: Parser (a -> a) -> a -> Parser a parserFold f a = ( do r <- f let a' = r a parserFold f a' ) <|> return a {- | Given a list of options, returns the first one that is show like the input. Does not consume input on the case of failure. -} parseShow :: Show a => [a] -> Parser a parseShow = foldr (\ s -> (<|>) ((string . utf8bs . show $ s) *> return s)) (fail "Invalid string") -- | Like parseShow, but case insensitive. parseShowCI :: Show a => [a] -> Parser a parseShowCI = foldr (\ s -> (<|>) ((A.stringCI . utf8bs . show $ s) *> return s)) (fail "Invalid string") {- | Given a list of options and a printing function, returns the first option that is printed like the input. -} parsePrintable :: (a -> String) -> [a] -> Parser a parsePrintable _ [] = fail "Invalid string" parsePrintable f (s:ss) = ((string . utf8bs . f $ s) *> return s) <|> parsePrintable f ss -- | Like parsePrintable, but case insensitive parsePrintableCI :: (a -> String) -> [a] -> Parser a parsePrintableCI _ [] = fail "Invalid string" parsePrintableCI f (s:ss) = ((A.stringCI . utf8bs . f $ s) *> return s) <|> parsePrintableCI f ss -- | Acts like parseShow, trying every possible value parseEnum :: (Enum a, Bounded a, Show a) => Parser a parseEnum = let l = [minBound .. maxBound] in parseShow l -- | Acts like parseShowCI, trying every possible value parseEnumCI :: (Enum a, Bounded a, Show a) => Parser a parseEnumCI = let l = [minBound .. maxBound] in parseShowCI l -- | Consumes base64 encoded text, returning its binary decoded value. decodeBase64 :: Parser ByteString decodeBase64 = do b64 <- A.takeWhile isBase64Char case B64.decode b64 of Left _ -> fail "Invalid base64 character sequence" Right v -> return v where isBase64Char :: Char -> Bool isBase64Char c = C.isAlphaNum c || c == '+' || c == '/' data QuoteScannerState = Quote | Escape {- | quotedStr escape forbiden_plain forbiden_quote Parses a possibly quoted string, where the characters in forbiden_plain are forbiden in unquoted text, and the characters in forbiden_quote are forbiden in quoted text. No forbiden characters are assumed (not even space). Thus, if no forbiden character is supplied, the parser will not terminate. Any character may be escaped with the escape character. -} quotedString :: C.Char -> [C.Char] -> [C.Char] -> A.Parser ByteString quotedString e fp fq = A.scan [] quoteScanner where quoteScanner :: [QuoteScannerState] -> Char -> Maybe [QuoteScannerState] quoteScanner [] c | c `elem` "'\"" = Just [Quote] | c == e = Just [Escape] | c `elem` fp = Nothing | otherwise = Just [] quoteScanner fs@(Quote:ss) c | c `elem` "'\"" = Just ss | c == e = Just (Escape:fs) | c `elem` fq = Nothing | otherwise = Just fs quoteScanner (Escape:ss) _ = Just ss