Extras.hs 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. module Data.Attoparsec.ByteString.Char8.Extras (
  2. -- * Tools for character and Word8 tests
  3. isCHorizontalSpace,
  4. skipHorizontalSpace,
  5. asW8,
  6. -- * Parsing for printable types
  7. parseShow,
  8. parseShowCI,
  9. parsePrintable,
  10. parsePrintableCI,
  11. parseEnum,
  12. parseEnumCI,
  13. -- * Other parsing utilities
  14. decodeBase64,
  15. parserFold,
  16. quotedString
  17. ) where
  18. import Data.Word8 as W
  19. import qualified Data.Char as C
  20. import Data.Attoparsec.ByteString.Char8
  21. import qualified Data.Attoparsec.ByteString.Char8 as A
  22. import Data.ByteString (ByteString)
  23. import Control.Applicative ((<|>))
  24. import qualified Data.ByteString.Base64 as B64
  25. import qualified Codec.Binary.UTF8.String as UTF8
  26. import qualified Data.ByteString as BS
  27. utf8bs :: String -> ByteString
  28. utf8bs = BS.pack . UTF8.encode
  29. -- | A version of attoparsec's isHorizontalSpace that takes a Char as argument.
  30. isCHorizontalSpace :: Char -> Bool
  31. isCHorizontalSpace = A.isHorizontalSpace . asW8
  32. -- | Skips through the characters that satisfy attoparsec's isHorizontalSpace.
  33. -- Never fails.
  34. skipHorizontalSpace :: Parser ()
  35. skipHorizontalSpace = A.skipWhile (A.isHorizontalSpace . asW8)
  36. -- | Converts a character into it's Word8 value, truncating any extra bytes.
  37. asW8 :: Char -> Word8
  38. asW8 = fromIntegral . C.ord
  39. {- |
  40. parserFold f x0
  41. Acquires functions by parsing the data with f, and applies it to the result of the previous function,
  42. until f fails.
  43. -}
  44. parserFold :: Parser (a -> a) -> a -> Parser a
  45. parserFold f a = (
  46. do
  47. r <- f
  48. let a' = r a
  49. parserFold f a'
  50. ) <|> return a
  51. {- |
  52. Given a list of options, returns the first one that is show like the input.
  53. Does not consume input on the case of failure.
  54. -}
  55. parseShow :: Show a => [a] -> Parser a
  56. parseShow = foldr
  57. (\ s -> (<|>) ((string . utf8bs . show $ s) *> return s))
  58. (fail "Invalid string")
  59. -- | Like parseShow, but case insensitive.
  60. parseShowCI :: Show a => [a] -> Parser a
  61. parseShowCI = foldr
  62. (\ s -> (<|>) ((A.stringCI . utf8bs . show $ s) *> return s))
  63. (fail "Invalid string")
  64. {- |
  65. Given a list of options and a printing function, returns the first option that is
  66. printed like the input.
  67. -}
  68. parsePrintable :: (a -> String) -> [a] -> Parser a
  69. parsePrintable _ [] = fail "Invalid string"
  70. parsePrintable f (s:ss) = ((string . utf8bs . f $ s) *> return s) <|> parsePrintable f ss
  71. -- | Like parsePrintable, but case insensitive
  72. parsePrintableCI :: (a -> String) -> [a] -> Parser a
  73. parsePrintableCI _ [] = fail "Invalid string"
  74. parsePrintableCI f (s:ss) = ((A.stringCI . utf8bs . f $ s) *> return s) <|> parsePrintableCI f ss
  75. -- | Acts like parseShow, trying every possible value
  76. parseEnum :: (Enum a, Bounded a, Show a) => Parser a
  77. parseEnum = let
  78. l = [minBound .. maxBound]
  79. in parseShow l
  80. -- | Acts like parseShowCI, trying every possible value
  81. parseEnumCI :: (Enum a, Bounded a, Show a) => Parser a
  82. parseEnumCI = let
  83. l = [minBound .. maxBound]
  84. in parseShowCI l
  85. -- | Consumes base64 encoded text, returning its binary decoded value.
  86. decodeBase64 :: Parser ByteString
  87. decodeBase64 = do
  88. b64 <- A.takeWhile isBase64Char
  89. case B64.decode b64 of
  90. Left _ -> fail "Invalid base64 character sequence"
  91. Right v -> return v
  92. where
  93. isBase64Char :: Char -> Bool
  94. isBase64Char c = C.isAlphaNum c || c == '+' || c == '/'
  95. data QuoteScannerState = Quote | Escape
  96. {- |
  97. quotedStr escape forbiden_plain forbiden_quote
  98. Parses a possibly quoted string, where the characters in
  99. forbiden_plain are forbiden in unquoted text, and the
  100. characters in forbiden_quote are forbiden in quoted text.
  101. No forbiden characters are assumed (not even space). Thus,
  102. if no forbiden character is supplied, the parser will not
  103. terminate.
  104. Any character may be escaped with the escape character.
  105. -}
  106. quotedString :: C.Char -> [C.Char] -> [C.Char] -> A.Parser ByteString
  107. quotedString e fp fq = A.scan [] quoteScanner
  108. where
  109. quoteScanner :: [QuoteScannerState] -> Char -> Maybe [QuoteScannerState]
  110. quoteScanner [] c
  111. | c `elem` "'\"" = Just [Quote]
  112. | c == e = Just [Escape]
  113. | c `elem` fp = Nothing
  114. | otherwise = Just []
  115. quoteScanner fs@(Quote:ss) c
  116. | c `elem` "'\"" = Just ss
  117. | c == e = Just (Escape:fs)
  118. | c `elem` fq = Nothing
  119. | otherwise = Just fs
  120. quoteScanner (Escape:ss) _ = Just ss