{-# OPTIONS #-}
module Language.Python.Common.LexerUtils where
import Control.Monad (liftM)
import Data.List (foldl')
import Data.Word (Word8)
import Language.Python.Common.Token as Token
import Language.Python.Common.ParserMonad hiding (location)
import Language.Python.Common.SrcLocation
import Codec.Binary.UTF8.String as UTF8 (encode)
type Byte = Word8
data BO = BOF | BOL
type StartCode = Int
type Action = SrcSpan -> Int -> String -> P Token
lineJoin :: Action
lineJoin :: Action
lineJoin span :: SrcSpan
span _len :: Int
_len _str :: String
_str =
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> Token
LineJoinToken (SrcSpan -> Token) -> SrcSpan -> Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> SrcSpan
spanStartPoint SrcSpan
span
endOfLine :: P Token -> Action
endOfLine :: StateT ParseState (Either ParseError) Token -> Action
endOfLine lexToken :: StateT ParseState (Either ParseError) Token
lexToken span :: SrcSpan
span _len :: Int
_len _str :: String
_str = do
SrcSpan -> P ()
setLastEOL (SrcSpan -> P ()) -> SrcSpan -> P ()
forall a b. (a -> b) -> a -> b
$ SrcSpan -> SrcSpan
spanStartPoint SrcSpan
span
StateT ParseState (Either ParseError) Token
lexToken
bolEndOfLine :: P Token -> Int -> Action
bolEndOfLine :: StateT ParseState (Either ParseError) Token -> Int -> Action
bolEndOfLine lexToken :: StateT ParseState (Either ParseError) Token
lexToken bol :: Int
bol span :: SrcSpan
span len :: Int
len inp :: String
inp = do
Int -> P ()
pushStartCode Int
bol
StateT ParseState (Either ParseError) Token -> Action
endOfLine StateT ParseState (Either ParseError) Token
lexToken SrcSpan
span Int
len String
inp
dedentation :: P Token -> Action
dedentation :: StateT ParseState (Either ParseError) Token -> Action
dedentation lexToken :: StateT ParseState (Either ParseError) Token
lexToken span :: SrcSpan
span _len :: Int
_len _str :: String
_str = do
Int
topIndent <- P Int
getIndent
case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (SrcSpan -> Int
startCol SrcSpan
span) Int
topIndent of
EQ -> do P ()
popStartCode
StateT ParseState (Either ParseError) Token
lexToken
LT -> do P ()
popIndent
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
dedentToken
GT -> SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
span "indentation error"
indentation :: P Token -> Int -> BO -> Action
indentation :: StateT ParseState (Either ParseError) Token -> Int -> BO -> Action
indentation lexToken :: StateT ParseState (Either ParseError) Token
lexToken _dedentCode :: Int
_dedentCode bo :: BO
bo _loc :: SrcSpan
_loc _len :: Int
_len [] = do
P ()
popStartCode
case BO
bo of
BOF -> StateT ParseState (Either ParseError) Token
lexToken
BOL -> StateT ParseState (Either ParseError) Token
newlineToken
indentation lexToken :: StateT ParseState (Either ParseError) Token
lexToken dedentCode :: Int
dedentCode bo :: BO
bo span :: SrcSpan
span _len :: Int
_len _str :: String
_str = do
P ()
popStartCode
Int
parenDepth <- P Int
getParenStackDepth
if Int
parenDepth Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> 0
then StateT ParseState (Either ParseError) Token
lexToken
else do
Int
topIndent <- P Int
getIndent
case Int -> Int -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (SrcSpan -> Int
startCol SrcSpan
span) Int
topIndent of
EQ -> case BO
bo of
BOF -> StateT ParseState (Either ParseError) Token
lexToken
BOL -> StateT ParseState (Either ParseError) Token
newlineToken
LT -> do Int -> P ()
pushStartCode Int
dedentCode
StateT ParseState (Either ParseError) Token
newlineToken
GT -> do Int -> P ()
pushIndent (SrcSpan -> Int
startCol SrcSpan
span)
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
indentToken
where
indentToken :: Token
indentToken = SrcSpan -> Token
IndentToken SrcSpan
span
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken :: (SrcSpan -> Token) -> Action
symbolToken mkToken :: SrcSpan -> Token
mkToken location :: SrcSpan
location _ _ = Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (SrcSpan -> Token
mkToken SrcSpan
location)
token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token :: (SrcSpan -> String -> a -> Token) -> (String -> a) -> Action
token mkToken :: SrcSpan -> String -> a -> Token
mkToken read :: String -> a
read location :: SrcSpan
location len :: Int
len str :: String
str
= Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> String -> a -> Token
mkToken SrcSpan
location String
literal (String -> a
read String
literal)
where
literal :: String
literal = Int -> String -> String
forall a. Int -> [a] -> [a]
take Int
len String
str
endOfFileToken :: Token
endOfFileToken :: Token
endOfFileToken = SrcSpan -> Token
EOFToken SrcSpan
SpanEmpty
dedentToken :: Token
dedentToken = SrcSpan -> Token
DedentToken SrcSpan
SpanEmpty
newlineToken :: P Token
newlineToken :: StateT ParseState (Either ParseError) Token
newlineToken = do
SrcSpan
loc <- P SrcSpan
getLastEOL
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> Token
NewlineToken SrcSpan
loc
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
atEOLorEOF _user :: a
_user _inputBeforeToken :: AlexInput
_inputBeforeToken _tokenLength :: Int
_tokenLength (_loc :: SrcLocation
_loc, _bs :: [Byte]
_bs, inputAfterToken :: String
inputAfterToken)
= String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
inputAfterToken Bool -> Bool -> Bool
|| Char
nextChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\n' Bool -> Bool -> Bool
|| Char
nextChar Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '\r'
where
nextChar :: Char
nextChar = String -> Char
forall a. [a] -> a
head String
inputAfterToken
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF :: a -> AlexInput -> Int -> AlexInput -> Bool
notEOF _user :: a
_user _inputBeforeToken :: AlexInput
_inputBeforeToken _tokenLength :: Int
_tokenLength (_loc :: SrcLocation
_loc, _bs :: [Byte]
_bs, inputAfterToken :: String
inputAfterToken)
= Bool -> Bool
not (String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
inputAfterToken)
delUnderscores :: String -> String
delUnderscores :: String -> String
delUnderscores [] = []
delUnderscores ('_':xs :: String
xs) = String -> String
delUnderscores String
xs
delUnderscores (x :: Char
x :xs :: String
xs) = Char
x Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
delUnderscores String
xs
readBinary :: String -> Integer
readBinary :: String -> Integer
readBinary
= String -> Integer
toBinary (String -> Integer) -> (String -> String) -> String -> Integer
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> String
forall a. Int -> [a] -> [a]
drop 2
where
toBinary :: String -> Integer
toBinary = (Integer -> Char -> Integer) -> Integer -> String -> Integer
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Integer -> Char -> Integer
forall a. Num a => a -> Char -> a
acc 0
acc :: a -> Char -> a
acc b :: a
b '0' = 2 a -> a -> a
forall a. Num a => a -> a -> a
* a
b
acc b :: a
b '1' = 2 a -> a -> a
forall a. Num a => a -> a -> a
* a
b a -> a -> a
forall a. Num a => a -> a -> a
+ 1
acc _ _ = String -> a
forall a. HasCallStack => String -> a
error "Lexer ensures all digits passed to readBinary are 0 or 1."
readFloat :: String -> Double
readFloat :: String -> Double
readFloat str :: String
str@('.':cs :: String
cs) = String -> Double
forall a. Read a => String -> a
read ('0'Char -> String -> String
forall a. a -> [a] -> [a]
:String -> String
readFloatRest String
str)
readFloat str :: String
str = String -> Double
forall a. Read a => String -> a
read (String -> String
readFloatRest String
str)
readFloatRest :: String -> String
readFloatRest :: String -> String
readFloatRest [] = []
readFloatRest ['.'] = ".0"
readFloatRest (c :: Char
c:cs :: String
cs) = Char
c Char -> String -> String
forall a. a -> [a] -> [a]
: String -> String
readFloatRest String
cs
mkString :: (SrcSpan -> String -> Token) -> Action
mkString :: (SrcSpan -> String -> Token) -> Action
mkString toToken :: SrcSpan -> String -> Token
toToken loc :: SrcSpan
loc len :: Int
len str :: String
str = do
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> StateT ParseState (Either ParseError) Token)
-> Token -> StateT ParseState (Either ParseError) Token
forall a b. (a -> b) -> a -> b
$ SrcSpan -> String -> Token
toToken SrcSpan
loc (Int -> String -> String
forall a. Int -> [a] -> [a]
take Int
len String
str)
stringToken :: SrcSpan -> String -> Token
stringToken :: SrcSpan -> String -> Token
stringToken = SrcSpan -> String -> Token
StringToken
rawStringToken :: SrcSpan -> String -> Token
rawStringToken :: SrcSpan -> String -> Token
rawStringToken = SrcSpan -> String -> Token
StringToken
byteStringToken :: SrcSpan -> String -> Token
byteStringToken :: SrcSpan -> String -> Token
byteStringToken = SrcSpan -> String -> Token
ByteStringToken
formatStringToken :: SrcSpan -> String -> Token
formatStringToken :: SrcSpan -> String -> Token
formatStringToken = SrcSpan -> String -> Token
StringToken
formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken :: SrcSpan -> String -> Token
formatRawStringToken = SrcSpan -> String -> Token
StringToken
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken :: SrcSpan -> String -> Token
unicodeStringToken = SrcSpan -> String -> Token
UnicodeStringToken
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken :: SrcSpan -> String -> Token
rawByteStringToken = SrcSpan -> String -> Token
ByteStringToken
openParen :: (SrcSpan -> Token) -> Action
openParen :: (SrcSpan -> Token) -> Action
openParen mkToken :: SrcSpan -> Token
mkToken loc :: SrcSpan
loc _len :: Int
_len _str :: String
_str = do
let token :: Token
token = SrcSpan -> Token
mkToken SrcSpan
loc
Token -> P ()
pushParen Token
token
Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
token
closeParen :: (SrcSpan -> Token) -> Action
closeParen :: (SrcSpan -> Token) -> Action
closeParen mkToken :: SrcSpan -> Token
mkToken loc :: SrcSpan
loc _len :: Int
_len _str :: String
_str = do
let token :: Token
token = SrcSpan -> Token
mkToken SrcSpan
loc
Maybe Token
topParen <- P (Maybe Token)
getParen
case Maybe Token
topParen of
Nothing -> SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
loc String
err1
Just open :: Token
open -> if Token -> Token -> Bool
matchParen Token
open Token
token
then P ()
popParen P ()
-> StateT ParseState (Either ParseError) Token
-> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Token -> StateT ParseState (Either ParseError) Token
forall (m :: * -> *) a. Monad m => a -> m a
return Token
token
else SrcSpan -> String -> StateT ParseState (Either ParseError) Token
forall a b. Span a => a -> String -> P b
spanError SrcSpan
loc String
err2
where
err1 :: String
err1 = "Lexical error ! unmatched closing paren"
err2 :: String
err2 = "Lexical error ! unmatched closing paren"
matchParen :: Token -> Token -> Bool
matchParen :: Token -> Token -> Bool
matchParen (LeftRoundBracketToken {}) (RightRoundBracketToken {}) = Bool
True
matchParen (LeftBraceToken {}) (RightBraceToken {}) = Bool
True
matchParen (LeftSquareBracketToken {}) (RightSquareBracketToken {}) = Bool
True
matchParen _ _ = Bool
False
type AlexInput = (SrcLocation,
[Byte],
String)
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar _ = String -> Char
forall a. HasCallStack => String -> a
error "alexInputPrevChar not used"
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (loc :: SrcLocation
loc, [], input :: String
input)
| String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
input = Maybe (Char, AlexInput)
forall a. Maybe a
Nothing
| Bool
otherwise = SrcLocation -> Maybe (Char, AlexInput) -> Maybe (Char, AlexInput)
forall a b. a -> b -> b
seq SrcLocation
nextLoc ((Char, AlexInput) -> Maybe (Char, AlexInput)
forall a. a -> Maybe a
Just (Char
nextChar, (SrcLocation
nextLoc, [], String
rest)))
where
nextChar :: Char
nextChar = String -> Char
forall a. [a] -> a
head String
input
rest :: String
rest = String -> String
forall a. [a] -> [a]
tail String
input
nextLoc :: SrcLocation
nextLoc = Char -> SrcLocation -> SrcLocation
moveChar Char
nextChar SrcLocation
loc
alexGetChar (loc :: SrcLocation
loc, _:_, _) = String -> Maybe (Char, AlexInput)
forall a. HasCallStack => String -> a
error "alexGetChar called with non-empty byte buffer"
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (loc :: SrcLocation
loc, b :: Byte
b:bs :: [Byte]
bs, input :: String
input) = (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
b, (SrcLocation
loc, [Byte]
bs, String
input))
alexGetByte (loc :: SrcLocation
loc, [], []) = Maybe (Byte, AlexInput)
forall a. Maybe a
Nothing
alexGetByte (loc :: SrcLocation
loc, [], nextChar :: Char
nextChar:rest :: String
rest) =
SrcLocation -> Maybe (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a b. a -> b -> b
seq SrcLocation
nextLoc ((Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
byte, (SrcLocation
nextLoc, [Byte]
restBytes, String
rest)))
where
nextLoc :: SrcLocation
nextLoc = Char -> SrcLocation -> SrcLocation
moveChar Char
nextChar SrcLocation
loc
byte :: Byte
byte:restBytes :: [Byte]
restBytes = String -> [Byte]
UTF8.encode [Char
nextChar]
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar :: Char -> SrcLocation -> SrcLocation
moveChar '\n' = Int -> SrcLocation -> SrcLocation
incLine 1
moveChar '\t' = SrcLocation -> SrcLocation
incTab
moveChar '\r' = SrcLocation -> SrcLocation
forall a. a -> a
id
moveChar _ = Int -> SrcLocation -> SrcLocation
incColumn 1
lexicalError :: P a
lexicalError :: P a
lexicalError = do
SrcLocation
location <- P SrcLocation
getLocation
Char
c <- (String -> Char)
-> StateT ParseState (Either ParseError) String
-> StateT ParseState (Either ParseError) Char
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM String -> Char
forall a. [a] -> a
head StateT ParseState (Either ParseError) String
getInput
ParseError -> P a
forall a. ParseError -> P a
throwError (ParseError -> P a) -> ParseError -> P a
forall a b. (a -> b) -> a -> b
$ Char -> SrcLocation -> ParseError
UnexpectedChar Char
c SrcLocation
location
readOctNoO :: String -> Integer
readOctNoO :: String -> Integer
readOctNoO (zero :: Char
zero:rest :: String
rest) = String -> Integer
forall a. Read a => String -> a
read (Char
zeroChar -> String -> String
forall a. a -> [a] -> [a]
:'O'Char -> String -> String
forall a. a -> [a] -> [a]
:String
rest)
readOctNoO [] = String -> Integer
forall a. HasCallStack => String -> a
error "Lexer ensures readOctNoO is never called on an empty string"