{-# LANGUAGE OverloadedStrings #-}

-- | Provides lexer parts for the [Parser]("Parser.Parser") module.
module Parser.Lexer where

import Control.Monad (void)
import Data.Text (Text, concat, pack, singleton)
import Data.Void (Void)
import Numeric (readBin, readDec, readHex, readOct)
import qualified Parser.Ast as Ast (Identifier)
import StdLib (StdLibFunction (name), lenFunction, panicFunction, printFunction, printlnFunction)
import Text.Megaparsec (MonadParsec (..), Parsec, anySingle, between, choice, many, oneOf, optional, sepBy1, sepEndBy, sepEndBy1, (<|>))
import Text.Megaparsec.Char (binDigitChar, char, char', digitChar, hexDigitChar, letterChar, newline, octDigitChar, space1)
import qualified Text.Megaparsec.Char.Lexer as L
import Prelude hiding (concat)

---------------------------------------------------Basic lexer parts----------------------------------------------------

-- * Basic lexer parts

-- | Parser monad.
type Parser = Parsec Void Text

-- | Space consumer, parses whitespace and comments.
sc :: Parser ()
sc :: Parser ()
sc = forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> m () -> m () -> m ()
L.space forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space1 (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> m ()
L.skipLineComment Tokens Text
"//") (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> Tokens s -> m ()
L.skipBlockComment Tokens Text
"/*" Tokens Text
"*/")

-- | Lexeme, automatically parses trailing whitespace and comments.
lexeme :: Parser a -> Parser a
lexeme :: forall a. Parser a -> Parser a
lexeme = forall e s (m :: * -> *) a. MonadParsec e s m => m () -> m a -> m a
L.lexeme Parser ()
sc

-- | Symbol, automatically parses trailing whitespace and comments.
symbol :: Text -> Parser Text
symbol :: Text -> Parser Text
symbol = forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> Tokens s -> m (Tokens s)
L.symbol Parser ()
sc

--------------------------------------------------------Symbols---------------------------------------------------------

-- * Symbols

-- | Wraps given parser with parenthesis.
parens :: Parser a -> Parser a
parens :: forall a. Parser a -> Parser a
parens = forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"(") (Text -> Parser Text
symbol Text
")")

-- | Wraps given parser with braces.
braces :: Parser a -> Parser a
braces :: forall a. Parser a -> Parser a
braces = forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"{") (Text -> Parser Text
symbol Text
"}")

-- | Wraps given parser with brackets.
brackets :: Parser a -> Parser a
brackets :: forall a. Parser a -> Parser a
brackets = forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Text -> Parser Text
symbol Text
"[") (Text -> Parser Text
symbol Text
"]")

-- | Comma parser.
comma :: Parser Text
comma :: Parser Text
comma = Text -> Parser Text
symbol Text
","

-- | Semicolon parser.
semicolon :: Parser Text
semicolon :: Parser Text
semicolon = Text -> Parser Text
symbol Text
";"

-- | List parser.
listed :: Parser a -> Parser [a]
listed :: forall a. Parser a -> Parser [a]
listed Parser a
p = forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepEndBy Parser a
p Parser Text
comma

listed1 :: Parser a -> Parser [a]
listed1 :: forall a. Parser a -> Parser [a]
listed1 Parser a
p = forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepEndBy1 Parser a
p Parser Text
comma

-- | List parser.
listedInPar :: Parser a -> Parser [a]
listedInPar :: forall a. Parser a -> Parser [a]
listedInPar Parser a
p = forall a. Parser a -> Parser a
parens forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepEndBy Parser a
p Parser Text
comma

--------------------------------------------------------Literals--------------------------------------------------------

-- * Literals

-- | Integer literal parser.
intLitP :: Parser Integer
intLitP :: Parser Integer
intLitP = forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
choice forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Integer
binaryInt, Parser Integer
octalInt, Parser Integer
hexInt, Parser Integer
decimalInt]

-- | Decimal integer literal parser.
decimalInt :: Parser Integer
decimalInt :: Parser Integer
decimalInt =
  forall a. Parser a -> Parser a
lexeme forall a b. (a -> b) -> a -> b
$
    (Integer
0 forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'0') forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> do
      Char
first <- forall (f :: * -> *) e s (m :: * -> *).
(Foldable f, MonadParsec e s m) =>
f (Token s) -> m (Token s)
oneOf [Char
'1' .. Char
'9']
      [Char]
other <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'_') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
digitChar
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ ReadS Integer -> [Char] -> Integer
readInteger forall a. (Eq a, Num a) => ReadS a
readDec forall a b. (a -> b) -> a -> b
$ Char
first forall a. a -> [a] -> [a]
: [Char]
other

-- | Binary integer literal parser.
binaryInt :: Parser Integer
binaryInt :: Parser Integer
binaryInt = forall a.
Parser a
-> ParsecT Void Text Identity Char
-> ReadS Integer
-> Parser Integer
abstractInt (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char' Char
'b') forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
binDigitChar forall a. (Eq a, Num a) => ReadS a
readBin

-- | Octal integer literal parser.
octalInt :: Parser Integer
octalInt :: Parser Integer
octalInt = forall a.
Parser a
-> ParsecT Void Text Identity Char
-> ReadS Integer
-> Parser Integer
abstractInt (forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char' Char
'o') forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
octDigitChar forall a. (Eq a, Num a) => ReadS a
readOct

-- | Hex integer literal parser.
hexInt :: Parser Integer
hexInt :: Parser Integer
hexInt = forall a.
Parser a
-> ParsecT Void Text Identity Char
-> ReadS Integer
-> Parser Integer
abstractInt (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char' Char
'x') forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
hexDigitChar forall a. (Eq a, Num a) => ReadS a
readHex

-- | Abstract integer parser, encapsulates integer parser structure.
abstractInt :: Parser a -> Parser Char -> ReadS Integer -> Parser Integer
abstractInt :: forall a.
Parser a
-> ParsecT Void Text Identity Char
-> ReadS Integer
-> Parser Integer
abstractInt Parser a
charIdP ParsecT Void Text Identity Char
digitP ReadS Integer
reader = forall a. Parser a -> Parser a
lexeme forall a b. (a -> b) -> a -> b
$ do
  forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'0' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser a
charIdP forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'_')
  [Char]
intStr <- forall (m :: * -> *) a sep. MonadPlus m => m a -> m sep -> m [a]
sepBy1 ParsecT Void Text Identity Char
digitP forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'_'
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ ReadS Integer -> [Char] -> Integer
readInteger ReadS Integer
reader [Char]
intStr

-- | Parse integer using given reader and integer string.
readInteger :: ReadS Integer -> String -> Integer
readInteger :: ReadS Integer -> [Char] -> Integer
readInteger ReadS Integer
reader [Char]
s = forall a b. (a, b) -> a
fst forall a b. (a -> b) -> a -> b
$ forall a. [a] -> a
head forall a b. (a -> b) -> a -> b
$ ReadS Integer
reader [Char]
s

-- | Boolean literal parser.
boolLitP :: Parser Bool
boolLitP :: Parser Bool
boolLitP = Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser Text
idTrue forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool
False forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser Text
idFalse

-- | String literal parser.
stringLitP :: Parser Text
stringLitP :: Parser Text
stringLitP = forall a. Parser a -> Parser a
lexeme forall a b. (a -> b) -> a -> b
$ [Text] -> Text
concat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'"') (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'"') (forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many Parser Text
stringChar)

-- | String character parser.
stringChar :: Parser Text
stringChar :: Parser Text
stringChar = forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
choice [forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
newline, forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\\', forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'"']) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Char -> Text
singleton forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text
escapedChar

-- | Escaped character parser.
escapedChar :: Parser Text
escapedChar :: Parser Text
escapedChar =
  forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\\'
    forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
choice
      [ Text
"\a" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'a',
        Text
"\b" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'b',
        Text
"\f" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'f',
        Text
"\n" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'n',
        Text
"\r" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'r',
        Text
"\t" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
't',
        Text
"\v" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'v',
        Text
"\\" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\\',
        Text
"\"" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'\"'
      ]

------------------------------------------------Identifiers and reserved------------------------------------------------

-- * Identifiers and reserved

-- ** Identifier

-- | Custom identifier parser.
identifierP :: Parser Ast.Identifier
identifierP :: Parser Text
identifierP = do
  Text
n <- forall a. Parser a -> Parser a
lexeme forall a b. (a -> b) -> a -> b
$
    do
      Char
first <- ParsecT Void Text Identity (Token Text)
letterP
      [Char]
other <- forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many forall a b. (a -> b) -> a -> b
$ ParsecT Void Text Identity (Token Text)
letterP forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
digitChar
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Char] -> Text
pack forall a b. (a -> b) -> a -> b
$ Char
first forall a. a -> [a] -> [a]
: [Char]
other
  if Text
n forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Text]
keywords forall a. Semigroup a => a -> a -> a
<> [Text]
predeclaredIdentifiers)
    then forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"wrong identifier"
    else forall (m :: * -> *) a. Monad m => a -> m a
return Text
n
  where
    letterP :: ParsecT Void Text Identity (Token Text)
letterP = forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
letterChar forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'_'

-- ** Keywords

kwVar, kwFunc, kwReturn, kwIf, kwElse, kwFor, kwBreak, kwContinue :: Parser Text
kwVar', kwFunc', kwReturn', kwIf', kwElse', kwFor', kwBreak', kwContinue' :: Text

-- | Keywords.
keywords :: [Text]
keywords :: [Text]
keywords = [Text
kwVar', Text
kwFunc', Text
kwReturn', Text
kwIf', Text
kwElse', Text
kwFor', Text
kwBreak', Text
kwContinue']

-- | @var@ keyword parser.
kwVar :: Parser Text
kwVar = Text -> Parser Text
symbol Text
kwVar'

kwVar' :: Text
kwVar' = Text
"var"

-- | @func@ keyword parser.
kwFunc :: Parser Text
kwFunc = Text -> Parser Text
symbol Text
kwFunc'

kwFunc' :: Text
kwFunc' = Text
"func"

-- | @return@ keyword parser.
kwReturn :: Parser Text
kwReturn = Text -> Parser Text
symbol Text
kwReturn'

kwReturn' :: Text
kwReturn' = Text
"return"

-- | @if@ keyword parser.
kwIf :: Parser Text
kwIf = Text -> Parser Text
symbol Text
kwIf'

kwIf' :: Text
kwIf' = Text
"if"

-- | @else@ keyword parser.
kwElse :: Parser Text
kwElse = Text -> Parser Text
symbol Text
kwElse'

kwElse' :: Text
kwElse' = Text
"else"

-- | @for@ keyword parser.
kwFor :: Parser Text
kwFor = Text -> Parser Text
symbol Text
kwFor'

kwFor' :: Text
kwFor' = Text
"for"

-- | @break@ keyword parser.
kwBreak :: Parser Text
kwBreak = Text -> Parser Text
symbol Text
kwBreak'

kwBreak' :: Text
kwBreak' = Text
"break"

-- | @continue@ keyword parser.
kwContinue :: Parser Text
kwContinue = Text -> Parser Text
symbol Text
kwContinue'

kwContinue' :: Text
kwContinue' = Text
"continue"

-- ** Predeclared identifiers

idBool, idInt, idString, idTrue, idFalse, idNil, idLenFunc, idPrintFunc, idPrintlnFunc, idPanicFunc :: Parser Text
idBool', idInt', idString', idTrue', idFalse', idNil', idLenFunc', idPrintFunc', idPrintlnFunc', idPanicFunc' :: Text

-- | Predeclared identifiers.
predeclaredIdentifiers :: [Ast.Identifier]
predeclaredIdentifiers :: [Text]
predeclaredIdentifiers =
  [Text
idBool', Text
idInt', Text
idString', Text
idTrue', Text
idFalse', Text
idNil']
    forall a. Semigroup a => a -> a -> a
<> [Text
idLenFunc', Text
idPrintFunc', Text
idPrintlnFunc', Text
idPanicFunc']

-- | @bool@ identifier parser.
idBool :: Parser Text
idBool = Text -> Parser Text
symbol Text
idBool'

idBool' :: Text
idBool' = Text
"bool"

-- | @int@ identifier parser.
idInt :: Parser Text
idInt = Text -> Parser Text
symbol Text
idInt'

idInt' :: Text
idInt' = Text
"int"

-- | @string@ identifier parser.
idString :: Parser Text
idString = Text -> Parser Text
symbol Text
idString'

idString' :: Text
idString' = Text
"string"

-- | @true@ identifier parser.
idTrue :: Parser Text
idTrue = Text -> Parser Text
symbol Text
idTrue'

idTrue' :: Text
idTrue' = Text
"true"

-- | @false@ identifier parser.
idFalse :: Parser Text
idFalse = Text -> Parser Text
symbol Text
idFalse'

idFalse' :: Text
idFalse' = Text
"false"

-- | @nil@ identifier parser.
idNil :: Parser Text
idNil = Text -> Parser Text
symbol Text
idNil'

idNil' :: Text
idNil' = Text
"nil"

-- | @len@ identifier parser.
idLenFunc :: Parser Text
idLenFunc = Text -> Parser Text
symbol Text
idLenFunc'

idLenFunc' :: Text
idLenFunc' = StdLibFunction -> Text
name StdLibFunction
lenFunction

-- | @print@ identifier parser.
idPrintFunc :: Parser Text
idPrintFunc = Text -> Parser Text
symbol Text
idPrintFunc'

idPrintFunc' :: Text
idPrintFunc' = StdLibFunction -> Text
name StdLibFunction
printFunction

-- | @println@ identifier parser.
idPrintlnFunc :: Parser Text
idPrintlnFunc = Text -> Parser Text
symbol Text
idPrintlnFunc'

idPrintlnFunc' :: Text
idPrintlnFunc' = StdLibFunction -> Text
name StdLibFunction
printlnFunction

-- | @panic@ identifier parser.
idPanicFunc :: Parser Text
idPanicFunc = Text -> Parser Text
symbol Text
idPanicFunc'

idPanicFunc' :: Text
idPanicFunc' = StdLibFunction -> Text
name StdLibFunction
panicFunction