{-# LANGUAGE OverloadedStrings #-}

module Parser.Lexer
  ( Parser,
    sc,
    lexeme,
    symbol,
    colon,
    semicolon2,
    arrow,
    eq,
    leftPar,
    rightPar,
    unitLitP,
    boolLitP,
    intLitP,
    identifierP,
    kwLet,
    kwRec,
    kwIn,
    kwIf,
    kwThen,
    kwElse,
    kwFun,
    kwUnit,
    kwBool,
    kwInt,
  )
where

import Control.Monad (when)
import Data.Int (Int64)
import Data.Text (Text, pack)
import Data.Void (Void)
import Text.Megaparsec (MonadParsec (..), Parsec, choice, many, (<|>))
import Text.Megaparsec.Char (char, digitChar, letterChar, space1, string)
import qualified Text.Megaparsec.Char.Lexer as L
import Trees.Common (Identifier)

-- * Basic lexer parts

-- | Parser monad.
type Parser = Parsec Void Text

-- | Space consumer, parses whitespace and comments.
sc :: Parser ()
sc :: Parser ()
sc = Parser () -> Parser () -> Parser () -> Parser ()
forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> m () -> m () -> m ()
L.space Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space1 (Tokens Text -> Parser ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Tokens s -> m ()
L.skipLineComment Tokens Text
"//") (Tokens Text -> Tokens Text -> Parser ()
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> Tokens s -> m ()
L.skipBlockComment Tokens Text
"(*" Tokens Text
"*)")

-- | Lexeme, automatically parses trailing whitespace and comments.
lexeme :: Parser a -> Parser a
lexeme :: forall a. Parser a -> Parser a
lexeme = Parser ()
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall e s (m :: * -> *) a. MonadParsec e s m => m () -> m a -> m a
L.lexeme Parser ()
sc

-- | Symbol, automatically parses trailing whitespace and comments.
symbol :: Text -> Parser Text
symbol :: Text -> Parser Text
symbol = Parser ()
-> Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
m () -> Tokens s -> m (Tokens s)
L.symbol Parser ()
sc

-- * Symbols

-- | Colon parser.
colon :: Parser Text
colon :: Parser Text
colon = Text -> Parser Text
symbol Text
":"

-- | Double semicolon parser.
semicolon2 :: Parser Text
semicolon2 :: Parser Text
semicolon2 = Text -> Parser Text
symbol Text
";;"

-- | Arrow parser.
arrow :: Parser Text
arrow :: Parser Text
arrow = Text -> Parser Text
symbol Text
"->"

-- | Equality parser.
eq :: Parser Text
eq :: Parser Text
eq = Text -> Parser Text
symbol Text
"="

-- | Left parenthesis parser.
leftPar :: Parser Text
leftPar :: Parser Text
leftPar = Text -> Parser Text
symbol Text
"("

-- | Right parenthesis parser.
rightPar :: Parser Text
rightPar :: Parser Text
rightPar = Text -> Parser Text
symbol Text
")"

-- * Literals

-- | Unit literal parser.
unitLitP :: Parser Text
unitLitP :: Parser Text
unitLitP = Parser Text
leftPar Parser Text -> Parser Text -> Parser Text
forall a b.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity b -> ParsecT Void Text Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Text
rightPar

-- | Boolean literal parser.
boolLitP :: Parser Bool
boolLitP :: Parser Bool
boolLitP = Bool
True Bool -> Parser Text -> Parser Bool
forall a b.
a -> ParsecT Void Text Identity b -> ParsecT Void Text Identity a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser Text
kwTrue Parser Bool -> Parser Bool -> Parser Bool
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Bool
False Bool -> Parser Text -> Parser Bool
forall a b.
a -> ParsecT Void Text Identity b -> ParsecT Void Text Identity a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Parser Text
kwFalse

-- | Decimal integer literal parser.
intLitP :: Parser Int64
intLitP :: Parser Int64
intLitP = do
  Integer
int <- Parser Integer -> Parser Integer
forall a. Parser a -> Parser a
lexeme Parser Integer
forall e s (m :: * -> *) a.
(MonadParsec e s m, Token s ~ Char, Num a) =>
m a
L.decimal
  Bool -> Parser () -> Parser ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Integer
int Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
> Integer
absMax) (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$
    String -> Parser ()
forall a. String -> ParsecT Void Text Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Error: Integer literal exceeds the range of representable integers of type int64"
  Int64 -> Parser Int64
forall a. a -> ParsecT Void Text Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int64 -> Parser Int64) -> Int64 -> Parser Int64
forall a b. (a -> b) -> a -> b
$ Integer -> Int64
forall a. Num a => Integer -> a
fromInteger Integer
int
  where
    absMax :: Integer
absMax = Integer
9223372036854775808

-- * Identifiers and keywords

-- ** Identifier

identifierP :: Parser Identifier
identifierP :: Parser Text
identifierP = Parser ()
notReserved Parser () -> Parser Text -> Parser Text
forall a b.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity b -> ParsecT Void Text Identity b
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
identifier
  where
    identifier :: Parser Text
identifier = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ do
      Char
first <- ParsecT Void Text Identity Char
ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
letterChar ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'_'
      String
other <- ParsecT Void Text Identity Char
-> ParsecT Void Text Identity String
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many ParsecT Void Text Identity Char
identifierChar
      Text -> Parser Text
forall a. a -> ParsecT Void Text Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text) -> Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ String -> Text
pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ Char
first Char -> String -> String
forall a. a -> [a] -> [a]
: String
other
    notReserved :: Parser ()
notReserved =
      Parser Text -> Parser ()
forall a. ParsecT Void Text Identity a -> Parser ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy (Parser Text -> Parser ()) -> Parser Text -> Parser ()
forall a b. (a -> b) -> a -> b
$
        [Parser Text] -> Parser Text
forall (f :: * -> *) (m :: * -> *) a.
(Foldable f, Alternative m) =>
f (m a) -> m a
choice [Parser Text
kwLet, Parser Text
kwRec, Parser Text
kwIn, Parser Text
kwIf, Parser Text
kwThen, Parser Text
kwElse, Parser Text
kwFun, Parser Text
kwTrue, Parser Text
kwFalse, Parser Text
kwUnit, Parser Text
kwBool, Parser Text
kwInt]

keyword :: Text -> Parser Text
keyword :: Text -> Parser Text
keyword Text
ident = Parser Text -> Parser Text
forall a. Parser a -> Parser a
lexeme (Parser Text -> Parser Text) -> Parser Text -> Parser Text
forall a b. (a -> b) -> a -> b
$ Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
Tokens Text
ident Parser Text -> Parser () -> Parser Text
forall a b.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity b -> ParsecT Void Text Identity a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity Char -> Parser ()
forall a. ParsecT Void Text Identity a -> Parser ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m ()
notFollowedBy ParsecT Void Text Identity Char
identifierChar

identifierChar :: Parser Char
identifierChar :: ParsecT Void Text Identity Char
identifierChar = ParsecT Void Text Identity Char
ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
letterChar ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'_' ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char
forall a.
ParsecT Void Text Identity a
-> ParsecT Void Text Identity a -> ParsecT Void Text Identity a
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ParsecT Void Text Identity Char
ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Token s)
digitChar

-- ** Keywords

-- | @let@ keyword parser.
kwLet :: Parser Text
kwLet :: Parser Text
kwLet = Text -> Parser Text
keyword Text
"let"

-- | @rec@ keyword parser.
kwRec :: Parser Text
kwRec :: Parser Text
kwRec = Text -> Parser Text
keyword Text
"rec"

-- | @in@ keyword parser.
kwIn :: Parser Text
kwIn :: Parser Text
kwIn = Text -> Parser Text
keyword Text
"in"

-- | @if@ keyword parser.
kwIf :: Parser Text
kwIf :: Parser Text
kwIf = Text -> Parser Text
keyword Text
"if"

-- | @then@ keyword parser.
kwThen :: Parser Text
kwThen :: Parser Text
kwThen = Text -> Parser Text
keyword Text
"then"

-- | @else@ keyword parser.
kwElse :: Parser Text
kwElse :: Parser Text
kwElse = Text -> Parser Text
keyword Text
"else"

-- | @fun@ keyword parser.
kwFun :: Parser Text
kwFun :: Parser Text
kwFun = Text -> Parser Text
keyword Text
"fun"

-- | @true@ keyword parser.
kwTrue :: Parser Text
kwTrue :: Parser Text
kwTrue = Text -> Parser Text
keyword Text
"true"

-- | @false@ keyword parser.
kwFalse :: Parser Text
kwFalse :: Parser Text
kwFalse = Text -> Parser Text
keyword Text
"false"

-- | @unit@ keyword parser.
kwUnit :: Parser Text
kwUnit :: Parser Text
kwUnit = Text -> Parser Text
keyword Text
"unit"

-- | @bool@ keyword parser.
kwBool :: Parser Text
kwBool :: Parser Text
kwBool = Text -> Parser Text
keyword Text
"bool"

-- | @int@ keyword parser.
kwInt :: Parser Text
kwInt :: Parser Text
kwInt = Text -> Parser Text
keyword Text
"int"