-- Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--
-- 1. Redistributions of source code must retain the above copyright notice, this
--    list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright notice,
--    this list of conditions and the following disclaimer in the documentation
--    and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

module Lexer (lexer) where

import Text.Parsec hiding (token, tokens)
import Text.Parsec.String
import Control.Monad (liftM)
import qualified Text.Parsec.Token as T
import Text.Parsec.Language (emptyDef)
import Token

keywords =
    [ (InKeyword, "in")
    , (OfKeyword, "of")
    , (OnKeyword, "on")
    , (AsKeyword, "as")
    , (WithKeyword, "with")
    , (FromKeyword, "from")
    , (ImportKeyword, "import")
    , (UsingKeyword, "using")
    , (ConstKeyword, "const")
    , (EnumKeyword, "enum")
    , (StructKeyword, "struct")
    , (UnionKeyword, "union")
    , (InterfaceKeyword, "interface")
    , (OptionKeyword, "option")
    ]

languageDef :: T.LanguageDef st
languageDef = emptyDef
    { T.commentLine = "#"
    , T.identStart = letter <|> char '_'
    , T.identLetter = alphaNum <|> char '_'
    , T.reservedNames = [name | (_, name) <- keywords]
    , T.opStart = T.opLetter languageDef
    , T.opLetter = fail "There are no operators."
    }

tokenParser = T.makeTokenParser languageDef

identifier     = T.identifier tokenParser
reserved       = T.reserved tokenParser
symbol         = T.symbol tokenParser
naturalOrFloat = T.naturalOrFloat tokenParser
braces         = T.braces tokenParser
parens         = T.parens tokenParser
brackets       = T.brackets tokenParser
whiteSpace     = T.whiteSpace tokenParser
stringLiteral  = T.stringLiteral tokenParser

keyword :: Parser Token
keyword = foldl1 (<|>) [reserved name >> return t | (t, name) <- keywords]

toLiteral :: Either Integer Double -> Token
toLiteral (Left i) = LiteralInt i
toLiteral (Right d) = LiteralFloat d

located :: Parser t -> Parser (Located t)
located p = do
    pos <- getPosition
    t <- p
    return (Located pos t)

token :: Parser Token
token = keyword
    <|> liftM Identifier         identifier
    <|> liftM ParenthesizedList  (parens (sepBy (many locatedToken) (symbol ",")))
    <|> liftM BracketedList      (brackets (sepBy (many locatedToken) (symbol ",")))
    <|> liftM toLiteral          naturalOrFloat
    <|> liftM LiteralString      stringLiteral
    <|> liftM (const AtSign)     (symbol "@")
    <|> liftM (const Colon)      (symbol ":")
    <|> liftM (const Period)     (symbol ".")
    <|> liftM (const EqualsSign) (symbol "=")
    <|> liftM (const MinusSign)  (symbol "-")
    <|> liftM (const ExclamationPoint) (symbol "!")
    <?> "token"

locatedToken = located token

statementEnd :: Parser (Maybe [Located Statement])
statementEnd = (symbol ";" >>= \_ -> return Nothing)
           <|> (braces (many locatedStatement) >>= \statements -> return (Just statements))

compileStatement :: [Located Token] -> Maybe [Located Statement] -> Statement
compileStatement tokens Nothing = Line tokens
compileStatement tokens (Just statements) = Block tokens statements

statement :: Parser Statement
statement = do
    tokens <- many locatedToken
    end <- statementEnd
    return (compileStatement tokens end)

locatedStatement = located statement

lexer :: Parser [Located Statement]
lexer = do
    whiteSpace
    tokens <- many locatedStatement
    eof
    return tokens