Commit 6bb49ca7 authored by Kenton Varda's avatar Kenton Varda

Day 1: Learn Haskell, write a parser.

parents
import Distribution.Simple
main = defaultMain
name: capnproto-compiler
version: 0.1
cabal-version: >=1.2
build-type: Simple
author: kenton
executable capnproto-compiler
hs-source-dirs: src
main-is: Main.hs
build-depends:
base >= 4,
parsec,
mtl,
containers
ghc-options: -Wall -fno-warn-missing-signatures
other-modules:
Lexer,
Token,
Grammar,
Parser
-- Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--
-- 1. Redistributions of source code must retain the above copyright notice, this
-- list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright notice,
-- this list of conditions and the following disclaimer in the documentation
-- and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
module Grammar where
data DeclName = AbsoluteName String
| RelativeName String
| ImportName String
| MemberName DeclName String
deriving (Show)
data TypeExpression = TypeName DeclName
| Array TypeExpression
deriving (Show)
data FieldValue = VoidFieldValue
| BoolFieldValue Bool
| IntegerFieldValue Integer
| FloatFieldValue Double
| StringFieldValue String
| ArrayFieldValue [FieldValue]
| RecordFieldValue [(String, FieldValue)]
deriving (Show)
data Declaration = ConstantDecl String (Maybe TypeExpression) FieldValue
| EnumDecl String [Declaration]
| EnumValueDecl String Integer [Declaration]
| ClassDecl String [Declaration]
| FieldDecl String Integer TypeExpression FieldValue [Declaration]
| InterfaceDecl String [Declaration]
| MethodDecl String [(String, TypeExpression, FieldValue)]
TypeExpression [Declaration]
| OptionDecl DeclName FieldValue
deriving (Show)
-- Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--
-- 1. Redistributions of source code must retain the above copyright notice, this
-- list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright notice,
-- this list of conditions and the following disclaimer in the documentation
-- and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
module Lexer (lexer) where
import Text.Parsec hiding (token, tokens)
import Text.Parsec.String
import Control.Monad (liftM)
import qualified Text.Parsec.Token as T
import Text.Parsec.Language (emptyDef)
import Token
keywords =
[ (ImportKeyword, "import")
, (ConstKeyword, "const")
, (EnumKeyword, "enum")
, (ClassKeyword, "class")
, (InterfaceKeyword, "interface")
, (OptionKeyword, "option")
]
languageDef :: T.LanguageDef st
languageDef = emptyDef
{ T.commentLine = "#"
, T.identStart = letter <|> char '_'
, T.identLetter = alphaNum <|> char '_'
, T.reservedNames = [name | (_, name) <- keywords]
, T.opStart = T.opLetter languageDef
, T.opLetter = fail "There are no operators."
}
tokenParser = T.makeTokenParser languageDef
identifier = T.identifier tokenParser
reserved = T.reserved tokenParser
symbol = T.symbol tokenParser
naturalOrFloat = T.naturalOrFloat tokenParser
braces = T.braces tokenParser
parens = T.parens tokenParser
brackets = T.brackets tokenParser
whiteSpace = T.whiteSpace tokenParser
stringLiteral = T.stringLiteral tokenParser
keyword :: Parser Token
keyword = foldl1 (<|>) [reserved name >> return t | (t, name) <- keywords]
toLiteral :: Either Integer Double -> Token
toLiteral (Left i) = LiteralInt i
toLiteral (Right d) = LiteralFloat d
located :: Parser t -> Parser (Located t)
located p = do
pos <- getPosition
t <- p
return (Located pos t)
token :: Parser Token
token = keyword
<|> liftM Identifier identifier
<|> liftM ParenthesizedList (parens (sepBy (many locatedToken) (symbol ",")))
<|> liftM BracketedList (brackets (sepBy (many locatedToken) (symbol ",")))
<|> liftM toLiteral naturalOrFloat
<|> liftM LiteralString stringLiteral
<|> liftM (const AtSign) (symbol "@")
<|> liftM (const Colon) (symbol ":")
<|> liftM (const Period) (symbol ".")
<|> liftM (const EqualsSign) (symbol "=")
<?> "token"
locatedToken = located token
statementEnd :: Parser (Maybe [Located Statement])
statementEnd = (symbol ";" >>= \_ -> return Nothing)
<|> (braces (many locatedStatement) >>= \statements -> return (Just statements))
compileStatement :: [Located Token] -> Maybe [Located Statement] -> Statement
compileStatement tokens Nothing = Line tokens
compileStatement tokens (Just statements) = Block tokens statements
statement :: Parser Statement
statement = do
tokens <- many locatedToken
end <- statementEnd
return (compileStatement tokens end)
locatedStatement = located statement
lexer :: Parser [Located Statement]
lexer = do
whiteSpace
tokens <- many locatedStatement
eof
return tokens
-- Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--
-- 1. Redistributions of source code must retain the above copyright notice, this
-- list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright notice,
-- this list of conditions and the following disclaimer in the documentation
-- and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
module Main ( main ) where
main::IO()
main = undefined
-- Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--
-- 1. Redistributions of source code must retain the above copyright notice, this
-- list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright notice,
-- this list of conditions and the following disclaimer in the documentation
-- and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
module Parser (parseFile) where
import Text.Parsec hiding (tokens)
import Token
import Control.Monad (liftM)
import Grammar
import Lexer (lexer)
tokenParser :: (Located Token -> Maybe a) -> Parsec [Located Token] u a
tokenParser = token (show . locatedValue) locatedPos
type TokenParser = Parsec [Located Token] [ParseError]
-- Hmm, boilerplate is not supposed to happen in Haskell.
matchIdentifier t = case locatedValue t of { (Identifier v) -> Just v; _ -> Nothing }
matchParenthesizedList t = case locatedValue t of { (ParenthesizedList v) -> Just v; _ -> Nothing }
matchBracketedList t = case locatedValue t of { (BracketedList v) -> Just v; _ -> Nothing }
matchLiteralInt t = case locatedValue t of { (LiteralInt v) -> Just v; _ -> Nothing }
matchLiteralFloat t = case locatedValue t of { (LiteralFloat v) -> Just v; _ -> Nothing }
matchLiteralString t = case locatedValue t of { (LiteralString v) -> Just v; _ -> Nothing }
matchSimpleToken expected t = if locatedValue t == expected then Just () else Nothing
identifier = tokenParser matchIdentifier
literalInt = tokenParser matchLiteralInt
literalFloat = tokenParser matchLiteralFloat
literalString = tokenParser matchLiteralString
atSign = tokenParser (matchSimpleToken AtSign)
colon = tokenParser (matchSimpleToken Colon)
period = tokenParser (matchSimpleToken Period)
equalsSign = tokenParser (matchSimpleToken EqualsSign)
importKeyword = tokenParser (matchSimpleToken ImportKeyword)
constKeyword = tokenParser (matchSimpleToken ConstKeyword)
enumKeyword = tokenParser (matchSimpleToken EnumKeyword)
classKeyword = tokenParser (matchSimpleToken ClassKeyword)
interfaceKeyword = tokenParser (matchSimpleToken InterfaceKeyword)
optionKeyword = tokenParser (matchSimpleToken OptionKeyword)
parenthesizedList parser = do
items <- tokenParser matchParenthesizedList
parseList parser items
bracketedList parser = do
items <- tokenParser matchBracketedList
parseList parser items
declNameBase :: TokenParser DeclName
declNameBase = liftM ImportName (importKeyword >> literalString)
<|> liftM AbsoluteName (period >> identifier)
<|> liftM RelativeName identifier
declName :: TokenParser DeclName
declName = do
base <- declNameBase
members <- many (period >> identifier)
return (foldl MemberName base members :: DeclName)
typeExpression :: TokenParser TypeExpression
typeExpression = do
name <- declName
suffixes <- many (bracketedList (fail "Brackets should be empty."))
return (applySuffixes (TypeName name) (length suffixes)) where
applySuffixes t 0 = t
applySuffixes t n = applySuffixes (Array t) (n - 1)
topLine :: Maybe [Located Statement] -> TokenParser Declaration
topLine Nothing = optionDecl <|> constantDecl <|> implicitConstantDecl
topLine (Just statements) = typeDecl statements
constantDecl = do
constKeyword
implicitConstantDecl
implicitConstantDecl = do
name <- identifier
typeName <- optionMaybe (period >> typeExpression)
equalsSign
value <- fieldValue
return (ConstantDecl name typeName value)
typeDecl statements = enumDecl statements
<|> classDecl statements
<|> interfaceDecl statements
enumDecl statements = do
enumKeyword
name <- identifier
children <- parseBlock enumLine statements
return (EnumDecl name children)
enumLine :: Maybe [Located Statement] -> TokenParser Declaration
enumLine Nothing = optionDecl <|> enumValueDecl []
enumLine (Just statements) = enumValueDecl statements
enumValueDecl statements = do
name <- identifier
equalsSign
value <- literalInt
children <- parseBlock enumValueLine statements
return (EnumValueDecl name value children)
enumValueLine :: Maybe [Located Statement] -> TokenParser Declaration
enumValueLine Nothing = optionDecl
enumValueLine (Just _) = fail "Blocks not allowed here."
classDecl statements = do
classKeyword
name <- identifier
children <- parseBlock classLine statements
return (ClassDecl name children)
classLine :: Maybe [Located Statement] -> TokenParser Declaration
classLine Nothing = optionDecl <|> constantDecl <|> fieldDecl []
classLine (Just statements) = typeDecl statements <|> fieldDecl statements
fieldDecl statements = do
name <- identifier
atSign
ordinal <- literalInt
colon
t <- typeExpression
value <- option VoidFieldValue (equalsSign >> fieldValue)
children <- parseBlock fieldLine statements
return (FieldDecl name ordinal t value children)
fieldValue = liftM IntegerFieldValue literalInt
<|> liftM FloatFieldValue literalFloat
<|> liftM StringFieldValue literalString
<|> liftM ArrayFieldValue (bracketedList fieldValue)
<|> liftM RecordFieldValue (parenthesizedList fieldAssignment)
fieldAssignment = do
name <- identifier
equalsSign
value <- fieldValue
return (name, value)
fieldLine :: Maybe [Located Statement] -> TokenParser Declaration
fieldLine Nothing = optionDecl
fieldLine (Just _) = fail "Blocks not allowed here."
interfaceDecl statements = do
interfaceKeyword
name <- identifier
children <- parseBlock interfaceLine statements
return (InterfaceDecl name children)
interfaceLine :: Maybe [Located Statement] -> TokenParser Declaration
interfaceLine Nothing = optionDecl <|> constantDecl <|> methodDecl []
interfaceLine (Just statements) = typeDecl statements <|> methodDecl statements
methodDecl statements = do
name <- identifier
params <- parenthesizedList paramDecl
t <- typeExpression
children <- parseBlock methodLine statements
return (MethodDecl name params t children)
paramDecl = do
name <- identifier
colon
t <- typeExpression
value <- option VoidFieldValue (equalsSign >> fieldValue)
return (name, t, value)
methodLine :: Maybe [Located Statement] -> TokenParser Declaration
methodLine Nothing = optionDecl
methodLine (Just _) = fail "Blocks not allowed here."
optionDecl = do
optionKeyword
name <- declName
equalsSign
value <- fieldValue
return (OptionDecl name value)
extractErrors :: Either ParseError (a, [ParseError]) -> [ParseError]
extractErrors (Left err) = [err]
extractErrors (Right (_, errors)) = errors
parseList parser items = finish where
results = map (parseCollectingErrors parser) items
finish = do
modifyState (\old -> concat (old:map extractErrors results))
return [ result | Right (result, _) <- results ]
parseBlock :: (Maybe [Located Statement] -> TokenParser Declaration)
-> [Located Statement] -> TokenParser [Declaration]
parseBlock parser statements = finish where
results = map (parseStatement parser) statements
finish = do
modifyState (\old -> concat (old:map extractErrors results))
return [ result | Right (result, _) <- results ]
parseCollectingErrors :: TokenParser a -> [Located Token] -> Either ParseError (a, [ParseError])
parseCollectingErrors parser = runParser parser' [] "" where
parser' = do
result <- parser
eof
errors <- getState
return (result, errors)
parseStatement :: (Maybe [Located Statement] -> TokenParser Declaration)
-> Located Statement
-> Either ParseError (Declaration, [ParseError])
parseStatement parser (Located _ (Line tokens)) =
parseCollectingErrors (parser Nothing) tokens
parseStatement parser (Located _ (Block tokens statements)) =
parseCollectingErrors (parser (Just statements)) tokens
parseFileTokens :: [Located Statement] -> ([Declaration], [ParseError])
parseFileTokens statements = (decls, errors) where
results = map (parseStatement topLine) statements
errors = concatMap extractErrors results
decls = [ result | Right (result, _) <- results ]
parseFile :: String -> String -> ([Declaration], [ParseError])
parseFile filename text = case parse lexer filename text of
Left e -> ([], [e])
Right tokens -> parseFileTokens tokens
-- Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
-- All rights reserved.
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions are met:
--
-- 1. Redistributions of source code must retain the above copyright notice, this
-- list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright notice,
-- this list of conditions and the following disclaimer in the documentation
-- and/or other materials provided with the distribution.
--
-- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
module Token where
import Data.Char (toLower)
import Text.Parsec.Pos (SourcePos, sourceLine, sourceColumn)
import Text.Printf (printf)
data PrimitiveType = Void | Bool
| Int8 | Int16 | Int32 | Int64
| UInt8 | UInt16 | UInt32 | UInt64
| Float32 | Float64
| Text | Bytes
deriving (Show, Enum, Bounded, Eq)
primitiveTypes = [(t, map toLower (show t))
| t <- [minBound::PrimitiveType .. maxBound::PrimitiveType]]
data Located t = Located { locatedPos :: SourcePos, locatedValue :: t } deriving (Eq)
instance Show t => Show (Located t) where
show (Located pos x) = printf "%d:%d:%s" (sourceLine pos) (sourceColumn pos) (show x)
data Token = Identifier String
| ParenthesizedList [[Located Token]]
| BracketedList [[Located Token]]
| LiteralInt Integer
| LiteralFloat Double
| LiteralString String
| AtSign
| Colon
| Period
| EqualsSign
| ImportKeyword
| ConstKeyword
| EnumKeyword
| ClassKeyword
| InterfaceKeyword
| OptionKeyword
deriving (Show, Eq)
data Statement = Line [Located Token]
| Block [Located Token] [Located Statement]
deriving (Show)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment