// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "lexer.h"
#include <kj/parse/char.h>
#include <kj/debug.h>

namespace capnp {
namespace compiler {

namespace p = kj::parse;

bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result,
         ErrorReporter& errorReporter) {
  Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);

  auto parser = p::sequence(lexer.getParsers().statementSequence, p::endOfInput);

  Lexer::ParserInput parserInput(input.begin(), input.end());
  kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput = parser(parserInput);

  KJ_IF_MAYBE(output, parseOutput) {
    auto l = result.initStatements(output->size());
    for (uint i = 0; i < output->size(); i++) {
      l.adoptWithCaveats(i, kj::mv((*output)[i]));
    }
    return true;
  } else {
    uint32_t best = parserInput.getBest();
    errorReporter.addError(best, best, kj::str("Parse error."));
    return false;
  }
}

bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result,
         ErrorReporter& errorReporter) {
  Lexer lexer(Orphanage::getForMessageContaining(result), errorReporter);

  auto parser = p::sequence(lexer.getParsers().tokenSequence, p::endOfInput);

  Lexer::ParserInput parserInput(input.begin(), input.end());
  kj::Maybe<kj::Array<Orphan<Token>>> parseOutput = parser(parserInput);

  KJ_IF_MAYBE(output, parseOutput) {
    auto l = result.initTokens(output->size());
    for (uint i = 0; i < output->size(); i++) {
      l.adoptWithCaveats(i, kj::mv((*output)[i]));
    }
    return true;
  } else {
    uint32_t best = parserInput.getBest();
    errorReporter.addError(best, best, kj::str("Parse error."));
    return false;
  }
}

namespace {

typedef p::Span<uint32_t> Location;

Token::Builder initTok(Orphan<Token>& t, const Location& loc) {
  auto builder = t.get();
  builder.setStartByte(loc.begin());
  builder.setEndByte(loc.end());
  return builder;
}

void buildTokenSequenceList(List<List<Token>>::Builder builder,
                            kj::Array<kj::Array<Orphan<Token>>>&& items) {
  for (uint i = 0; i < items.size(); i++) {
    auto& item = items[i];
    auto itemBuilder = builder.init(i, item.size());
    for (uint j = 0; j < item.size(); j++) {
      itemBuilder.adoptWithCaveats(j, kj::mv(item[j]));
    }
  }
}

void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comment) {
  size_t size = 0;
  for (auto& line: comment) {
    size += line.size() + 1;  // include newline
  }
  Text::Builder builder = statement.initDocComment(size);
  char* pos = builder.begin();
  for (auto& line: comment) {
    memcpy(pos, line.begin(), line.size());
    pos += line.size();
    *pos++ = '\n';
  }
  KJ_ASSERT(pos == builder.end());
}

constexpr auto discardComment =
    sequence(p::exactChar<'#'>(), p::discard(p::many(p::discard(p::anyOfChars("\n").invert()))),
             p::oneOf(p::exactChar<'\n'>(), p::endOfInput));
constexpr auto saveComment =
    sequence(p::exactChar<'#'>(), p::discard(p::optional(p::exactChar<' '>())),
             p::charsToString(p::many(p::anyOfChars("\n").invert())),
             p::oneOf(p::exactChar<'\n'>(), p::endOfInput));

constexpr auto utf8Bom =
    sequence(p::exactChar<'\xef'>(), p::exactChar<'\xbb'>(), p::exactChar<'\xbf'>());

constexpr auto bomsAndWhitespace =
    sequence(p::discardWhitespace,
             p::discard(p::many(sequence(utf8Bom, p::discardWhitespace))));

constexpr auto commentsAndWhitespace =
    sequence(bomsAndWhitespace,
             p::discard(p::many(sequence(discardComment, bomsAndWhitespace))));

constexpr auto discardLineWhitespace =
    p::discard(p::many(p::discard(p::whitespaceChar.invert().orAny("\r\n").invert())));
constexpr auto newline = p::oneOf(
    p::exactChar<'\n'>(),
    sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));

constexpr auto docComment = p::optional(p::sequence(
    discardLineWhitespace,
    p::discard(p::optional(newline)),
    p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
// Parses a set of comment lines preceded by at most one newline and with no intervening blank
// lines.

}  // namespace

Lexer::Lexer(Orphanage orphanageParam, ErrorReporter& errorReporter)
    : orphanage(orphanageParam) {

  // Note that because passing an lvalue to a parser constructor uses it by-referencee, it's safe
  // for us to use parsers.tokenSequence even though we haven't yet constructed it.
  auto& tokenSequence = parsers.tokenSequence;

  auto& commaDelimitedList = arena.copy(p::transform(
      p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
      [](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
          -> kj::Array<kj::Array<Orphan<Token>>> {
        if (first == nullptr && rest == nullptr) {
          // Completely empty list.
          return nullptr;
        } else {
          uint restSize = rest.size();
          if (restSize > 0 && rest[restSize - 1] == nullptr) {
            // Allow for trailing commas by shortening the list by one item if the final token is
            // nullptr
            restSize--;
          }
          auto result = kj::heapArrayBuilder<kj::Array<Orphan<Token>>>(1 + restSize); // first+rest
          result.add(kj::mv(first));
          for (uint i = 0; i < restSize ; i++) {
            result.add(kj::mv(rest[i]));
          }
          return result.finish();
        }
      }));

  auto& token = arena.copy(p::oneOf(
      p::transformWithLocation(p::identifier,
          [this](Location loc, kj::String name) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIdentifier(name);
            return t;
          }),
      p::transformWithLocation(p::doubleQuotedString,
          [this](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setStringLiteral(text);
            return t;
          }),
      p::transformWithLocation(p::doubleQuotedHexBinary,
          [this](Location loc, kj::Array<byte> data) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setBinaryLiteral(data);
            return t;
          }),
      p::transformWithLocation(p::integer,
          [this](Location loc, uint64_t i) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setIntegerLiteral(i);
            return t;
          }),
      p::transformWithLocation(p::number,
          [this](Location loc, double x) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setFloatLiteral(x);
            return t;
          }),
      p::transformWithLocation(
          p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
          [this](Location loc, kj::String text) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            initTok(t, loc).setOperator(text);
            return t;
          }),
      p::transformWithLocation(
          sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
          [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(
                initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
            return t;
          }),
      p::transformWithLocation(
          sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
          [this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
            auto t = orphanage.newOrphan<Token>();
            buildTokenSequenceList(
                initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
            return t;
          }),
      p::transformOrReject(p::transformWithLocation(
          p::oneOf(sequence(p::exactChar<'\xff'>(), p::exactChar<'\xfe'>()),
                   sequence(p::exactChar<'\xfe'>(), p::exactChar<'\xff'>()),
                   sequence(p::exactChar<'\x00'>())),
          [&errorReporter](Location loc) -> kj::Maybe<Orphan<Token>> {
            errorReporter.addError(loc.begin(), loc.end(),
                "Non-UTF-8 input detected. Cap'n Proto schema files must be UTF-8 text.");
            return nullptr;
          }), [](kj::Maybe<Orphan<Token>> param) { return param; })));
  parsers.tokenSequence = arena.copy(p::sequence(
      commentsAndWhitespace, p::many(p::sequence(token, commentsAndWhitespace))));

  auto& statementSequence = parsers.statementSequence;

  auto& statementEnd = arena.copy(p::oneOf(
      transform(p::sequence(p::exactChar<';'>(), docComment),
          [this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
            auto result = orphanage.newOrphan<Statement>();
            auto builder = result.get();
            KJ_IF_MAYBE(c, comment) {
              attachDocComment(builder, kj::mv(*c));
            }
            builder.setLine();
            return result;
          }),
      transform(
          p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
                      docComment),
          [this](kj::Maybe<kj::Array<kj::String>>&& comment,
                 kj::Array<Orphan<Statement>>&& statements,
                 kj::Maybe<kj::Array<kj::String>>&& lateComment)
              -> Orphan<Statement> {
            auto result = orphanage.newOrphan<Statement>();
            auto builder = result.get();
            KJ_IF_MAYBE(c, comment) {
              attachDocComment(builder, kj::mv(*c));
            } else KJ_IF_MAYBE(c, lateComment) {
              attachDocComment(builder, kj::mv(*c));
            }
            auto list = builder.initBlock(statements.size());
            for (uint i = 0; i < statements.size(); i++) {
              list.adoptWithCaveats(i, kj::mv(statements[i]));
            }
            return result;
          })
      ));

  auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd),
      [](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
        auto builder = statement.get();
        auto tokensBuilder = builder.initTokens(tokens.size());
        for (uint i = 0; i < tokens.size(); i++) {
          tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
        }
        builder.setStartByte(loc.begin());
        builder.setEndByte(loc.end());
        return kj::mv(statement);
      }));

  parsers.statementSequence = arena.copy(sequence(
      commentsAndWhitespace, many(sequence(statement, commentsAndWhitespace))));

  parsers.token = token;
  parsers.statement = statement;
  parsers.emptySpace = commentsAndWhitespace;
}

Lexer::~Lexer() noexcept(false) {}

}  // namespace compiler
}  // namespace capnp