Commit 75845ceb authored by Kenton Varda

Encapsulate default type ID generation better.

People see files named "md5" and get worried about security issues, even though we're using it only as a *non-cryptographic* PRNG. So, I renamed the files to type-id.*, encapsulated the code better to make clear that it's only used to generate type IDs, and added comments clarifying that we're not expecting it to be collision-proof.

Fixes #482.
parent f05559d7
......@@ -299,8 +299,8 @@ libcapnp_json_la_SOURCES= \
libcapnpc_la_LIBADD = libcapnp.la libkj.la $(PTHREAD_LIBS)
libcapnpc_la_LDFLAGS = -release $(SO_VERSION) -no-undefined
libcapnpc_la_SOURCES= \
src/capnp/compiler/md5.h \
src/capnp/compiler/md5.c++ \
src/capnp/compiler/type-id.h \
src/capnp/compiler/type-id.c++ \
src/capnp/compiler/error-reporter.h \
src/capnp/compiler/error-reporter.c++ \
src/capnp/compiler/lexer.capnp.h \
......@@ -429,7 +429,7 @@ heavy_tests = \
src/capnp/ez-rpc-test.c++ \
src/capnp/compat/json-test.c++ \
src/capnp/compiler/lexer-test.c++ \
src/capnp/compiler/md5-test.c++
src/capnp/compiler/type-id-test.c++
capnp_test_LDADD = \
libcapnp-test.a \
libcapnpc.la \
......
......@@ -125,7 +125,7 @@ endif()
# Tools/Compilers ==============================================================
set(capnpc_sources
compiler/md5.c++
compiler/type-id.c++
compiler/error-reporter.c++
compiler/lexer.capnp.c++
compiler/lexer.c++
......@@ -237,7 +237,7 @@ if(BUILD_TESTING)
rpc-twoparty-test.c++
ez-rpc-test.c++
compiler/lexer-test.c++
compiler/md5-test.c++
compiler/type-id-test.c++
test-util.c++
compat/json-test.c++
${test_capnp_cpp_files}
......
......@@ -30,7 +30,6 @@
#include <set>
#include <unordered_map>
#include "node-translator.h"
#include "md5.h"
namespace capnp {
namespace compiler {
......
// This file was modified by Kenton Varda from code placed in the public domain.
// The code, which was originally C, was modified to give it a C++ interface.
// The original code bore the following notice:
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* See md5.c for more information.
*/
// TODO(someday): Put in KJ?
#ifndef CAPNP_COMPILER_MD5_H
#define CAPNP_COMPILER_MD5_H
#include <kj/string.h>
#include <kj/array.h>
namespace capnp {
namespace compiler {
class Md5 {
// Incremental MD5 (RFC 1321) hasher. Feed the message with one or more calls to update(), then
// obtain the digest with finish() or finishAsHex().
public:
Md5();
// Sets up the initial hash state.
void update(kj::ArrayPtr<const kj::byte> data);
// Appends `data` to the message being hashed. Must not be called once finish() has been called;
// the implementation enforces this with KJ_REQUIRE.
inline void update(kj::ArrayPtr<const char> data) {
// Convenience overload: hashes the characters as raw bytes.
return update(data.asBytes());
}
inline void update(kj::StringPtr data) {
// Convenience overload: hashes the string's character array.
return update(data.asArray());
}
inline void update(const char* data) {
// Convenience overload for C strings; goes through the kj::StringPtr overload.
return update(kj::StringPtr(data));
}
kj::ArrayPtr<const kj::byte> finish();
// Completes the hash (only the first call does any work) and returns the 16-byte digest. The
// returned array points into this object's internal buffer, so it must not outlive the object.
kj::StringPtr finishAsHex();
// Like finish(), but returns the digest as 32 lowercase hexadecimal characters. The text is
// written into this object's internal buffer, so it must not outlive the object.
private:
/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;
bool finished = false;
// Checked by update() (which rejects further input) and by finish() (which skips the
// finalization work on repeated calls).
typedef struct {
MD5_u32plus lo, hi;
// Running message-length counter -- presumably the low and high words; confirm against the
// implementation.
MD5_u32plus a, b, c, d;
// The four words of hash state, seeded by the constructor.
kj::byte buffer[64];
// Holds input that has not been processed yet; after finish() it holds the digest (and the
// hex text produced by finishAsHex()).
MD5_u32plus block[16];
// Decoded 32-bit words of the block being processed; used by the SET/GET macros on platforms
// where the input is not read in place.
} MD5_CTX;
MD5_CTX ctx;
const kj::byte* body(const kj::byte* ptr, size_t size);
// Processes one or more 64-byte blocks starting at `ptr` without touching the length counters.
// Returns a pointer into the input -- presumably just past the data consumed; confirm against
// the implementation.
};
} // namespace compiler
} // namespace capnp
#endif // CAPNP_COMPILER_MD5_H
......@@ -20,7 +20,7 @@
// THE SOFTWARE.
#include "parser.h"
#include "md5.h"
#include "type-id.h"
#include <capnp/dynamic.h>
#include <kj/debug.h>
#if !_MSC_VER
......@@ -61,81 +61,6 @@ uint64_t generateRandomId() {
return result | (1ull << 63);
}
uint64_t generateChildId(uint64_t parentId, kj::StringPtr childName) {
// Compute ID by MD5 hashing the concatenation of the parent ID and the declaration name, and
// then taking the first 8 bytes.
kj::byte parentIdBytes[sizeof(uint64_t)];
for (uint i = 0; i < sizeof(uint64_t); i++) {
parentIdBytes[i] = (parentId >> (i * 8)) & 0xff;
}
Md5 md5;
md5.update(kj::arrayPtr(parentIdBytes, kj::size(parentIdBytes)));
md5.update(childName);
kj::ArrayPtr<const kj::byte> resultBytes = md5.finish();
uint64_t result = 0;
for (uint i = 0; i < sizeof(uint64_t); i++) {
result = (result << 8) | resultBytes[i];
}
return result | (1ull << 63);
}
uint64_t generateGroupId(uint64_t parentId, uint16_t groupIndex) {
// Compute ID by MD5 hashing the concatenation of the parent ID and the group index, and
// then taking the first 8 bytes.
kj::byte bytes[sizeof(uint64_t) + sizeof(uint16_t)];
for (uint i = 0; i < sizeof(uint64_t); i++) {
bytes[i] = (parentId >> (i * 8)) & 0xff;
}
for (uint i = 0; i < sizeof(uint16_t); i++) {
bytes[sizeof(uint64_t) + i] = (groupIndex >> (i * 8)) & 0xff;
}
Md5 md5;
md5.update(bytes);
kj::ArrayPtr<const kj::byte> resultBytes = md5.finish();
uint64_t result = 0;
for (uint i = 0; i < sizeof(uint64_t); i++) {
result = (result << 8) | resultBytes[i];
}
return result | (1ull << 63);
}
uint64_t generateMethodParamsId(uint64_t parentId, uint16_t methodOrdinal, bool isResults) {
// Compute ID by MD5 hashing the concatenation of the parent ID, the method ordinal, and a
// boolean indicating whether this is the params or the results, and then taking the first 8
// bytes.
kj::byte bytes[sizeof(uint64_t) + sizeof(uint16_t) + 1];
for (uint i = 0; i < sizeof(uint64_t); i++) {
bytes[i] = (parentId >> (i * 8)) & 0xff;
}
for (uint i = 0; i < sizeof(uint16_t); i++) {
bytes[sizeof(uint64_t) + i] = (methodOrdinal >> (i * 8)) & 0xff;
}
bytes[sizeof(bytes) - 1] = isResults;
Md5 md5;
md5.update(bytes);
kj::ArrayPtr<const kj::byte> resultBytes = md5.finish();
uint64_t result = 0;
for (uint i = 0; i < sizeof(uint64_t); i++) {
result = (result << 8) | resultBytes[i];
}
return result | (1ull << 63);
}
void parseFile(List<Statement>::Reader statements, ParsedFile::Builder result,
ErrorReporter& errorReporter) {
CapnpParser parser(Orphanage::getForMessageContaining(result), errorReporter);
......
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Copyright (c) 2017 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
......@@ -19,43 +19,28 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "md5.h"
#include <kj/compat/gtest.h>
#include "type-id.h"
#include <capnp/schema.capnp.h>
#include <kj/test.h>
namespace capnp {
namespace compiler {
namespace {
static kj::String doMd5(kj::StringPtr text) {
  // Test helper: hashes `text` in one shot and returns the hex digest as an owned string (the
  // StringPtr from finishAsHex() points into the hasher, which dies when this function returns).
  Md5 hasher;
  hasher.update(text);
  const char* hex = hasher.finishAsHex().cStr();
  return kj::str(hex);
}
TEST(Md5, Sum) {
EXPECT_STREQ("acbd18db4cc2f85cedef654fccc4a4d8", doMd5("foo").cStr());
EXPECT_STREQ("37b51d194a7513e45b56f6524f2d51f2", doMd5("bar").cStr());
EXPECT_STREQ("3858f62230ac3c915f300c664312c63f", doMd5("foobar").cStr());
{
Md5 md5;
md5.update("foo");
md5.update("bar");
EXPECT_STREQ("3858f62230ac3c915f300c664312c63f", md5.finishAsHex().cStr());
}
KJ_TEST("type ID generation hasn't changed") {
KJ_EXPECT(generateChildId(0xa93fc509624c72d9ull, "Node") == 0xe682ab4cf923a417ull);
KJ_EXPECT(generateChildId(0xe682ab4cf923a417ull, "NestedNode") == 0xdebf55bbfa0fc242ull);
KJ_EXPECT(generateGroupId(0xe682ab4cf923a417ull, 7) == 0x9ea0b19b37fb4435ull);
EXPECT_STREQ("ebf2442d167a30ca4453f99abd8cddf4", doMd5(
"Hello, this is a long string that is more than 64 bytes because the md5 code uses a "
"buffer of 64 bytes.").cStr());
KJ_EXPECT(typeId<schema::Node>() == 0xe682ab4cf923a417ull);
KJ_EXPECT(typeId<schema::Node::NestedNode>() == 0xdebf55bbfa0fc242ull);
KJ_EXPECT(typeId<schema::Node::Struct>() == 0x9ea0b19b37fb4435ull);
{
Md5 md5;
md5.update("Hello, this is a long string ");
md5.update("that is more than 64 bytes ");
md5.update("because the md5 code uses a ");
md5.update("buffer of 64 bytes.");
EXPECT_STREQ("ebf2442d167a30ca4453f99abd8cddf4", md5.finishAsHex().cStr());
}
// Methods of TestInterface.
KJ_EXPECT(generateMethodParamsId(0x88eb12a0e0af92b2ull, 0, false) == 0xb874edc0d559b391ull);
KJ_EXPECT(generateMethodParamsId(0x88eb12a0e0af92b2ull, 0, true) == 0xb04fcaddab714ba4ull);
KJ_EXPECT(generateMethodParamsId(0x88eb12a0e0af92b2ull, 1, false) == 0xd044893357b42568ull);
KJ_EXPECT(generateMethodParamsId(0x88eb12a0e0af92b2ull, 1, true) == 0x9bf141df4247d52full);
}
} // namespace
......
// This file was modified by Kenton Varda from code placed in the public domain.
// The code, which was originally C, was modified to give it a C++ interface.
// Copyright (c) 2013-2017 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "type-id.h"
#include <kj/debug.h>
#include <string.h>
namespace capnp {
namespace compiler {
class TypeIdGenerator {
// A non-cryptographic deterministic random number generator used to generate type IDs when the
// developer did not specify one themselves.
//
// The underlying algorithm is MD5. MD5 is safe to use here because this is not intended to be a
// cryptographic random number generator. In retrospect it would have been nice to use something
// else just to avoid people freaking out about it, but changing the algorithm now would break
// backwards-compatibility.
//
// Usage: feed the input with one or more calls to update(), then call finish().
public:
TypeIdGenerator();
// Sets up the initial hash state.
void update(kj::ArrayPtr<const kj::byte> data);
// Appends `data` to the input. Must not be called once finish() has been called; the
// implementation enforces this with KJ_REQUIRE.
inline void update(kj::ArrayPtr<const char> data) {
// Convenience overload: feeds the characters as raw bytes.
return update(data.asBytes());
}
inline void update(kj::StringPtr data) {
// Convenience overload: feeds the string's character array.
return update(data.asArray());
}
kj::ArrayPtr<const kj::byte> finish();
// Completes the computation (only the first call does any work) and returns 16 output bytes.
// The returned array points into this object's internal buffer, so it must not outlive the
// object.
private:
bool finished = false;
// Checked by update() (which rejects further input) and by finish() (which skips the
// finalization work on repeated calls).
struct {
uint lo, hi;
// Running input-length counter -- presumably the low and high words; confirm against the
// implementation below.
uint a, b, c, d;
// The four words of hash state, seeded by the constructor.
kj::byte buffer[64];
// Holds input that has not been processed yet; after finish() it holds the output bytes.
uint block[16];
// Decoded 32-bit words of the block being processed; used by the SET/GET macros on platforms
// where the input is not read in place.
} ctx;
const kj::byte* body(const kj::byte* ptr, size_t size);
// Processes one or more 64-byte blocks starting at `ptr` without touching the length counters.
// Returns a pointer into the input -- presumably just past the data consumed; confirm against
// the implementation below.
};
uint64_t generateChildId(uint64_t parentId, kj::StringPtr childName) {
// Compute ID by hashing the concatenation of the parent ID and the declaration name, and
// then taking the first 8 bytes.
kj::byte parentIdBytes[sizeof(uint64_t)];
for (uint i = 0; i < sizeof(uint64_t); i++) {
parentIdBytes[i] = (parentId >> (i * 8)) & 0xff;
}
TypeIdGenerator generator;
generator.update(kj::arrayPtr(parentIdBytes, kj::size(parentIdBytes)));
generator.update(childName);
kj::ArrayPtr<const kj::byte> resultBytes = generator.finish();
uint64_t result = 0;
for (uint i = 0; i < sizeof(uint64_t); i++) {
result = (result << 8) | resultBytes[i];
}
return result | (1ull << 63);
}
uint64_t generateGroupId(uint64_t parentId, uint16_t groupIndex) {
// Compute ID by hashing the concatenation of the parent ID and the group index, and
// then taking the first 8 bytes.
kj::byte bytes[sizeof(uint64_t) + sizeof(uint16_t)];
for (uint i = 0; i < sizeof(uint64_t); i++) {
bytes[i] = (parentId >> (i * 8)) & 0xff;
}
for (uint i = 0; i < sizeof(uint16_t); i++) {
bytes[sizeof(uint64_t) + i] = (groupIndex >> (i * 8)) & 0xff;
}
TypeIdGenerator generator;
generator.update(bytes);
kj::ArrayPtr<const kj::byte> resultBytes = generator.finish();
uint64_t result = 0;
for (uint i = 0; i < sizeof(uint64_t); i++) {
result = (result << 8) | resultBytes[i];
}
return result | (1ull << 63);
}
uint64_t generateMethodParamsId(uint64_t parentId, uint16_t methodOrdinal, bool isResults) {
// Compute ID by hashing the concatenation of the parent ID, the method ordinal, and a
// boolean indicating whether this is the params or the results, and then taking the first 8
// bytes.
kj::byte bytes[sizeof(uint64_t) + sizeof(uint16_t) + 1];
for (uint i = 0; i < sizeof(uint64_t); i++) {
bytes[i] = (parentId >> (i * 8)) & 0xff;
}
for (uint i = 0; i < sizeof(uint16_t); i++) {
bytes[sizeof(uint64_t) + i] = (methodOrdinal >> (i * 8)) & 0xff;
}
bytes[sizeof(bytes) - 1] = isResults;
TypeIdGenerator generator;
generator.update(bytes);
kj::ArrayPtr<const kj::byte> resultBytes = generator.finish();
uint64_t result = 0;
for (uint i = 0; i < sizeof(uint64_t); i++) {
result = (result << 8) | resultBytes[i];
}
return result | (1ull << 63);
}
// The remainder of this file was derived from code placed in the public domain.
// The original code bore the following notice:
/*
......@@ -39,13 +176,6 @@
* compile-time configuration.
*/
#include "md5.h"
#include <kj/debug.h>
#include <string.h>
namespace capnp {
namespace compiler {
/*
* The basic MD5 functions.
*
......@@ -76,16 +206,16 @@ namespace compiler {
*/
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
#define SET(n) \
(*(MD5_u32plus *)&ptr[(n) * 4])
(*(uint *)&ptr[(n) * 4])
#define GET(n) \
SET(n)
#else
#define SET(n) \
(ctx.block[(n)] = \
(MD5_u32plus)ptr[(n) * 4] | \
((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
(uint)ptr[(n) * 4] | \
((uint)ptr[(n) * 4 + 1] << 8) | \
((uint)ptr[(n) * 4 + 2] << 16) | \
((uint)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
(ctx.block[(n)])
#endif
......@@ -94,10 +224,10 @@ namespace compiler {
* This processes one or more 64-byte data blocks, but does NOT update
* the bit counters. There are no alignment requirements.
*/
const kj::byte* Md5::body(const kj::byte* ptr, size_t size)
const kj::byte* TypeIdGenerator::body(const kj::byte* ptr, size_t size)
{
MD5_u32plus a, b, c, d;
MD5_u32plus saved_a, saved_b, saved_c, saved_d;
uint a, b, c, d;
uint saved_a, saved_b, saved_c, saved_d;
a = ctx.a;
b = ctx.b;
......@@ -198,7 +328,7 @@ const kj::byte* Md5::body(const kj::byte* ptr, size_t size)
return ptr;
}
Md5::Md5()
TypeIdGenerator::TypeIdGenerator()
{
ctx.a = 0x67452301;
ctx.b = 0xefcdab89;
......@@ -209,14 +339,14 @@ Md5::Md5()
ctx.hi = 0;
}
void Md5::update(kj::ArrayPtr<const kj::byte> dataArray)
void TypeIdGenerator::update(kj::ArrayPtr<const kj::byte> dataArray)
{
KJ_REQUIRE(!finished, "already called Md5::finish()");
KJ_REQUIRE(!finished, "already called TypeIdGenerator::finish()");
const kj::byte* data = dataArray.begin();
unsigned long size = dataArray.size();
MD5_u32plus saved_lo;
uint saved_lo;
unsigned long used, free;
saved_lo = ctx.lo;
......@@ -248,7 +378,7 @@ void Md5::update(kj::ArrayPtr<const kj::byte> dataArray)
memcpy(ctx.buffer, data, size);
}
kj::ArrayPtr<const kj::byte> Md5::finish()
kj::ArrayPtr<const kj::byte> TypeIdGenerator::finish()
{
if (!finished) {
unsigned long used, free;
......@@ -304,21 +434,6 @@ kj::ArrayPtr<const kj::byte> Md5::finish()
return kj::arrayPtr(ctx.buffer, 16);
}
kj::StringPtr Md5::finishAsHex() {
  // Finalizes the hash and renders the 16 digest bytes as 32 lowercase hex characters plus a NUL
  // terminator. The text is placed in ctx.buffer right after the digest (offset 16), so the
  // returned StringPtr is only valid while this object is alive.
  static const char kHexDigits[] = "0123456789abcdef";

  kj::ArrayPtr<const kj::byte> digest = finish();
  char* const text = reinterpret_cast<char*>(ctx.buffer + 16);

  size_t len = 0;
  for (kj::byte b: digest) {
    text[len++] = kHexDigits[b >> 4];    // high nibble
    text[len++] = kHexDigits[b & 0x0f];  // low nibble
  }
  text[len] = '\0';

  return kj::StringPtr(text, 32);
}
} // namespace compiler
} // namespace capnp
// Copyright (c) 2017 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#ifndef CAPNP_COMPILER_TYPE_ID_H
#define CAPNP_COMPILER_TYPE_ID_H
#include <kj/string.h>
#include <kj/array.h>
#include <capnp/common.h>
namespace capnp {
namespace compiler {
uint64_t generateChildId(uint64_t parentId, kj::StringPtr childName);
uint64_t generateGroupId(uint64_t parentId, uint16_t groupIndex);
uint64_t generateMethodParamsId(uint64_t parentId, uint16_t methodOrdinal, bool isResults);
// Generate a default type ID for various symbols. These are used only if the developer did not
// specify an ID explicitly.
//
// The returned ID always has the most-significant bit set. The remaining bits are generated
// pseudo-randomly from the input using an algorithm that should produce a uniform distribution of
// IDs.
//
// Inputs to each function:
// - generateChildId(): the parent's ID and the child declaration's name.
// - generateGroupId(): the parent's ID and the group's index.
// - generateMethodParamsId(): the parent's ID, the method's ordinal, and `isResults`, which
//   selects between the ID for the method's params (false) and for its results (true).
//
// The same inputs always produce the same ID, and the mapping must never change, since that would
// break backwards-compatibility. The generator is not cryptographic and IDs are not expected to
// be collision-proof.
} // namespace compiler
} // namespace capnp
#endif // CAPNP_COMPILER_TYPE_ID_H
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment