Unverified commit 6a59486e, authored by Kenton Varda and committed by GitHub

Merge pull request #595 from capnproto/add-decode-base-64-errors

Make decodeBase64() report errors
parents 9306bc07 c137c9fd
......@@ -277,7 +277,9 @@ KJ_TEST("base64 encoding/decoding") {
{
auto encoded = encodeBase64(StringPtr("foo").asBytes(), false);
KJ_EXPECT(encoded == "Zm9v", encoded, encoded.size());
KJ_EXPECT(heapString(decodeBase64(encoded.asArray()).asChars()) == "foo");
auto decoded = decodeBase64(encoded.asArray());
KJ_EXPECT(!decoded.hadErrors);
KJ_EXPECT(heapString(decoded.asChars()) == "foo");
}
{
......@@ -289,11 +291,35 @@ KJ_TEST("base64 encoding/decoding") {
{
auto encoded = encodeBase64(StringPtr("corge").asBytes(), false);
KJ_EXPECT(encoded == "Y29yZ2U=", encoded);
KJ_EXPECT(heapString(decodeBase64(encoded.asArray()).asChars()) == "corge");
auto decoded = decodeBase64(encoded.asArray());
KJ_EXPECT(!decoded.hadErrors);
KJ_EXPECT(heapString(decoded.asChars()) == "corge");
}
KJ_EXPECT(heapString(decodeBase64("Y29yZ2U").asChars()) == "corge");
KJ_EXPECT(heapString(decodeBase64("Y\n29y Z@2U=\n").asChars()) == "corge");
{
auto decoded = decodeBase64("Y29yZ2U");
KJ_EXPECT(!decoded.hadErrors);
KJ_EXPECT(heapString(decoded.asChars()) == "corge");
}
{
auto decoded = decodeBase64("Y\n29y Z@2U=\n");
KJ_EXPECT(decoded.hadErrors); // @-sign is invalid base64 input.
KJ_EXPECT(heapString(decoded.asChars()) == "corge");
}
{
auto decoded = decodeBase64("Y\n29y Z2U=\n");
KJ_EXPECT(!decoded.hadErrors);
KJ_EXPECT(heapString(decoded.asChars()) == "corge");
}
// Too much padding.
KJ_EXPECT(decodeBase64("Y29yZ2U==").hadErrors);
KJ_EXPECT(decodeBase64("Y29yZ===").hadErrors);
// Non-terminal padding.
KJ_EXPECT(decodeBase64("ab=c").hadErrors);
{
auto encoded = encodeBase64(StringPtr("corge").asBytes(), true);
......
......@@ -661,24 +661,43 @@ typedef enum {
} base64_decodestep;
typedef struct {
base64_decodestep step;
char plainchar;
bool hadErrors = false;
size_t nPaddingBytesSeen = 0;
// Output state. `nPaddingBytesSeen` is not guaranteed to be correct if `hadErrors` is true. It is
// included in the state purely to preserve the streaming capability of the algorithm while still
// checking for errors correctly (consider chunk 1 = "abc=", chunk 2 = "d").
base64_decodestep step = step_a;
char plainchar = 0;
} base64_decodestate;
int base64_decode_value(char value_in) {
static const char decoding[] = {
62,-1,-1,-1,63,52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-2,-1,-1,-1,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,-1,
26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51};
static const char decoding_size = sizeof(decoding);
value_in -= 43;
if (value_in < 0 || value_in > decoding_size) return -1;
return decoding[(int)value_in];
}
// Returns either the fragment value or: -1 on whitespace, -2 on padding, -3 on invalid input.
//
// Note that the original libb64 implementation used -1 for invalid input, -2 on padding -- this
// new scheme allows for some simpler error checks in steps A and B.
void base64_init_decodestate(base64_decodestate* state_in) {
state_in->step = step_a;
state_in->plainchar = 0;
static const char decoding[] = {
-3,-3,-3,-3,-3,-3,-3,-3, -3,-1,-1,-3,-1,-1,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-1,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,62,-3,-3,-3,63,
52,53,54,55,56,57,58,59, 60,61,-3,-3,-3,-2,-3,-3,
-3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
15,16,17,18,19,20,21,22, 23,24,25,-3,-3,-3,-3,-3,
-3,26,27,28,29,30,31,32, 33,34,35,36,37,38,39,40,
41,42,43,44,45,46,47,48, 49,50,51,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
-3,-3,-3,-3,-3,-3,-3,-3, -3,-3,-3,-3,-3,-3,-3,-3,
};
static_assert(sizeof(decoding) == 256, "base64 decoding table size error");
return decoding[(unsigned char)value_in];
}
int base64_decode_block(const char* code_in, const int length_in,
......@@ -691,6 +710,8 @@ int base64_decode_block(const char* code_in, const int length_in,
*plainchar = state_in->plainchar;
}
#define ERROR_IF(predicate) state_in->hadErrors = state_in->hadErrors || (predicate)
switch (state_in->step)
{
while (1)
......@@ -703,6 +724,8 @@ int base64_decode_block(const char* code_in, const int length_in,
return plainchar - plaintext_out;
}
fragment = (char)base64_decode_value(*codechar++);
// It is an error to see invalid or padding bytes in step A.
ERROR_IF(fragment < -1);
} while (fragment < 0);
*plainchar = (fragment & 0x03f) << 2;
case step_b:
......@@ -710,9 +733,15 @@ int base64_decode_block(const char* code_in, const int length_in,
if (codechar == code_in+length_in) {
state_in->step = step_b;
state_in->plainchar = *plainchar;
// It is always an error to suspend from step B, because we don't have enough bits yet.
// TODO(someday): This actually breaks the streaming use case, if base64_decode_block() is
// to be called multiple times. We'll fix it if we ever care to support streaming.
state_in->hadErrors = true;
return plainchar - plaintext_out;
}
fragment = (char)base64_decode_value(*codechar++);
// It is an error to see invalid or padding bytes in step B.
ERROR_IF(fragment < -1);
} while (fragment < 0);
*plainchar++ |= (fragment & 0x030) >> 4;
*plainchar = (fragment & 0x00f) << 4;
......@@ -721,10 +750,18 @@ int base64_decode_block(const char* code_in, const int length_in,
if (codechar == code_in+length_in) {
state_in->step = step_c;
state_in->plainchar = *plainchar;
// It is an error to complete from step C if we have seen incomplete padding.
// TODO(someday): This actually breaks the streaming use case, if base64_decode_block() is
// to be called multiple times. We'll fix it if we ever care to support streaming.
ERROR_IF(state_in->nPaddingBytesSeen == 1);
return plainchar - plaintext_out;
}
fragment = (char)base64_decode_value(*codechar++);
// It is an error to see invalid bytes or more than two padding bytes in step C.
ERROR_IF(fragment < -2 || (fragment == -2 && ++state_in->nPaddingBytesSeen > 2));
} while (fragment < 0);
// It is an error to continue from step C after having seen any padding.
ERROR_IF(state_in->nPaddingBytesSeen > 0);
*plainchar++ |= (fragment & 0x03c) >> 2;
*plainchar = (fragment & 0x003) << 6;
case step_d:
......@@ -735,19 +772,25 @@ int base64_decode_block(const char* code_in, const int length_in,
return plainchar - plaintext_out;
}
fragment = (char)base64_decode_value(*codechar++);
// It is an error to see invalid bytes or more than one padding byte in step D.
ERROR_IF(fragment < -2 || (fragment == -2 && ++state_in->nPaddingBytesSeen > 1));
} while (fragment < 0);
// It is an error to continue from step D after having seen padding bytes.
ERROR_IF(state_in->nPaddingBytesSeen > 0);
*plainchar++ |= (fragment & 0x03f);
}
}
#undef ERROR_IF
/* control should not reach here */
return plainchar - plaintext_out;
}
} // namespace
Array<byte> decodeBase64(ArrayPtr<const char> input) {
EncodingResult<Array<byte>> decodeBase64(ArrayPtr<const char> input) {
base64_decodestate state;
base64_init_decodestate(&state);
auto output = heapArray<byte>((input.size() * 6 + 7) / 8);
......@@ -760,7 +803,7 @@ Array<byte> decodeBase64(ArrayPtr<const char> input) {
output = kj::mv(copy);
}
return output;
return EncodingResult<Array<byte>>(kj::mv(output), state.hadErrors);
}
} // namespace kj
......@@ -97,9 +97,9 @@ String encodeBase64(ArrayPtr<const byte> bytes, bool breakLines = false);
// Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted
// into the output every 72 characters (e.g. for encoding e-mail bodies).
Array<byte> decodeBase64(ArrayPtr<const char> text);
// Decode base64 text. Non-base64 characters are ignored and padding characters are not required;
// as such, this function never fails.
EncodingResult<Array<byte>> decodeBase64(ArrayPtr<const char> text);
// Decode base64 text. This function reports errors required by the WHATWG HTML/Infra specs: see
// https://html.spec.whatwg.org/multipage/webappapis.html#atob for details.
// =======================================================================================
// inline implementation details
......@@ -200,7 +200,7 @@ inline EncodingResult<String> decodeCEscape(const char (&text)[s]) {
return decodeCEscape(arrayPtr(text, s-1));
}
template <size_t s>
Array<byte> decodeBase64(const char (&text)[s]) {
EncodingResult<Array<byte>> decodeBase64(const char (&text)[s]) {
return decodeBase64(arrayPtr(text, s - 1));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment