// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Implementation of SafeSNPrintf(): a restricted snprintf()-like formatter
// that writes into a caller-provided buffer without allocating memory and
// without calling into other library functions (see the async-signal-safety
// notes further down in this file).

#include "butil/strings/safe_sprintf.h"

#include <limits>

#if !defined(NDEBUG)
// In debug builds, we use RAW_CHECK() to print useful error messages, if
// SafeSPrintf() is called with broken arguments.
// As our contract promises that SafeSPrintf() can be called from any
// restricted run-time context, it is not actually safe to call logging
// functions from it; and we only ever do so for debug builds and hope for the
// best. We should _never_ call any logging function other than RAW_CHECK(),
// and we should _never_ include any logging code that is active in production
// builds. Most notably, we should not include these logging functions in
// unofficial release builds, even though those builds would otherwise have
// DEBUG_CHECKS() enabled.
// In other words; please do not remove the #ifdef around this #include.
// Instead, in production builds we opt for returning a degraded result,
// whenever an error is encountered.
// E.g. The broken function call
//   SafeSPrintf("errno = %d (%x)", errno, strerror(errno))
// will print something like
//   errno = 13 (%x)
// instead of
//   errno = 13 (Access denied)
// In most of the anticipated use cases, that's probably the preferred
// behavior.
#include "butil/logging.h"
#define DEBUG_CHECK RAW_CHECK
#else
// Release builds: the condition is still evaluated, but its result and the
// message are ignored, so no logging code is ever pulled in.
#define DEBUG_CHECK(x, msg) do { if (x) { } } while (0)
#endif

namespace butil {
namespace strings {

// The code in this file is extremely careful to be async-signal-safe.
//
// Most obviously, we avoid calling any code that could dynamically allocate
// memory. Doing so would almost certainly result in bugs and dead-locks.
// We also avoid calling any other STL functions that could have unintended
// side-effects involving memory allocation or access to other shared
// resources.
//
// But on top of that, we also avoid calling other library functions, as many
// of them have the side-effect of calling getenv() (in order to deal with
// localization) or accessing errno. The latter sounds benign, but there are
// several execution contexts where it isn't even possible to safely read let
// alone write errno.
//
// The stated design goal of the SafeSPrintf() function is that it can be
// called from any context that can safely call C or C++ code (i.e. anything
// that doesn't require assembly code).
//
// For a brief overview of some but not all of the issues with async-signal-
// safety, refer to:
// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html

namespace {
// All-ones size_t shifted right by one bit, i.e. the largest size_t value
// whose top bit is clear. The Buffer constructor (where the toolchain allows
// it) statically asserts that this equals numeric_limits<ssize_t>::max().
const size_t kSSizeMaxConst = ((size_t)(ssize_t)-1) >> 1;

// Digit lookup tables used by Buffer::IToASCII(); indexed by digit value.
const char kUpCaseHexDigits[]   = "0123456789ABCDEF";
const char kDownCaseHexDigits[] = "0123456789abcdef";
}

#if defined(NDEBUG)
// We would like to define kSSizeMax as std::numeric_limits<ssize_t>::max(),
// but C++ doesn't allow us to do that for constants. Instead, we have to
// use careful casting and shifting. We later use a BAIDU_CASSERT to
// verify that this worked correctly.
namespace {
const size_t kSSizeMax = kSSizeMaxConst;
}
#else  // defined(NDEBUG)
// For efficiency, we really need kSSizeMax to be a constant. But for unit
// tests, it should be adjustable. This allows us to verify edge cases without
// having to fill the entire available address space. As a compromise, we make
// kSSizeMax adjustable in debug builds, and then only compile that particular
// part of the unit test in debug builds.
namespace {
static size_t kSSizeMax = kSSizeMaxConst;
}

namespace internal {
// Debug-only test hook: overwrites the file-local kSSizeMax with |max|.
void SetSafeSPrintfSSizeMaxForTest(size_t max) {
  kSSizeMax = max;
}

// Debug-only test hook: returns the current value of the file-local
// kSSizeMax.
size_t GetSafeSPrintfSSizeMaxForTest() {
  return kSSizeMax;
}
}
#endif  // defined(NDEBUG)

namespace {
// Output cursor over a caller-provided character array.
//
// Tracks both where the next byte would be stored and how many bytes would
// have been emitted had the array been large enough (|count_|). Characters
// that do not fit are dropped but still counted. The destructor writes the
// terminating NUL.
class Buffer {
 public:
  // |buffer| is caller-allocated storage that SafeSPrintf() writes to. It
  // has |size| bytes of writable storage. It is the caller's responsibility
  // to ensure that the buffer is at least one byte in size, so that it fits
  // the trailing NUL that will be added by the destructor. The buffer also
  // must be smaller or equal to kSSizeMax in size.
  Buffer(char* buffer, size_t size)
      : buffer_(buffer),
        size_(size - 1),  // Account for trailing NUL byte
        count_(0) {
// The following assertion does not build on Mac and Android and gcc before 4.6
// This is because static_assert only works with compile-time constants, but
// mac uses libstdc++4.2, android uses stlport and gcc doesn't support keyword
// constexpr until 4.6, which all don't mark numeric_limits::max() as
// constexpr.
#if defined(BUTIL_CXX11_ENABLED) \
    && !(defined(__GNUC__) && __GNUC__ * 10000 + __GNUC_MINOR__ * 100 < 40600) \
    && !defined(OS_ANDROID) && !defined(OS_MACOSX) && !defined(OS_IOS)
    BAIDU_CASSERT(kSSizeMaxConst == \
                  static_cast<size_t>(std::numeric_limits<ssize_t>::max()),
                  kSSizeMax_is_the_max_value_of_an_ssize_t);
#endif
    DEBUG_CHECK(size > 0, "");
    DEBUG_CHECK(size <= kSSizeMax, "");
  }

  ~Buffer() {
    // The code calling the constructor guaranteed that there was enough space
    // to store a trailing NUL -- and in debug builds, we are actually
    // verifying this with DEBUG_CHECK()s in the constructor. So, we can
    // always unconditionally write the NUL byte in the destructor.  We do not
    // need to adjust the count_, as SafeSPrintf() copies snprintf() in not
    // including the NUL byte in its return code.
    *GetInsertionPoint() = '\000';
  }

  // Returns true, iff the buffer is filled all the way to |kSSizeMax-1|. The
  // caller can now stop adding more data, as GetCount() has reached its
  // maximum possible value.
  inline bool OutOfAddressableSpace() const {
    return count_ == static_cast<size_t>(kSSizeMax - 1);
  }

  // Returns the number of bytes that would have been emitted to |buffer_|
  // if it was sized sufficiently large. This number can be larger than
  // |size_|, if the caller provided an insufficiently large output buffer.
  // But it will never be bigger than |kSSizeMax-1|.
  inline ssize_t GetCount() const {
    DEBUG_CHECK(count_ < kSSizeMax, "");
    return static_cast<ssize_t>(count_);
  }

  // Emits one |ch| character into the |buffer_| and updates the |count_| of
  // characters that are currently supposed to be in the buffer.
  // Returns "false", iff the buffer was already full.
  // N.B. |count_| increases even if no characters have been written. This is
  // needed so that GetCount() can return the number of bytes that should
  // have been allocated for the |buffer_|.
  inline bool Out(char ch) {
    if (size_ >= 1 && count_ < size_) {
      buffer_[count_] = ch;
      return IncrementCountByOne();
    }
    // |count_| still needs to be updated, even if the buffer has been
    // filled completely. This allows SafeSPrintf() to return the number of
    // bytes that should have been emitted.
    IncrementCountByOne();
    return false;
  }

  // Inserts |padding|-|len| bytes worth of padding into the |buffer_|.
  // |count_| will also be incremented by the number of bytes that were meant
  // to be emitted. The |pad| character is typically either a ' ' space
  // or a '0' zero, but other non-NUL values are legal.
  // Returns "false", iff the |buffer_| filled up (i.e. |count_|
  // overflowed |size_|) at any time during padding.
  inline bool Pad(char pad, size_t padding, size_t len) {
    DEBUG_CHECK(pad, "");
    DEBUG_CHECK(padding <= kSSizeMax, "");
    for (; padding > len; --padding) {
      if (!Out(pad)) {
        // The failed Out() call already counted one pad character. Account
        // for the remaining ones arithmetically instead of looping.
        if (--padding) {
          IncrementCount(padding-len);
        }
        return false;
      }
    }
    return true;
  }

  // POSIX doesn't define any async-signal-safe function for converting
  // an integer to ASCII. Define our own version.
  //
  // This also gives us the ability to make the function a little more
  // powerful and have it deal with |padding|, with truncation, and with
  // predicting the length of the untruncated output.
  //
  // IToASCII() converts an integer |i| to ASCII.
  //
  // Unlike similar functions in the standard C library, it never appends a
  // NUL character. This is left for the caller to do.
  //
  // While the function signature takes a signed int64_t, the code decides at
  // run-time whether to treat the argument as signed (int64_t) or as unsigned
  // (uint64_t) based on the value of |sign|.
  //
  // It supports |base|s 2 through 16. Only a |base| of 10 is allowed to have
  // a |sign|. Otherwise, |i| is treated as unsigned.
  //
  // For bases larger than 10, |upcase| decides whether lower-case or upper-
  // case letters should be used to designate digits greater than 10.
  //
  // Padding can be done with either '0' zeros or ' ' spaces. Padding has to
  // be positive and will always be applied to the left of the output.
  //
  // Prepends a |prefix| to the number (e.g. "0x"). This prefix goes to
  // the left of |padding|, if |pad| is '0'; and to the right of |padding|
  // if |pad| is ' '.
  //
  // Returns "false", if the |buffer_| overflowed at any time.
  bool IToASCII(bool sign, bool upcase, int64_t i, int base,
                char pad, size_t padding, const char* prefix);

 private:
  // Increments |count_| by |inc| unless this would cause |count_| to
  // overflow |kSSizeMax-1|. Returns "false", iff an overflow was detected;
  // it then clamps |count_| to |kSSizeMax-1|.
  inline bool IncrementCount(size_t inc) {
    // "inc" is either 1 or a "padding" value. Padding is clamped at
    // run-time to at most kSSizeMax-1. So, we know that "inc" is always in
    // the range 1..kSSizeMax-1.
    // This allows us to compute "kSSizeMax - 1 - inc" without incurring any
    // integer overflows.
    DEBUG_CHECK(inc <= kSSizeMax - 1, "");
    if (count_ > kSSizeMax - 1 - inc) {
      count_ = kSSizeMax - 1;
      return false;
    } else {
      count_ += inc;
      return true;
    }
  }

  // Convenience method for the common case of incrementing |count_| by one.
  inline bool IncrementCountByOne() {
    return IncrementCount(1);
  }

  // Return the current insertion point into the buffer. This is typically
  // at |buffer_| + |count_|, but could be before that if truncation
  // happened. It always points to one byte past the last byte that was
  // successfully placed into the |buffer_|.
  inline char* GetInsertionPoint() const {
    size_t idx = count_;
    if (idx > size_) {
      idx = size_;
    }
    return buffer_ + idx;
  }

  // User-provided buffer that will receive the fully formatted output string.
  char* buffer_;

  // Number of bytes that are available in the buffer excluding the trailing
  // NUL byte that will be added by the destructor.
  const size_t size_;

  // Number of bytes that would have been emitted to the buffer, if the buffer
  // was sufficiently big. This number always excludes the trailing NUL byte
  // and it is guaranteed to never grow bigger than kSSizeMax-1.
  size_t count_;

  DISALLOW_COPY_AND_ASSIGN(Buffer);
};


// See the declaration inside class Buffer for the full contract. The digits
// are produced least-significant first and reversed in place at the end.
bool Buffer::IToASCII(bool sign, bool upcase, int64_t i, int base,
                      char pad, size_t padding, const char* prefix) {
  // Sanity check for parameters. None of these should ever fail, but see
  // above for the rationale why we can't call CHECK().
  DEBUG_CHECK(base >= 2, "");
  DEBUG_CHECK(base <= 16, "");
  DEBUG_CHECK(!sign || base == 10, "");
  DEBUG_CHECK(pad == '0' || pad == ' ', "");
  DEBUG_CHECK(padding <= kSSizeMax, "");
  DEBUG_CHECK(!(sign && prefix && *prefix), "");

  // Handle negative numbers, if the caller indicated that |i| should be
  // treated as a signed number; otherwise treat |i| as unsigned (even if the
  // MSB is set!)
  // Details are tricky, because of limited data-types, but equivalent pseudo-
  // code would look like:
  //   if (sign && i < 0)
  //     prefix = "-";
  //   num = abs(i);
  int minint = 0;
  uint64_t num;
  if (sign && i < 0) {
    prefix = "-";

    // Turn our number positive.
    if (i == std::numeric_limits<int64_t>::min()) {
      // The most negative integer needs special treatment: negate (i + 1)
      // instead, and add the missing 1 back onto the first digit emitted
      // (see |minint| below).
      minint = 1;
      num = static_cast<uint64_t>(-(i + 1));
    } else {
      // "Normal" negative numbers are easy.
      num = static_cast<uint64_t>(-i);
    }
  } else {
    num = static_cast<uint64_t>(i);
  }

  // If padding with '0' zero, emit the prefix or '-' character now. Otherwise,
  // make the prefix accessible in reverse order, so that we can later output
  // it right between padding and the number.
  // We cannot choose the easier approach of just reversing the number, as that
  // fails in situations where we need to truncate numbers that have padding
  // and/or prefixes.
  const char* reverse_prefix = NULL;
  if (prefix && *prefix) {
    if (pad == '0') {
      while (*prefix) {
        if (padding) {
          --padding;
        }
        Out(*prefix++);
      }
      prefix = NULL;
    } else {
      // Advance |reverse_prefix| to the prefix's terminating NUL; the main
      // loop below walks it backwards towards |prefix|.
      for (reverse_prefix = prefix; *reverse_prefix; ++reverse_prefix) {
      }
    }
  } else
    prefix = NULL;
  // Zero when the prefix was already emitted above or when there is none
  // (both pointers are NULL in those cases).
  const size_t prefix_length = reverse_prefix - prefix;

  // Loop until we have converted the entire number. Output at least one
  // character (i.e. '0').
  size_t start = count_;
  size_t discarded = 0;
  bool started = false;
  do {
    // Make sure there is still enough space left in our output buffer.
    if (count_ >= size_) {
      if (start < size_) {
        // It is rare that we need to output a partial number. But if asked
        // to do so, we will still make sure we output the correct number of
        // leading digits.
        // Since we are generating the digits in reverse order, we actually
        // have to discard digits in the order that we have already emitted
        // them. This is essentially equivalent to:
        //   memmove(buffer_ + start, buffer_ + start + 1, size_ - start - 1)
        for (char* move = buffer_ + start, *end = buffer_ + size_ - 1;
             move < end;
             ++move) {
          *move = move[1];
        }
        ++discarded;
        --count_;
      } else if (count_ - size_ > 1) {
        // Need to increment either |count_| or |discarded| to make progress.
        // The latter is more efficient, as it eventually triggers fast
        // handling of padding. But we have to ensure we don't accidentally
        // change the overall state (i.e. switch the state-machine from
        // discarding to non-discarding). |count_| needs to always stay
        // bigger than |size_|.
        --count_;
        ++discarded;
      }
    }

    // Output the next digit and (if necessary) compensate for the most
    // negative integer needing special treatment. This works because,
    // no matter the bit width of the integer, the lowest-most decimal
    // integer always ends in 2, 4, 6, or 8.
    if (!num && started) {
      // All digits have been emitted; what remains is the (reversed) prefix
      // followed by padding.
      if (reverse_prefix > prefix) {
        Out(*--reverse_prefix);
      } else {
        Out(pad);
      }
    } else {
      started = true;
      Out((upcase ? kUpCaseHexDigits : kDownCaseHexDigits)[num%base + minint]);
    }

    // The +1 correction applies to the least significant digit only.
    minint = 0;
    num /= base;

    // Add padding, if requested.
    if (padding > 0) {
      --padding;

      // Performance optimization for when we are asked to output excessive
      // padding, but our output buffer is limited in size.  Even if we output
      // a 64bit number in binary, we would never write more than 64 plus
      // prefix non-padding characters. So, once this limit has been passed,
      // any further state change can be computed arithmetically; we know that
      // by this time, our entire final output consists of padding characters
      // that have all already been output.
      if (discarded > 8*sizeof(num) + prefix_length) {
        IncrementCount(padding);
        padding = 0;
      }
    }
  } while (num || padding || (reverse_prefix > prefix));

  // Conversion to ASCII actually resulted in the digits being in reverse
  // order. We can't easily generate them in forward order, as we can't tell
  // the number of characters needed until we are done converting.
  // So, now, we reverse the string (except for the possible '-' sign).
  char* front = buffer_ + start;
  char* back = GetInsertionPoint();
  while (--back > front) {
    char ch = *back;
    *back = *front;
    *front++ = ch;
  }

  // Characters that were dropped during truncation still count towards the
  // length that would have been needed.
  IncrementCount(discarded);
  return !discarded;
}

}  // anonymous namespace

namespace internal {

// Formats |fmt| into |buf|, taking conversion arguments from |args|.
//
// |buf| / |sz|  : destination and its size in bytes, including room for the
//                 trailing NUL. |sz| is clamped to kSSizeMax.
// |fmt|         : format string. Recognized conversions are %c, %d, %o, %x,
//                 %X, %p, %s and %%, optionally preceded by a decimal width;
//                 a width starting with '0' selects zero padding for the
//                 numeric conversions.
// |args|        : array of |max_args| type-tagged arguments.
//
// Returns -1 if |sz|, interpreted as ssize_t, is less than 1. Otherwise
// returns the number of bytes that would have been emitted for a
// sufficiently large buffer, not counting the trailing NUL.
//
// A conversion whose argument is missing or has the wrong type is not
// expanded; '%' and the format character are copied to the output instead.
// A %c conversion with a NUL character argument stops formatting.
ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt, const Arg* args,
                     const size_t max_args) {
  // Make sure that at least one NUL byte can be written, and that the buffer
  // never overflows kSSizeMax. Not only does that use up most or all of the
  // address space, it also would result in a return code that cannot be
  // represented.
  if (static_cast<ssize_t>(sz) < 1) {
    return -1;
  } else if (sz > kSSizeMax) {
    sz = kSSizeMax;
  }

  // Iterate over format string and interpret '%' arguments as they are
  // encountered.
  Buffer buffer(buf, sz);
  size_t padding;
  char pad;
  for (unsigned int cur_arg = 0; *fmt && !buffer.OutOfAddressableSpace(); ) {
    if (*fmt++ == '%') {
      padding = 0;
      pad = ' ';
      char ch = *fmt++;
    format_character_found:
      switch (ch) {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        // Found a width parameter. Convert to an integer value and store in
        // "padding". If the leading digit is a zero, change the padding
        // character from a space ' ' to a zero '0'.
        pad = ch == '0' ? '0' : ' ';
        for (;;) {
          // The maximum allowed padding fills all the available address
          // space and leaves just enough space to insert the trailing NUL.
          const size_t max_padding = kSSizeMax - 1;
          if (padding > max_padding/10 ||
              10*padding > max_padding - (ch - '0')) {
            DEBUG_CHECK(padding <= max_padding/10 &&
                        10*padding <= max_padding - (ch - '0'), "");
            // Integer overflow detected. Skip the rest of the width until
            // we find the format character, then do the normal error handling.
          padding_overflow:
            padding = max_padding;
            while ((ch = *fmt++) >= '0' && ch <= '9') {
            }
            // The conversion is not expanded, but it still consumes its
            // argument (if there is one left).
            if (cur_arg < max_args) {
              ++cur_arg;
            }
            goto fail_to_expand;
          }
          padding = 10*padding + ch - '0';
          if (padding > max_padding) {
            // This doesn't happen for "sane" values of kSSizeMax. But once
            // kSSizeMax gets smaller than about 10, our earlier range checks
            // are incomplete. Unittests do trigger this artificial corner
            // case.
            DEBUG_CHECK(padding <= max_padding, "");
            goto padding_overflow;
          }
          ch = *fmt++;
          if (ch < '0' || ch > '9') {
            // Reached the end of the width parameter. This is where the format
            // character is found.
            goto format_character_found;
          }
        }
        break;
      case 'c': {  // Output an ASCII character.
        // Check that there are arguments left to be inserted.
        if (cur_arg >= max_args) {
          DEBUG_CHECK(cur_arg < max_args, "");
          goto fail_to_expand;
        }

        // Check that the argument has the expected type.
        const Arg& arg = args[cur_arg++];
        if (arg.type != Arg::INT && arg.type != Arg::UINT) {
          DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT, "");
          goto fail_to_expand;
        }

        // Apply padding, if needed. Characters are always padded with
        // spaces, regardless of the requested pad character.
        buffer.Pad(' ', padding, 1);

        // Convert the argument to an ASCII character and output it.
        char ch = static_cast<char>(arg.i);
        if (!ch) {
          goto end_of_output_buffer;
        }
        buffer.Out(ch);
        break; }
      case 'd':    // Output a possibly signed decimal value.
      case 'o':    // Output an unsigned octal value.
      case 'x':    // Output an unsigned hexadecimal value.
      case 'X':
      case 'p': {  // Output a pointer value.
        // Check that there are arguments left to be inserted.
        if (cur_arg >= max_args) {
          DEBUG_CHECK(cur_arg < max_args, "");
          goto fail_to_expand;
        }

        const Arg& arg = args[cur_arg++];
        int64_t i;
        const char* prefix = NULL;
        if (ch != 'p') {
          // Check that the argument has the expected type.
          if (arg.type != Arg::INT && arg.type != Arg::UINT) {
            DEBUG_CHECK(arg.type == Arg::INT || arg.type == Arg::UINT, "");
            goto fail_to_expand;
          }
          i = arg.i;

          if (ch != 'd') {
            // The Arg() constructor automatically performed sign expansion on
            // signed parameters. This is great when outputting a %d decimal
            // number, but can result in unexpected leading 0xFF bytes when
            // outputting a %x hexadecimal number. Mask bits, if necessary.
            // We have to do this here, instead of in the Arg() constructor, as
            // the Arg() constructor cannot tell whether we will output a %d
            // or a %x. Only the latter should experience masking.
            if (arg.width < sizeof(int64_t)) {
              i &= (1LL << (8*arg.width)) - 1;
            }
          }
        } else {
          // Pointer values require an actual pointer or a string.
          if (arg.type == Arg::POINTER) {
            i = reinterpret_cast<uintptr_t>(arg.ptr);
          } else if (arg.type == Arg::STRING) {
            i = reinterpret_cast<uintptr_t>(arg.str);
          } else if (arg.type == Arg::INT && arg.width == sizeof(NULL) &&
                     arg.i == 0) {  // Allow C++'s version of NULL
            i = 0;
          } else {
            DEBUG_CHECK(arg.type == Arg::POINTER || arg.type == Arg::STRING, "");
            goto fail_to_expand;
          }

          // Pointers always include the "0x" prefix.
          prefix = "0x";
        }

        // Use IToASCII() to convert to ASCII representation. For decimal
        // numbers, optionally print a sign. For hexadecimal numbers,
        // distinguish between upper and lower case. %p addresses are always
        // printed as upcase. Supports base 8, 10, and 16. Prints padding
        // and/or prefixes, if so requested.
        buffer.IToASCII(ch == 'd' && arg.type == Arg::INT,
                        ch != 'x', i,
                        ch == 'o' ? 8 : ch == 'd' ? 10 : 16,
                        pad, padding, prefix);
        break; }
      case 's': {
        // Check that there are arguments left to be inserted.
        if (cur_arg >= max_args) {
          DEBUG_CHECK(cur_arg < max_args, "");
          goto fail_to_expand;
        }

        // Check that the argument has the expected type.
        const Arg& arg = args[cur_arg++];
        const char *s;
        if (arg.type == Arg::STRING) {
          s = arg.str ? arg.str : "<NULL>";
        } else if (arg.type == Arg::INT && arg.width == sizeof(NULL) &&
                   arg.i == 0) {  // Allow C++'s version of NULL
          s = "<NULL>";
        } else {
          DEBUG_CHECK(arg.type == Arg::STRING, "");
          goto fail_to_expand;
        }

        // Apply padding, if needed. This requires us to first check the
        // length of the string that we are outputting.
        if (padding) {
          size_t len = 0;
          for (const char* src = s; *src++; ) {
            ++len;
          }
          buffer.Pad(' ', padding, len);
        }

        // Printing a string involves nothing more than copying it into the
        // output buffer and making sure we don't output more bytes than
        // available space; Out() takes care of doing that.
        for (const char* src = s; *src; ) {
          buffer.Out(*src++);
        }
        break; }
      case '%':
        // Quoted percent '%' character.
        goto copy_verbatim;
      fail_to_expand:
        // C++ gives us tools to do type checking -- something that snprintf()
        // could never really do. So, whenever we see arguments that don't
        // match up with the format string, we refuse to output them. But
        // since we have to be extremely conservative about being async-
        // signal-safe, we are limited in the type of error handling that we
        // can do in production builds (in debug builds we can use
        // DEBUG_CHECK() and hope for the best). So, all we do is pass the
        // format string unchanged. That should eventually get the user's
        // attention; and in the meantime, it hopefully doesn't lose too much
        // data.
      default:
        // Unknown or unsupported format character. Just copy verbatim to
        // output.
        buffer.Out('%');
        DEBUG_CHECK(ch, "");
        if (!ch) {
          // The format string ended right after the '%' (or after a width).
          goto end_of_format_string;
        }
        buffer.Out(ch);
        break;
      }
    } else {
  copy_verbatim:
    // |fmt| has already been advanced past the character to copy.
    buffer.Out(fmt[-1]);
    }
  }
 end_of_format_string:
 end_of_output_buffer:
  return buffer.GetCount();
}

}  // namespace internal

// Overload for format strings without any arguments.
//
// Copies |fmt| into |buf| (of |sz| bytes, including room for the trailing
// NUL), emitting a single '%' for every "%%" pair. In debug builds, a '%'
// that is not followed by another '%' triggers a DEBUG_CHECK(); in release
// builds it is copied as is.
//
// Returns -1 if |sz|, interpreted as ssize_t, is less than 1. Otherwise
// returns the number of bytes that would have been emitted for a
// sufficiently large buffer, not counting the trailing NUL.
ssize_t SafeSNPrintf(char* buf, size_t sz, const char* fmt) {
  // Make sure that at least one NUL byte can be written, and that the buffer
  // never overflows kSSizeMax. Not only does that use up most or all of the
  // address space, it also would result in a return code that cannot be
  // represented.
  if (static_cast<ssize_t>(sz) < 1) {
    return -1;
  } else if (sz > kSSizeMax) {
    sz = kSSizeMax;
  }

  Buffer buffer(buf, sz);

  // In the slow-path, we deal with errors by copying the contents of
  // "fmt" unexpanded. This means, if there are no arguments passed, the
  // SafeSPrintf() function always degenerates to a version of strncpy() that
  // de-duplicates '%' characters.
  const char* src = fmt;
  for (; *src; ++src) {
    buffer.Out(*src);
    DEBUG_CHECK(src[0] != '%' || src[1] == '%', "");
    if (src[0] == '%' && src[1] == '%') {
      // Skip the second '%' of the pair; the first one was just emitted.
      ++src;
    }
  }
  return buffer.GetCount();
}

}  // namespace strings
}  // namespace butil