Commit c4d37a78 authored by Peter Collingbourne's avatar Peter Collingbourne

Compute base addresses from program headers while reading /proc/self/maps.

We previously had logic to compute the base address from program
headers as part of symbolization. The problem is that we need a correct
base address earlier in order to adjust a PC into the image's address
space, as these addresses can appear in unsymbolized output.

There was previously an assumption that only the mapping that
was lowest in the address space did not need to be adjusted. This
assumption is not guaranteed (for example, the kernel may choose to
map an ET_DYN lowest) and in fact turned out to be wrong in binaries
linked with lld because the first mapping is read-only.

The solution is to move the program header reading logic into the
code that reads /proc/self/maps.

There is a change in semantics for clients that install a callback
using the InstallSymbolizeOpenObjectFileCallback function. Any such
clients will need to return a correct base address from the callback
by reading program headers using code similar to that in the function
OpenObjectFileContainingPcAndGetStartAddress.
parent d72e3be8
......@@ -63,7 +63,10 @@ TEST_LIBS = $(GTEST_LIBS) $(GMOCK_LIBS) $(GFLAGS_LIBS)
## TESTS_ENVIRONMENT sets environment variables for when you run unittest,
## but it only seems to take effect for *binary* unittests (argh!)
TESTS =
TESTS_ENVIRONMENT =
# Set a small stack size so that (at least on Linux) PIEs are mapped at a lower
# address than DSOs. This is used by symbolize_pie_unittest to check that we can
# successfully symbolize PIEs loaded at low addresses.
TESTS_ENVIRONMENT = ulimit -s 8192;
check_SCRIPTS =
# Every time you add a unittest to check_SCRIPTS, add it here too
noinst_SCRIPTS =
......@@ -145,6 +148,14 @@ symbolize_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(TEST_CFLAGS)
symbolize_unittest_LDFLAGS = $(PTHREAD_CFLAGS)
symbolize_unittest_LDADD = libglog.la $(COMMON_LIBS) $(TEST_LIBS)
TESTS += symbolize_pie_unittest
symbolize_pie_unittest_SOURCES = $(gloginclude_HEADERS) \
src/symbolize_unittest.cc
nodist_symbolize_pie_unittest_SOURCES = $(nodist_gloginclude_HEADERS)
symbolize_pie_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(TEST_CFLAGS) -fPIE
symbolize_pie_unittest_LDFLAGS = $(PTHREAD_CFLAGS) -pie
symbolize_pie_unittest_LDADD = libglog.la $(COMMON_LIBS) $(TEST_LIBS)
TESTS += stl_logging_unittest
stl_logging_unittest_SOURCES = $(gloginclude_HEADERS) \
src/stl_logging_unittest.cc
......
......@@ -56,6 +56,8 @@
#if defined(HAVE_SYMBOLIZE)
#include <string.h>
#include <limits>
#include "symbolize.h"
......@@ -325,41 +327,17 @@ FindSymbol(uint64_t pc, const int fd, char *out, int out_size,
// both regular and dynamic symbol tables if necessary. On success,
// write the symbol name to "out" and return true. Otherwise, return
// false.
static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
char *out, int out_size,
uint64_t map_base_address) {
static bool GetSymbolFromObjectFile(const int fd,
uint64_t pc,
char* out,
int out_size,
uint64_t base_address) {
// Read the ELF header.
ElfW(Ehdr) elf_header;
if (!ReadFromOffsetExact(fd, &elf_header, sizeof(elf_header), 0)) {
return false;
}
uint64_t symbol_offset = 0;
if (elf_header.e_type == ET_DYN) { // DSO needs offset adjustment.
ElfW(Phdr) phdr;
// We need to find the PT_LOAD segment corresponding to the read-execute
// file mapping in order to correctly perform the offset adjustment.
for (unsigned i = 0; i != elf_header.e_phnum; ++i) {
if (!ReadFromOffsetExact(fd, &phdr, sizeof(phdr),
elf_header.e_phoff + i * sizeof(phdr)))
return false;
if (phdr.p_type == PT_LOAD &&
(phdr.p_flags & (PF_R | PF_X)) == (PF_R | PF_X)) {
// Find the mapped address corresponding to virtual address zero. We do
// this by first adding p_offset. This gives us the mapped address of
// the start of the segment, or in other words the mapped address
// corresponding to the virtual address of the segment. (Note that this
// is distinct from the start address, as p_offset is not guaranteed to
// be page aligned.) We then subtract p_vaddr, which takes us to virtual
// address zero.
symbol_offset = map_base_address + phdr.p_offset - phdr.p_vaddr;
break;
}
}
if (symbol_offset == 0)
return false;
}
ElfW(Shdr) symtab, strtab;
// Consult a regular symbol table first.
......@@ -369,8 +347,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
symtab.sh_link * sizeof(symtab))) {
return false;
}
if (FindSymbol(pc, fd, out, out_size, symbol_offset,
&strtab, &symtab)) {
if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a regular symbol table.
}
}
......@@ -382,8 +359,7 @@ static bool GetSymbolFromObjectFile(const int fd, uint64_t pc,
symtab.sh_link * sizeof(symtab))) {
return false;
}
if (FindSymbol(pc, fd, out, out_size, symbol_offset,
&strtab, &symtab)) {
if (FindSymbol(pc, fd, out, out_size, base_address, &strtab, &symtab)) {
return true; // Found the symbol in a dynamic symbol table.
}
}
......@@ -532,7 +508,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
int out_file_name_size) {
int object_fd;
// Open /proc/self/maps.
int maps_fd;
NO_INTR(maps_fd = open("/proc/self/maps", O_RDONLY));
FileDescriptor wrapped_maps_fd(maps_fd);
......@@ -540,6 +515,13 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
return -1;
}
int mem_fd;
NO_INTR(mem_fd = open("/proc/self/mem", O_RDONLY));
FileDescriptor wrapped_mem_fd(mem_fd);
if (wrapped_mem_fd.get() < 0) {
return -1;
}
// Iterate over maps and look for the map containing the pc. Then
// look into the symbol tables inside.
char buf[1024]; // Big enough for line of sane /proc/self/maps
......@@ -575,11 +557,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
}
++cursor; // Skip ' '.
// Check start and end addresses.
if (!(start_address <= pc && pc < end_address)) {
continue; // We skip this map. PC isn't in this map.
}
// Read flags. Skip flags until we encounter a space or eol.
const char * const flags_start = cursor;
while (cursor < eol && *cursor != ' ') {
......@@ -590,6 +567,49 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
return -1; // Malformed line.
}
// Determine the base address by reading ELF headers in process memory.
ElfW(Ehdr) ehdr;
// Skip non-readable maps.
if (flags_start[0] == 'r' &&
ReadFromOffsetExact(mem_fd, &ehdr, sizeof(ElfW(Ehdr)), start_address) &&
memcmp(ehdr.e_ident, ELFMAG, SELFMAG) == 0) {
switch (ehdr.e_type) {
case ET_EXEC:
base_address = 0;
break;
case ET_DYN:
// Find the segment containing file offset 0. This will correspond
// to the ELF header that we just read. Normally this will have
// virtual address 0, but this is not guaranteed. We must subtract
// the virtual address from the address where the ELF header was
// mapped to get the base address.
//
// If we fail to find a segment for file offset 0, use the address
// of the ELF header as the base address.
base_address = start_address;
for (unsigned i = 0; i != ehdr.e_phnum; ++i) {
ElfW(Phdr) phdr;
if (ReadFromOffsetExact(
mem_fd, &phdr, sizeof(phdr),
start_address + ehdr.e_phoff + i * sizeof(phdr)) &&
phdr.p_type == PT_LOAD && phdr.p_offset == 0) {
base_address = start_address - phdr.p_vaddr;
break;
}
}
break;
default:
// ET_REL or ET_CORE. These aren't directly executable, so they don't
// affect the base address.
break;
}
}
// Check start and end addresses.
if (!(start_address <= pc && pc < end_address)) {
continue; // We skip this map. PC isn't in this map.
}
// Check flags. We are only interested in "r*x" maps.
if (flags_start[0] != 'r' || flags_start[2] != 'x') {
continue; // We skip this map.
......@@ -604,19 +624,6 @@ OpenObjectFileContainingPcAndGetStartAddress(uint64_t pc,
}
++cursor; // Skip ' '.
// Don't subtract 'start_address' from the first entry:
// * If a binary is compiled w/o -pie, then the first entry in
// process maps is likely the binary itself (all dynamic libs
// are mapped higher in address space). For such a binary,
// instruction offset in binary coincides with the actual
// instruction address in virtual memory (as code section
// is mapped to a fixed memory range).
// * If a binary is compiled with -pie, all the modules are
// mapped high at address space (in particular, higher than
// shadow memory of the tool), so the module can't be the
// first entry.
base_address = ((num_maps == 1) ? 0U : start_address) - file_offset;
// Skip to file name. "cursor" now points to dev. We need to
// skip at least two spaces for dev and inode.
int num_spaces = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment