/*
 * Copyright 2010-2011 PathScale, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * dwarf_eh.h - Defines some helper functions for parsing DWARF exception
 * handling tables.
 *
 * This file contains various helper functions that are independent of the
 * language-specific code.  It can be used in any personality function for the
 * Itanium ABI.
 */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

// TODO: Factor out Itanium / ARM differences.  We probably want an itanium.h
// and arm.h that can be included by this file depending on the target ABI.

// _GNU_SOURCE must be defined for unwind.h to expose some of the functions
// that we want.  If it isn't, then we define it and undefine it to make sure
// that it doesn't impact the rest of the program.
#ifndef _GNU_SOURCE
#	define _GNU_SOURCE 1
#	include "unwind.h"
#	undef _GNU_SOURCE
#else
#	include "unwind.h"
#endif

#include <stdint.h>

/// Type used for pointers into DWARF data
typedef unsigned char *dw_eh_ptr_t;

// Flag indicating a signed quantity
#define DW_EH_PE_signed 0x08
/// DWARF data encoding types.
enum dwarf_data_encoding
{
	/// Absolute pointer value
	DW_EH_PE_absptr   = 0x00,
	/// Unsigned, little-endian, base 128-encoded (variable length).
	DW_EH_PE_uleb128 = 0x01,
	/// Unsigned 16-bit integer.
	DW_EH_PE_udata2  = 0x02,
	/// Unsigned 32-bit integer.
	DW_EH_PE_udata4  = 0x03,
	/// Unsigned 64-bit integer.
	DW_EH_PE_udata8  = 0x04,
	/// Signed, little-endian, base 128-encoded (variable length)
	DW_EH_PE_sleb128 = DW_EH_PE_uleb128 | DW_EH_PE_signed,
	/// Signed 16-bit integer.
	DW_EH_PE_sdata2  = DW_EH_PE_udata2 | DW_EH_PE_signed,
	/// Signed 32-bit integer.
	DW_EH_PE_sdata4  = DW_EH_PE_udata4 | DW_EH_PE_signed,
	/// Signed 64-bit integer.
	DW_EH_PE_sdata8  = DW_EH_PE_udata8 | DW_EH_PE_signed
};

/**
 * Returns the encoding for a DWARF EH table entry.  The encoding is stored in
 * the low four bits of an octet.  The high four bits store the addressing
 * mode.
 */
static inline enum dwarf_data_encoding get_encoding(unsigned char x)
{
	return static_cast<enum dwarf_data_encoding>(x & 0xf);
}

/**
 * DWARF addressing mode constants.  When reading a pointer value from a DWARF
 * exception table, you must know how it is stored and what the addressing mode
 * is.  The low four bits tell you the encoding, allowing you to decode a
 * number.  The high four bits tell you the addressing mode, allowing you to
 * turn that number into an address in memory.
 */
enum dwarf_data_relative
{
	/// Value is omitted
	DW_EH_PE_omit     = 0xff,
	/// Value relative to program counter
	DW_EH_PE_pcrel    = 0x10,
	/// Value relative to the text segment
	DW_EH_PE_textrel  = 0x20,
	/// Value relative to the data segment
	DW_EH_PE_datarel  = 0x30,
	/// Value relative to the start of the function
	DW_EH_PE_funcrel  = 0x40,
	/// Aligned pointer (Not supported yet - are they actually used?)
	DW_EH_PE_aligned  = 0x50,
	/// Pointer points to address of real value
	DW_EH_PE_indirect = 0x80
};

/**
 * Returns the addressing mode component of this encoding (bits 4-6; the
 * indirect flag in bit 7 is masked off).
 */
static inline enum dwarf_data_relative get_base(unsigned char x)
{
	return static_cast<enum dwarf_data_relative>(x & 0x70);
}

/**
 * Returns whether an encoding represents an indirect address, i.e. whether
 * the DW_EH_PE_indirect bit is set.  Non-zero means indirect.
 */
static inline int is_indirect(unsigned char x)
{
	return ((x & DW_EH_PE_indirect) == DW_EH_PE_indirect);
}

/**
 * Returns the size, in bytes, of a fixed-size encoding.  This function will
 * abort if called with a value that is not a fixed-size encoding.
 */
static inline int dwarf_size_of_fixed_size_field(unsigned char type)
{
	switch (get_encoding(type))
	{
		case DW_EH_PE_sdata2:
		case DW_EH_PE_udata2: return 2;
		case DW_EH_PE_sdata4:
		case DW_EH_PE_udata4: return 4;
		case DW_EH_PE_sdata8:
		case DW_EH_PE_udata8: return 8;
		case DW_EH_PE_absptr: return sizeof(void*);
		default: break;
	}
	// Variable-length (LEB128) or unknown encoding.
	abort();
}

/**
 * Read an unsigned, little-endian, base-128, DWARF value.  Updates *data to
 * point to the end of the value.  Stores the number of bits read in the value
 * pointed to by b, allowing you to determine the value of the highest bit, and
 * therefore the sign of a signed value.
 *
 * This function is not intended to be called directly.  Use read_sleb128() or
 * read_uleb128() for reading signed and unsigned versions, respectively.
 */
static uint64_t read_leb128(dw_eh_ptr_t *data, int *b)
{
	const unsigned int width = sizeof(uint64_t) * 8;
	uint64_t uleb = 0;
	unsigned int bit = 0;
	unsigned char octet;
	// We have to read at least one octet, and keep reading until we get to
	// one with the high bit unset.
	do
	{
		// This check is a bit too strict - we should also check the highest
		// bit of the digit.
		assert(bit < width);
		octet = *(*data)++;
		// The digit must be widened before shifting: shifting the promoted
		// int would lose (or invoke undefined behaviour for) everything
		// beyond bit 31.  The guard keeps the shift defined even when the
		// assertion above is compiled out.
		if (bit < width)
		{
			uleb |= static_cast<uint64_t>(octet & 0x7f) << bit;
		}
		bit += 7;
	} while (octet & 0x80);
	*b = bit;

	return uleb;
}

/**
 * Reads an unsigned little-endian base-128 value starting at the address
 * pointed to by *data.  Updates *data to point to the next byte after the end
 * of the variable-length value.
 *
 * Note that the decoded value is returned as an int64_t.
 */
static int64_t read_uleb128(dw_eh_ptr_t *data)
{
	int b;
	return static_cast<int64_t>(read_leb128(data, &b));
}

/**
 * Reads a signed little-endian base-128 value starting at the address pointed
 * to by *data.  Updates *data to point to the next byte after the end of the
 * variable-length value.
 */
static int64_t read_sleb128(dw_eh_ptr_t *data)
{
	int bits;
	// Read as if it's unsigned
	uint64_t uleb = read_leb128(data, &bits);
	// If the most significant bit read is 1, then we need to sign extend it.
	// Once 64 or more bits have been read the value already occupies the
	// whole word, and shifting by >= 64 would be undefined, so skip it.
	if ((bits < 64) && (((uleb >> (bits - 1)) & 1) != 0))
	{
		// Sign extend by setting all bits in front of it to 1.  The mask is
		// built from an unsigned value because left-shifting a negative
		// signed value is undefined.
		uleb |= ~static_cast<uint64_t>(0) << bits;
	}
	return static_cast<int64_t>(uleb);
}

/**
 * Reads a value using the specified encoding from the address pointed to by
 * *data.  Updates the value of *data to point to the next byte after the end
 * of the data.  Only the low four bits of the encoding are considered; signed
 * fixed-size values are sign-extended to 64 bits.  Aborts on an unknown
 * encoding.
 */
static uint64_t read_value(char encoding, dw_eh_ptr_t *data)
{
	enum dwarf_data_encoding kind = get_encoding(encoding);
	switch (kind)
	{
		// Read fixed-length types.  memcpy is used because the data is not
		// necessarily aligned for the target type.
#define READ(dwarf, ctype) \
		case dwarf:\
		{\
			ctype t;\
			memcpy(&t, *data, sizeof t);\
			*data += sizeof t;\
			return static_cast<uint64_t>(t);\
		}
		READ(DW_EH_PE_udata2, uint16_t)
		READ(DW_EH_PE_udata4, uint32_t)
		READ(DW_EH_PE_udata8, uint64_t)
		READ(DW_EH_PE_sdata2, int16_t)
		READ(DW_EH_PE_sdata4, int32_t)
		READ(DW_EH_PE_sdata8, int64_t)
		READ(DW_EH_PE_absptr, intptr_t)
#undef READ
		// Read variable-length types
		case DW_EH_PE_sleb128:
			return read_sleb128(data);
		case DW_EH_PE_uleb128:
			return read_uleb128(data);
		default:
			break;
	}
	abort();
}

/**
 * Resolves an indirect value.  This expects an unwind context, an encoding, a
 * decoded value, and the start of the region as arguments.  The returned value
 * is a pointer to the address identified by the encoded value.
 *
 * The base selected by the addressing mode of the encoding is added to v
 * first; if the encoding is additionally marked indirect, the result is
 * treated as the address of a pointer, which is loaded and returned.
 *
 * If the encoding specifies neither a base nor an indirect value, then this
 * returns v.
 */
static uint64_t resolve_indirect_value(_Unwind_Context *c,
                                       unsigned char encoding,
                                       int64_t v,
                                       dw_eh_ptr_t start)
{
	uint64_t addr = static_cast<uint64_t>(v);
	switch (get_base(encoding))
	{
		case DW_EH_PE_pcrel:
			addr += static_cast<uint64_t>(reinterpret_cast<uintptr_t>(start));
			break;
		case DW_EH_PE_textrel:
			addr += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetTextRelBase(c)));
			break;
		case DW_EH_PE_datarel:
			addr += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetDataRelBase(c)));
			break;
		case DW_EH_PE_funcrel:
			addr += static_cast<uint64_t>(static_cast<uintptr_t>(_Unwind_GetRegionStart(c)));
			break;
		default:
			break;
	}
	// If this is an indirect value, then it is really the address of the real
	// value
	// TODO: Check whether this should really always be a pointer - it seems to
	// be a GCC extensions, so not properly documented...
	if (is_indirect(encoding))
	{
		void **slot = reinterpret_cast<void**>(static_cast<uintptr_t>(addr));
		addr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(*slot));
	}
	return addr;
}


/**
 * Reads an encoding byte and then a value stored with that encoding, updating
 * *data to point to the next byte.  If the encoding is DW_EH_PE_omit, only
 * the encoding byte is consumed and *out is left untouched.
 */
static inline void read_value_with_encoding(_Unwind_Context *context,
                                            dw_eh_ptr_t *data,
                                            uint64_t *out)
{
	dw_eh_ptr_t start = *data;
	unsigned char encoding = *((*data)++);
	// If this value is omitted, skip it and don't touch the output value
	if (encoding == DW_EH_PE_omit) { return; }

	*out = read_value(encoding, data);
	*out = resolve_indirect_value(context, encoding, *out, start);
}

/**
 * Structure storing a decoded language-specific data area.  Use parse_lsda()
 * to generate an instance of this structure from the address returned by the
 * generic unwind library.
 *
 * You should not need to inspect the fields of this structure directly if you
 * are just using this header.  The structure stores the locations of the
 * various tables used for unwinding exceptions and is used by the functions
 * for reading values from these tables.
 */
struct dwarf_eh_lsda
{
	/// The start of the region.  This is a cache of the value returned by
	/// _Unwind_GetRegionStart().
	dw_eh_ptr_t region_start;
	/// The start of the landing pads table.
	dw_eh_ptr_t landing_pads;
	/// The start of the type table.
	dw_eh_ptr_t type_table;
	/// The encoding used for entries in the type tables.
	unsigned char type_table_encoding;
	/// The location of the call-site table.
	dw_eh_ptr_t call_site_table;
	/// The location of the action table.
	dw_eh_ptr_t action_table;
	/// The encoding used for entries in the call-site table.
	unsigned char callsite_encoding;
};

/**
 * Parse the header on the language-specific data area and return a structure
 * containing the addresses and encodings of the various tables.
 */
static inline struct dwarf_eh_lsda parse_lsda(_Unwind_Context *context,
                                              unsigned char *data)
{
	struct dwarf_eh_lsda lsda;

	lsda.region_start = reinterpret_cast<dw_eh_ptr_t>(_Unwind_GetRegionStart(context));

	// If the landing pads are relative to anything other than the start of
	// this region, find out where.  This is @LPStart in the spec, although the
	// encoding that GCC uses does not quite match the spec.  The region start
	// is the default and is kept if the value is omitted.
	uint64_t v = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(lsda.region_start));
	read_value_with_encoding(context, &data, &v);
	lsda.landing_pads = reinterpret_cast<dw_eh_ptr_t>(static_cast<uintptr_t>(v));

	// If there is a type table, find out where it is.  This is @TTBase in the
	// spec.  Note: we find whether there is a type table pointer by checking
	// whether the leading byte is DW_EH_PE_omit (0xff), which is not what the
	// spec says, but does seem to be how G++ indicates this.
	lsda.type_table = 0;
	lsda.type_table_encoding = *data++;
	if (lsda.type_table_encoding != DW_EH_PE_omit)
	{
		// The offset is relative to the byte following the offset itself.
		v = read_uleb128(&data);
		lsda.type_table = data + v;
	}
#if defined(__arm__) && !defined(__ARM_DWARF_EH__)
	lsda.type_table_encoding = (DW_EH_PE_pcrel | DW_EH_PE_indirect);
#endif

	lsda.callsite_encoding = *data++;

	// The header ends with the length of the call site table.
	uintptr_t callsite_size = static_cast<uintptr_t>(read_uleb128(&data));
	// Call site table is immediately after the header
	lsda.call_site_table = data;
	// Action table is immediately after the call site table
	lsda.action_table = data + callsite_size;

	return lsda;
}

/**
 * Structure representing an action to be performed while unwinding.  This
 * contains the address that should be unwound to and the action record that
 * provoked this action.
 */
struct dwarf_eh_action
{
	/**
	 * The address that this action directs should be the new program counter
	 * value after unwinding.
	 */
	dw_eh_ptr_t landing_pad;
	/// The address of the action record.
	dw_eh_ptr_t action_record;
};

/**
 * Look up the landing pad that corresponds to the current invoke.
 * Returns true if record exists.  The context is provided by the generic
 * unwind library and the lsda should be the result of a call to parse_lsda().
 *
 * The action record is returned via the result parameter.  Both fields of
 * *result are zeroed first and are only filled in when the matching call-site
 * record has a non-zero action / landing pad respectively.
 */
static bool dwarf_eh_find_callsite(struct _Unwind_Context *context,
                                   struct dwarf_eh_lsda *lsda,
                                   struct dwarf_eh_action *result)
{
	result->action_record = 0;
	result->landing_pad = 0;
	// The current instruction pointer offset within the region
	uint64_t ip = _Unwind_GetIP(context) - _Unwind_GetRegionStart(context);
	unsigned char *callsite_table = static_cast<unsigned char*>(lsda->call_site_table);

	// The call-site table ends where the action table begins, so stop as soon
	// as the cursor reaches it; decoding a record starting at action_table
	// would interpret action-table bytes as a call site.
	while (callsite_table < lsda->action_table)
	{
		// Once again, the layout deviates from the spec.
		uint64_t call_site_start, call_site_size, landing_pad, action;
		call_site_start = read_value(lsda->callsite_encoding, &callsite_table);
		call_site_size = read_value(lsda->callsite_encoding, &callsite_table);

		// Call site entries are sorted, so if we find a call site that's after
		// the current instruction pointer then there is no action associated
		// with this call and we should unwind straight through this frame
		// without doing anything.
		if (call_site_start > ip) { break; }

		// Read the address of the landing pad and the action from the call
		// site table.
		landing_pad = read_value(lsda->callsite_encoding, &callsite_table);
		action = read_uleb128(&callsite_table);

		// We should not include the call_site_start (beginning of the region)
		// address in the ip range.  For each call site:
		//
		// address1: call proc
		// address2: next instruction
		//
		// The call stack contains address2 and not address1, address1 can be
		// at the end of another EH region.
		if (call_site_start < ip && ip <= call_site_start + call_site_size)
		{
			if (action)
			{
				// Action records are 1-biased so both no-record and zeroth
				// record can be stored.
				result->action_record = lsda->action_table + action - 1;
			}
			// No landing pad means keep unwinding.
			if (landing_pad)
			{
				// Landing pad is the offset from the value in the header
				result->landing_pad = lsda->landing_pads + landing_pad;
			}
			return true;
		}
	}
	return false;
}

/// Defines an exception class from 8 bytes (endian independent)
#define EXCEPTION_CLASS(a,b,c,d,e,f,g,h) \
	((static_cast<uint64_t>(a) << 56) +\
	 (static_cast<uint64_t>(b) << 48) +\
	 (static_cast<uint64_t>(c) << 40) +\
	 (static_cast<uint64_t>(d) << 32) +\
	 (static_cast<uint64_t>(e) << 24) +\
	 (static_cast<uint64_t>(f) << 16) +\
	 (static_cast<uint64_t>(g) << 8) +\
	 (static_cast<uint64_t>(h)))

/// Defines the 32-bit (low half) part of an exception class from 4 bytes.
/// The whole expansion is parenthesised so that it can be used safely inside
/// a larger expression.
#define GENERIC_EXCEPTION_CLASS(e,f,g,h) \
	((static_cast<uint32_t>(e) << 24) +\
	 (static_cast<uint32_t>(f) << 16) +\
	 (static_cast<uint32_t>(g) << 8) +\
	 (static_cast<uint32_t>(h)))