/* Path: blob/main/cddl/contrib/opensolaris/tools/ctf/cvt/output.c */
/* 39586 views */
/*1* CDDL HEADER START2*3* The contents of this file are subject to the terms of the4* Common Development and Distribution License (the "License").5* You may not use this file except in compliance with the License.6*7* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE8* or http://www.opensolaris.org/os/licensing.9* See the License for the specific language governing permissions10* and limitations under the License.11*12* When distributing Covered Code, include this CDDL HEADER in each13* file and include the License file at usr/src/OPENSOLARIS.LICENSE.14* If applicable, add the following below this CDDL HEADER, with the15* fields enclosed by brackets "[]" replaced with your own identifying16* information: Portions Copyright [yyyy] [name of copyright owner]17*18* CDDL HEADER END19*/20/*21* Copyright 2006 Sun Microsystems, Inc. All rights reserved.22* Use is subject to license terms.23*/2425/*26* Routines for preparing tdata trees for conversion into CTF data, and27* for placing the resulting data into an output file.28*/2930#include <stdio.h>31#include <stdlib.h>32#include <strings.h>33#include <sys/types.h>34#include <sys/stat.h>35#include <fcntl.h>36#include <libelf.h>37#include <gelf.h>38#include <unistd.h>3940#include "ctftools.h"41#include "list.h"42#include "memory.h"43#include "traverse.h"44#include "symbol.h"4546typedef struct iidesc_match {47int iim_fuzzy;48iidesc_t *iim_ret;49char *iim_name;50char *iim_file;51uchar_t iim_bind;52} iidesc_match_t;5354static int55burst_iitypes(void *data, void *arg)56{57iidesc_t *ii = data;58iiburst_t *iiburst = arg;5960switch (ii->ii_type) {61case II_GFUN:62case II_SFUN:63case II_GVAR:64case II_SVAR:65if (!(ii->ii_flags & IIDESC_F_USED))66return (0);67break;68default:69break;70}7172ii->ii_dtype->t_flags |= TDESC_F_ISROOT;73(void) iitraverse_td(ii, iiburst->iib_tdtd);74return (1);75}7677/*ARGSUSED1*/78static int79save_type_by_id(tdesc_t *tdp, tdesc_t **tdpp __unused, void *private)80{81iiburst_t *iiburst = 
private;8283/*84* Doing this on every node is horribly inefficient, but given that85* we may be suppressing some types, we can't trust nextid in the86* tdata_t.87*/88if (tdp->t_id > iiburst->iib_maxtypeid)89iiburst->iib_maxtypeid = tdp->t_id;9091slist_add(&iiburst->iib_types, tdp, tdesc_idcmp);9293return (1);94}9596static tdtrav_cb_f burst_types_cbs[] = {97NULL,98save_type_by_id, /* intrinsic */99save_type_by_id, /* pointer */100save_type_by_id, /* array */101save_type_by_id, /* function */102save_type_by_id, /* struct */103save_type_by_id, /* union */104save_type_by_id, /* enum */105save_type_by_id, /* forward */106save_type_by_id, /* typedef */107tdtrav_assert, /* typedef_unres */108save_type_by_id, /* volatile */109save_type_by_id, /* const */110save_type_by_id /* restrict */111};112113114static iiburst_t *115iiburst_new(tdata_t *td, int max)116{117iiburst_t *iiburst = xcalloc(sizeof (iiburst_t));118iiburst->iib_td = td;119iiburst->iib_funcs = xcalloc(sizeof (iidesc_t *) * max);120iiburst->iib_nfuncs = 0;121iiburst->iib_objts = xcalloc(sizeof (iidesc_t *) * max);122iiburst->iib_nobjts = 0;123return (iiburst);124}125126static void127iiburst_types(iiburst_t *iiburst)128{129tdtrav_data_t tdtd;130131tdtrav_init(&tdtd, &iiburst->iib_td->td_curvgen, NULL, burst_types_cbs,132NULL, (void *)iiburst);133134iiburst->iib_tdtd = &tdtd;135136(void) hash_iter(iiburst->iib_td->td_iihash, burst_iitypes, iiburst);137}138139static void140iiburst_free(iiburst_t *iiburst)141{142free(iiburst->iib_funcs);143free(iiburst->iib_objts);144list_free(iiburst->iib_types, NULL, NULL);145free(iiburst);146}147148/*149* See if this iidesc matches the ELF symbol data we pass in.150*151* A fuzzy match is where we have a local symbol matching the name of a152* global type description. 
This is common when a mapfile is used for a153* DSO, but we don't accept it by default.154*155* A weak fuzzy match is when a weak symbol was resolved and matched to156* a global type description.157*/158static int159matching_iidesc(void *arg1, void *arg2)160{161iidesc_t *iidesc = arg1;162iidesc_match_t *match = arg2;163if (streq(iidesc->ii_name, match->iim_name) == 0)164return (0);165166switch (iidesc->ii_type) {167case II_GFUN:168case II_GVAR:169if (match->iim_bind == STB_GLOBAL) {170match->iim_ret = iidesc;171return (-1);172} else if (match->iim_fuzzy && match->iim_ret == NULL) {173match->iim_ret = iidesc;174/* continue to look for strong match */175return (0);176}177break;178case II_SFUN:179case II_SVAR:180if (match->iim_bind == STB_LOCAL &&181match->iim_file != NULL &&182streq(iidesc->ii_owner, match->iim_file)) {183match->iim_ret = iidesc;184return (-1);185}186break;187default:188break;189}190return (0);191}192193static iidesc_t *194find_iidesc(tdata_t *td, iidesc_match_t *match)195{196match->iim_ret = NULL;197iter_iidescs_by_name(td, match->iim_name,198matching_iidesc, match);199return (match->iim_ret);200}201202/*203* If we have a weak symbol, attempt to find the strong symbol it will204* resolve to. Note: the code where this actually happens is in205* sym_process() in cmd/sgs/libld/common/syms.c206*207* Finding the matching symbol is unfortunately not trivial. For a208* symbol to be a candidate, it must:209*210* - have the same type (function, object)211* - have the same value (address)212* - have the same size213* - not be another weak symbol214* - belong to the same section (checked via section index)215*216* If such a candidate is global, then we assume we've found it. 
The217* linker generates the symbol table such that the curfile might be218* incorrect; this is OK for global symbols, since find_iidesc() doesn't219* need to check for the source file for the symbol.220*221* We might have found a strong local symbol, where the curfile is222* accurate and matches that of the weak symbol. We assume this is a223* reasonable match.224*225* If we've got a local symbol with a non-matching curfile, there are226* two possibilities. Either this is a completely different symbol, or227* it's a once-global symbol that was scoped to local via a mapfile. In228* the latter case, curfile is likely inaccurate since the linker does229* not preserve the needed curfile in the order of the symbol table (see230* the comments about locally scoped symbols in libld's update_osym()).231* As we can't tell this case from the former one, we use this symbol232* iff no other matching symbol is found.233*234* What we really need here is a SUNW section containing weak<->strong235* mappings that we can consume.236*/237static int238check_for_weak(GElf_Sym *weak, char const *weakfile,239Elf_Data *data, int nent, Elf_Data *strdata,240GElf_Sym *retsym, char **curfilep)241{242char *curfile = NULL;243char *tmpfile1 = NULL;244GElf_Sym tmpsym;245int candidate = 0;246int i;247tmpsym.st_info = 0;248tmpsym.st_name = 0;249250if (GELF_ST_BIND(weak->st_info) != STB_WEAK)251return (0);252253for (i = 0; i < nent; i++) {254GElf_Sym sym;255uchar_t type;256257if (gelf_getsym(data, i, &sym) == NULL)258continue;259260type = GELF_ST_TYPE(sym.st_info);261262if (type == STT_FILE)263curfile = (char *)strdata->d_buf + sym.st_name;264265if (GELF_ST_TYPE(weak->st_info) != type ||266weak->st_value != sym.st_value)267continue;268269if (weak->st_size != sym.st_size)270continue;271272if (GELF_ST_BIND(sym.st_info) == STB_WEAK)273continue;274275if (sym.st_shndx != weak->st_shndx)276continue;277278if (GELF_ST_BIND(sym.st_info) == STB_LOCAL &&279(curfile == NULL || weakfile == NULL 
||280strcmp(curfile, weakfile) != 0)) {281candidate = 1;282tmpfile1 = curfile;283tmpsym = sym;284continue;285}286287*curfilep = curfile;288*retsym = sym;289return (1);290}291292if (candidate) {293*curfilep = tmpfile1;294*retsym = tmpsym;295return (1);296}297298return (0);299}300301/*302* When we've found the underlying symbol's type description303* for a weak symbol, we need to copy it and rename it to match304* the weak symbol. We also need to add it to the td so it's305* handled along with the others later.306*/307static iidesc_t *308copy_from_strong(tdata_t *td, GElf_Sym *sym, iidesc_t *strongdesc,309const char *weakname, const char *weakfile)310{311iidesc_t *new = iidesc_dup_rename(strongdesc, weakname, weakfile);312uchar_t type = GELF_ST_TYPE(sym->st_info);313314switch (type) {315case STT_OBJECT:316new->ii_type = II_GVAR;317break;318case STT_FUNC:319new->ii_type = II_GFUN;320break;321}322323hash_add(td->td_iihash, new);324325return (new);326}327328/*329* Process the symbol table of the output file, associating each symbol330* with a type description if possible, and sorting them into functions331* and data, maintaining symbol table order.332*/333static iiburst_t *334sort_iidescs(Elf *elf, const char *file, tdata_t *td, int fuzzymatch,335int dynsym)336{337iiburst_t *iiburst;338Elf_Scn *scn;339GElf_Shdr shdr;340Elf_Data *data, *strdata;341int i, stidx;342int nent;343iidesc_match_t match;344345match.iim_fuzzy = fuzzymatch;346match.iim_file = NULL;347348if ((stidx = findelfsecidx(elf, file,349dynsym ? 
".dynsym" : ".symtab")) < 0)350terminate("%s: Can't open symbol table\n", file);351scn = elf_getscn(elf, stidx);352data = elf_getdata(scn, NULL);353gelf_getshdr(scn, &shdr);354nent = shdr.sh_size / shdr.sh_entsize;355356scn = elf_getscn(elf, shdr.sh_link);357strdata = elf_getdata(scn, NULL);358359iiburst = iiburst_new(td, nent);360361for (i = 0; i < nent; i++) {362GElf_Sym sym;363char *bname;364iidesc_t **tolist;365GElf_Sym ssym;366iidesc_match_t smatch;367int *curr;368iidesc_t *iidesc;369370if (gelf_getsym(data, i, &sym) == NULL)371elfterminate(file, "Couldn't read symbol %d", i);372373match.iim_name = (char *)strdata->d_buf + sym.st_name;374match.iim_bind = GELF_ST_BIND(sym.st_info);375376switch (GELF_ST_TYPE(sym.st_info)) {377case STT_FILE:378bname = strrchr(match.iim_name, '/');379match.iim_file = bname == NULL ? match.iim_name : bname + 1;380continue;381case STT_OBJECT:382tolist = iiburst->iib_objts;383curr = &iiburst->iib_nobjts;384break;385case STT_FUNC:386tolist = iiburst->iib_funcs;387curr = &iiburst->iib_nfuncs;388break;389default:390continue;391}392393if (ignore_symbol(&sym, match.iim_name))394continue;395396iidesc = find_iidesc(td, &match);397398if (iidesc != NULL) {399tolist[*curr] = iidesc;400iidesc->ii_flags |= IIDESC_F_USED;401(*curr)++;402continue;403}404405if (!check_for_weak(&sym, match.iim_file, data, nent, strdata,406&ssym, &smatch.iim_file)) {407(*curr)++;408continue;409}410411smatch.iim_fuzzy = fuzzymatch;412smatch.iim_name = (char *)strdata->d_buf + ssym.st_name;413smatch.iim_bind = GELF_ST_BIND(ssym.st_info);414415debug(3, "Weak symbol %s resolved to %s\n", match.iim_name,416smatch.iim_name);417418iidesc = find_iidesc(td, &smatch);419420if (iidesc != NULL) {421tolist[*curr] = copy_from_strong(td, &sym,422iidesc, match.iim_name, match.iim_file);423tolist[*curr]->ii_flags |= IIDESC_F_USED;424}425426(*curr)++;427}428429/*430* Stabs are generated for every function declared in a given C source431* file. 
When converting an object file, we may encounter a stab that432* has no symbol table entry because the optimizer has decided to omit433* that item (for example, an unreferenced static function). We may434* see iidescs that do not have an associated symtab entry, and so435* we do not write records for those functions into the CTF data.436* All others get marked as a root by this function.437*/438iiburst_types(iiburst);439440/*441* By not adding some of the functions and/or objects, we may have442* caused some types that were referenced solely by those443* functions/objects to be suppressed. This could cause a label,444* generated prior to the evisceration, to be incorrect. Find the445* highest type index, and change the label indicies to be no higher446* than this value.447*/448tdata_label_newmax(td, iiburst->iib_maxtypeid);449450return (iiburst);451}452453static void454write_file(Elf *src, const char *srcname, Elf *dst, const char *dstname,455caddr_t ctfdata, size_t ctfsize, int flags)456{457GElf_Ehdr sehdr, dehdr;458Elf_Scn *sscn, *dscn;459Elf_Data *sdata, *ddata;460GElf_Shdr shdr;461GElf_Word symtab_type;462int symtab_idx = -1;463off_t new_offset = 0;464off_t ctfnameoff = 0;465int dynsym = (flags & CTF_USE_DYNSYM);466int keep_stabs = (flags & CTF_KEEP_STABS);467int *secxlate;468int srcidx, dstidx;469int changing = 0;470int pad;471int i;472473if (gelf_newehdr(dst, gelf_getclass(src)) == NULL)474elfterminate(dstname, "Cannot copy ehdr to temp file");475gelf_getehdr(src, &sehdr);476memcpy(&dehdr, &sehdr, sizeof (GElf_Ehdr));477gelf_update_ehdr(dst, &dehdr);478479symtab_type = dynsym ? SHT_DYNSYM : SHT_SYMTAB;480481/*482* Neither the existing stab sections nor the SUNW_ctf sections (new or483* existing) are SHF_ALLOC'd, so they won't be in areas referenced by484* program headers. 
As such, we can just blindly copy the program485* headers from the existing file to the new file.486*/487if (sehdr.e_phnum != 0) {488(void) elf_flagelf(dst, ELF_C_SET, ELF_F_LAYOUT);489if (gelf_newphdr(dst, sehdr.e_phnum) == NULL)490elfterminate(dstname, "Cannot make phdrs in temp file");491492for (i = 0; i < sehdr.e_phnum; i++) {493GElf_Phdr phdr;494495gelf_getphdr(src, i, &phdr);496gelf_update_phdr(dst, i, &phdr);497}498}499500secxlate = xmalloc(sizeof (int) * sehdr.e_shnum);501for (srcidx = dstidx = 0; srcidx < sehdr.e_shnum; srcidx++) {502Elf_Scn *scn = elf_getscn(src, srcidx);503GElf_Shdr shdr1;504char *sname;505506gelf_getshdr(scn, &shdr1);507sname = elf_strptr(src, sehdr.e_shstrndx, shdr1.sh_name);508if (sname == NULL) {509elfterminate(srcname, "Can't find string at %u",510shdr1.sh_name);511}512513if (strcmp(sname, CTF_ELF_SCN_NAME) == 0) {514secxlate[srcidx] = -1;515} else if (!keep_stabs &&516(strncmp(sname, ".stab", 5) == 0 ||517strncmp(sname, ".debug", 6) == 0 ||518strncmp(sname, ".rel.debug", 10) == 0 ||519strncmp(sname, ".rela.debug", 11) == 0)) {520secxlate[srcidx] = -1;521} else if (dynsym && shdr1.sh_type == SHT_SYMTAB) {522/*523* If we're building CTF against the dynsym,524* we'll rip out the symtab so debuggers aren't525* confused.526*/527secxlate[srcidx] = -1;528} else {529secxlate[srcidx] = dstidx++;530}531532new_offset = (off_t)dehdr.e_phoff;533}534535for (srcidx = 1; srcidx < sehdr.e_shnum; srcidx++) {536char *sname;537538sscn = elf_getscn(src, srcidx);539gelf_getshdr(sscn, &shdr);540541if (secxlate[srcidx] == -1) {542changing = 1;543continue;544}545546dscn = elf_newscn(dst);547548/*549* If this file has program headers, we need to explicitly lay550* out sections. If none of the sections prior to this one have551* been removed, then we can just use the existing location. 
If552* one or more sections have been changed, then we need to553* adjust this one to avoid holes.554*/555if (changing && sehdr.e_phnum != 0) {556pad = new_offset % shdr.sh_addralign;557558if (pad)559new_offset += shdr.sh_addralign - pad;560shdr.sh_offset = new_offset;561}562563shdr.sh_link = secxlate[shdr.sh_link];564565if (shdr.sh_type == SHT_REL || shdr.sh_type == SHT_RELA)566shdr.sh_info = secxlate[shdr.sh_info];567568sname = elf_strptr(src, sehdr.e_shstrndx, shdr.sh_name);569if (sname == NULL) {570elfterminate(srcname, "Can't find string at %u",571shdr.sh_name);572}573574#ifndef illumos575if (gelf_update_shdr(dscn, &shdr) == 0)576elfterminate(dstname, "Cannot update sect %s", sname);577#endif578579if ((sdata = elf_getdata(sscn, NULL)) == NULL)580elfterminate(srcname, "Cannot get sect %s data", sname);581if ((ddata = elf_newdata(dscn)) == NULL)582elfterminate(dstname, "Can't make sect %s data", sname);583#ifdef illumos584bcopy(sdata, ddata, sizeof (Elf_Data));585#else586/*587* FreeBSD's Elf_Data has private fields which the588* elf_* routines manage. Simply copying the589* entire structure corrupts the data. 
So we need590* to copy the public fields explictly.591*/592ddata->d_align = sdata->d_align;593ddata->d_off = sdata->d_off;594ddata->d_size = sdata->d_size;595ddata->d_type = sdata->d_type;596ddata->d_version = sdata->d_version;597#endif598599if (srcidx == sehdr.e_shstrndx) {600char seclen = strlen(CTF_ELF_SCN_NAME);601602ddata->d_buf = xmalloc(ddata->d_size + shdr.sh_size +603seclen + 1);604bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);605strcpy((caddr_t)ddata->d_buf + shdr.sh_size,606CTF_ELF_SCN_NAME);607ctfnameoff = (off_t)shdr.sh_size;608shdr.sh_size += seclen + 1;609ddata->d_size += seclen + 1;610611if (sehdr.e_phnum != 0)612changing = 1;613}614615if (shdr.sh_type == symtab_type && shdr.sh_entsize != 0) {616int nsym = shdr.sh_size / shdr.sh_entsize;617618symtab_idx = secxlate[srcidx];619620ddata->d_buf = xmalloc(shdr.sh_size);621bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);622623for (i = 0; i < nsym; i++) {624GElf_Sym sym;625short newscn;626627if (gelf_getsym(ddata, i, &sym) == NULL)628printf("Could not get symbol %d\n",i);629630if (sym.st_shndx >= SHN_LORESERVE)631continue;632633if ((newscn = secxlate[sym.st_shndx]) !=634sym.st_shndx) {635sym.st_shndx =636(newscn == -1 ? 1 : newscn);637638gelf_update_sym(ddata, i, &sym);639}640}641}642643#ifndef illumos644if (ddata->d_buf == NULL && sdata->d_buf != NULL) {645ddata->d_buf = xmalloc(shdr.sh_size);646bcopy(sdata->d_buf, ddata->d_buf, shdr.sh_size);647}648#endif649650if (gelf_update_shdr(dscn, &shdr) == 0)651elfterminate(dstname, "Cannot update sect %s", sname);652653new_offset = (off_t)shdr.sh_offset;654if (shdr.sh_type != SHT_NOBITS)655new_offset += shdr.sh_size;656}657658if (symtab_idx == -1) {659terminate("%s: Cannot find %s section\n", srcname,660dynsym ? 
"SHT_DYNSYM" : "SHT_SYMTAB");661}662663/* Add the ctf section */664dscn = elf_newscn(dst);665gelf_getshdr(dscn, &shdr);666shdr.sh_name = ctfnameoff;667shdr.sh_type = SHT_PROGBITS;668shdr.sh_size = ctfsize;669shdr.sh_link = symtab_idx;670shdr.sh_addralign = 4;671if (changing && sehdr.e_phnum != 0) {672pad = new_offset % shdr.sh_addralign;673674if (pad)675new_offset += shdr.sh_addralign - pad;676677shdr.sh_offset = new_offset;678new_offset += shdr.sh_size;679}680681ddata = elf_newdata(dscn);682ddata->d_buf = ctfdata;683ddata->d_size = ctfsize;684ddata->d_align = shdr.sh_addralign;685ddata->d_off = 0;686687gelf_update_shdr(dscn, &shdr);688689/* update the section header location */690if (sehdr.e_phnum != 0) {691size_t align = gelf_fsize(dst, ELF_T_ADDR, 1, EV_CURRENT);692size_t r = new_offset % align;693694if (r)695new_offset += align - r;696697dehdr.e_shoff = new_offset;698}699700/* commit to disk */701dehdr.e_shstrndx = secxlate[sehdr.e_shstrndx];702gelf_update_ehdr(dst, &dehdr);703if (elf_update(dst, ELF_C_WRITE) < 0)704elfterminate(dstname, "Cannot finalize temp file");705706free(secxlate);707}708709static caddr_t710make_ctf_data(tdata_t *td, Elf *elf, const char *file, size_t *lenp, int flags)711{712iiburst_t *iiburst;713caddr_t data;714715iiburst = sort_iidescs(elf, file, td, flags & CTF_FUZZY_MATCH,716flags & CTF_USE_DYNSYM);717data = ctf_gen(iiburst, lenp, flags & (CTF_COMPRESS | CTF_SWAP_BYTES));718719iiburst_free(iiburst);720721return (data);722}723724void725write_ctf(tdata_t *td, const char *curname, const char *newname, int flags)726{727struct stat st;728Elf *elf = NULL;729Elf *telf = NULL;730GElf_Ehdr ehdr;731caddr_t data;732size_t len;733int fd = -1;734int tfd = -1;735int byteorder;736737(void) elf_version(EV_CURRENT);738if ((fd = open(curname, O_RDONLY)) < 0 || fstat(fd, &st) < 0)739terminate("%s: Cannot open for re-reading", curname);740if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL)741elfterminate(curname, "Cannot re-read");742743if ((tfd = 
open(newname, O_RDWR | O_CREAT | O_TRUNC, st.st_mode)) < 0)744terminate("Cannot open temp file %s for writing", newname);745if ((telf = elf_begin(tfd, ELF_C_WRITE, NULL)) == NULL)746elfterminate(curname, "Cannot write");747748if (gelf_getehdr(elf, &ehdr)) {749#if BYTE_ORDER == _BIG_ENDIAN750byteorder = ELFDATA2MSB;751#else752byteorder = ELFDATA2LSB;753#endif754/*755* If target and host has the same byte order756* clear byte swapping request757*/758if (ehdr.e_ident[EI_DATA] == byteorder)759flags &= ~CTF_SWAP_BYTES;760}761else762elfterminate(curname, "Failed to get EHDR");763764data = make_ctf_data(td, elf, curname, &len, flags);765write_file(elf, curname, telf, newname, data, len, flags);766free(data);767768elf_end(telf);769elf_end(elf);770(void) close(fd);771(void) close(tfd);772}773774775