Path: blob/master/src/hotspot/share/classfile/compactHashtable.hpp
40949 views
/*1* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation.7*8* This code is distributed in the hope that it will be useful, but WITHOUT9* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or10* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License11* version 2 for more details (a copy is included in the LICENSE file that12* accompanied this code).13*14* You should have received a copy of the GNU General Public License version15* 2 along with this work; if not, write to the Free Software Foundation,16* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.17*18* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA19* or visit www.oracle.com if you need additional information or have any20* questions.21*22*/2324#ifndef SHARE_CLASSFILE_COMPACTHASHTABLE_HPP25#define SHARE_CLASSFILE_COMPACTHASHTABLE_HPP2627#include "oops/array.hpp"28#include "oops/symbol.hpp"29#include "runtime/globals.hpp"30#include "utilities/growableArray.hpp"313233template <34typename K,35typename V,36V (*DECODE)(address base_address, u4 offset),37bool (*EQUALS)(V value, K key, int len)38>39class CompactHashtable;40class NumberSeq;41class SimpleCompactHashtable;42class SerializeClosure;4344// Stats for symbol tables in the CDS archive45class CompactHashtableStats {46public:47int hashentry_count;48int hashentry_bytes;49int bucket_count;50int bucket_bytes;5152CompactHashtableStats() :53hashentry_count(0), hashentry_bytes(0),54bucket_count(0), bucket_bytes(0) {}55};5657#if INCLUDE_CDS58/////////////////////////////////////////////////////////////////////////59//60// The compact hash table writer. Used at dump time for writing out61// the compact table to the shared archive.62//63// At dump time, the CompactHashtableWriter obtains all entries from the64// symbol/string table and adds them to a new temporary hash table. The hash65// table size (number of buckets) is calculated using66// '(num_entries + bucket_size - 1) / bucket_size'. The default bucket67// size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.68// 4 is chosen because it produces smaller sized bucket on average for69// faster lookup. It also has relatively small number of empty buckets and70// good distribution of the entries.71//72// We use a simple hash function (hash % num_bucket) for the table.73// The new table is compacted when written out. Please see comments74// above the CompactHashtable class for the table layout detail. The bucket75// offsets are written to the archive as part of the compact table. The76// bucket offset is encoded in the low 30-bit (0-29) and the bucket type77// (regular or compact) are encoded in bit[31, 30]. For buckets with more78// than one entry, both hash and entry offset are written to the79// table. For buckets with only one entry, only the entry offset is written80// to the table and the buckets are tagged as compact in their type bits.81// Buckets without entry are skipped from the table. Their offsets are82// still written out for faster lookup.83//84class CompactHashtableWriter: public StackObj {85public:86class Entry {87unsigned int _hash;88u4 _value;8990public:91Entry() {}92Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}9394u4 value() {95return _value;96}97unsigned int hash() {98return _hash;99}100101bool operator==(const CompactHashtableWriter::Entry& other) {102return (_value == other._value && _hash == other._hash);103}104}; // class CompactHashtableWriter::Entry105106private:107int _num_entries_written;108int _num_buckets;109int _num_empty_buckets;110int _num_value_only_buckets;111int _num_other_buckets;112GrowableArray<Entry>** _buckets;113CompactHashtableStats* _stats;114Array<u4>* _compact_buckets;115Array<u4>* _compact_entries;116117public:118// This is called at dump-time only119CompactHashtableWriter(int num_entries, CompactHashtableStats* stats);120~CompactHashtableWriter();121122void add(unsigned int hash, u4 value);123124private:125void allocate_table();126void dump_table(NumberSeq* summary);127static int calculate_num_buckets(int num_entries) {128int num_buckets = num_entries / SharedSymbolTableBucketSize;129// calculation of num_buckets can result in zero buckets, we need at least one130return (num_buckets < 1) ? 1 : num_buckets;131}132133public:134void dump(SimpleCompactHashtable *cht, const char* table_name);135136static size_t estimate_size(int num_entries);137};138#endif // INCLUDE_CDS139140#define REGULAR_BUCKET_TYPE 0141#define VALUE_ONLY_BUCKET_TYPE 1142#define TABLEEND_BUCKET_TYPE 3143#define BUCKET_OFFSET_MASK 0x3FFFFFFF144#define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK)145#define BUCKET_TYPE_SHIFT 30146#define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)147#define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))148149/////////////////////////////////////////////////////////////////////////////150//151// CompactHashtable is used to store the CDS archive's symbol/string tables.152//153// Because these tables are read-only (no entries can be added/deleted) at run-time154// and tend to have large number of entries, we try to minimize the footprint155// cost per entry.156//157// The CompactHashtable is split into two arrays158//159// u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset160// u4 entries[<variable size>]161//162// The size of buckets[] is 'num_buckets + 1'. Each entry of163// buckets[] is a 32-bit encoding of the bucket type and bucket offset,164// with the type in the left-most 2-bit and offset in the remaining 30-bit.165// The last entry is a special type. It contains the end of the last166// bucket.167//168// There are two types of buckets, regular buckets and value_only buckets. The169// value_only buckets have '01' in their highest 2-bit, and regular buckets have170// '00' in their highest 2-bit.171//172// For normal buckets, each entry is 8 bytes in the entries[]:173// u4 hash; /* symbol/string hash */174// union {175// u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */176// narrowOop str; /* String narrowOop encoding */177// }178//179//180// For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].181//182// Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code183// is skipped.184// buckets[0, 4, 5, ....]185// | | |186// | | +---+187// | | |188// | +----+ |189// v v v190// entries[H,O,H,O,O,H,O,H,O.....]191//192// See CompactHashtable::lookup() for how the table is searched at runtime.193// See CompactHashtableWriter::dump() for how the table is written at CDS194// dump time.195//196class SimpleCompactHashtable {197protected:198address _base_address;199u4 _bucket_count;200u4 _entry_count;201u4* _buckets;202u4* _entries;203204public:205SimpleCompactHashtable() {206_entry_count = 0;207_bucket_count = 0;208_buckets = 0;209_entries = 0;210}211212void reset() {213_bucket_count = 0;214_entry_count = 0;215_buckets = 0;216_entries = 0;217}218219void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries);220221// Read/Write the table's header from/to the CDS archive222void serialize_header(SerializeClosure* soc) NOT_CDS_RETURN;223224inline bool empty() const {225return (_entry_count == 0);226}227228inline size_t entry_count() const {229return _entry_count;230}231232static size_t calculate_header_size();233};234235template <236typename K,237typename V,238V (*DECODE)(address base_address, u4 offset),239bool (*EQUALS)(V value, K key, int len)240>241class CompactHashtable : public SimpleCompactHashtable {242friend class VMStructs;243244V decode(u4 offset) const {245return DECODE(_base_address, offset);246}247248public:249// Lookup a value V from the compact table using key K250inline V lookup(K key, unsigned int hash, int len) const {251if (_entry_count > 0) {252int index = hash % _bucket_count;253u4 bucket_info = _buckets[index];254u4 bucket_offset = BUCKET_OFFSET(bucket_info);255int bucket_type = BUCKET_TYPE(bucket_info);256u4* entry = _entries + bucket_offset;257258if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {259V value = decode(entry[0]);260if (EQUALS(value, key, len)) {261return value;262}263} else {264// This is a regular bucket, which has more than one265// entries. Each entry is a pair of entry (hash, offset).266// Seek until the end of the bucket.267u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);268while (entry < entry_max) {269unsigned int h = (unsigned int)(entry[0]);270if (h == hash) {271V value = decode(entry[1]);272if (EQUALS(value, key, len)) {273return value;274}275}276entry += 2;277}278}279}280return NULL;281}282283template <class ITER>284inline void iterate(ITER* iter) const {285for (u4 i = 0; i < _bucket_count; i++) {286u4 bucket_info = _buckets[i];287u4 bucket_offset = BUCKET_OFFSET(bucket_info);288int bucket_type = BUCKET_TYPE(bucket_info);289u4* entry = _entries + bucket_offset;290291if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {292iter->do_value(decode(entry[0]));293} else {294u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]);295while (entry < entry_max) {296iter->do_value(decode(entry[1]));297entry += 2;298}299}300}301}302303void print_table_statistics(outputStream* st, const char* name) {304st->print_cr("%s statistics:", name);305int total_entries = 0;306int max_bucket = 0;307for (u4 i = 0; i < _bucket_count; i++) {308u4 bucket_info = _buckets[i];309int bucket_type = BUCKET_TYPE(bucket_info);310int bucket_size;311312if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {313bucket_size = 1;314} else {315bucket_size = (BUCKET_OFFSET(_buckets[i + 1]) - BUCKET_OFFSET(bucket_info)) / 2;316}317total_entries += bucket_size;318if (max_bucket < bucket_size) {319max_bucket = bucket_size;320}321}322st->print_cr("Number of buckets : %9d", _bucket_count);323st->print_cr("Number of entries : %9d", total_entries);324st->print_cr("Maximum bucket size : %9d", max_bucket);325}326};327328////////////////////////////////////////////////////////////////////////329//330// OffsetCompactHashtable -- This is used to store many types of objects331// in the CDS archive. On 64-bit platforms, we save space by using a 32-bit332// offset from the CDS base address.333334template <typename V>335inline V read_value_from_compact_hashtable(address base_address, u4 offset) {336return (V)(base_address + offset);337}338339template <340typename K,341typename V,342bool (*EQUALS)(V value, K key, int len)343>344class OffsetCompactHashtable : public CompactHashtable<345K, V, read_value_from_compact_hashtable<V>, EQUALS> {346};347348349////////////////////////////////////////////////////////////////////////350//351// Read/Write the contents of a hashtable textual dump (created by352// SymbolTable::dump and StringTable::dump).353// Because the dump file may be big (hundred of MB in extreme cases),354// we use mmap for fast access when reading it.355//356class HashtableTextDump {357int _fd;358const char* _base;359const char* _p;360const char* _end;361const char* _filename;362size_t _size;363int _prefix_type;364int _line_no;365public:366HashtableTextDump(const char* filename);367~HashtableTextDump();368369enum {370SymbolPrefix = 1 << 0,371StringPrefix = 1 << 1,372Unknown = 1 << 2373};374375void quit(const char* err, const char* msg);376377inline int remain() {378return (int)(_end - _p);379}380int last_line_no() {381return _line_no - 1;382}383384void corrupted(const char *p, const char *msg);385386inline void corrupted_if(bool cond, const char *msg) {387if (cond) {388corrupted(_p, msg);389}390}391392bool skip_newline();393int skip(char must_be_char);394void skip_past(char c);395void check_version(const char* ver);396397inline void get_num(char delim, int *num) {398const char* p = _p;399const char* end = _end;400u8 n = 0;401402while (p < end) {403char c = *p++;404if ('0' <= c && c <= '9') {405n = n * 10 + (c - '0');406if (n > (u8)INT_MAX) {407corrupted(_p, "Num overflow");408}409} else if (c == delim) {410_p = p;411*num = (int)n;412return;413} else {414// Not [0-9], not 'delim'415corrupted(_p, "Unrecognized format");;416}417}418419corrupted(_end, "Incorrect format");420ShouldNotReachHere();421}422423void scan_prefix_type();424int scan_prefix(int* utf8_length);425int scan_string_prefix();426int scan_symbol_prefix();427428jchar unescape(const char* from, const char* end, int count);429void get_utf8(char* utf8_buffer, int utf8_length);430static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);431};432433#endif // SHARE_CLASSFILE_COMPACTHASHTABLE_HPP434435436