Path: blob/main/contrib/llvm-project/llvm/utils/TableGen/Basic/SequenceToOffsetTable.h
35290 views
//===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// SequenceToOffsetTable can be used to emit a number of null-terminated9// sequences as one big array. Use the same memory when a sequence is a suffix10// of another.11//12//===----------------------------------------------------------------------===//1314#ifndef LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H15#define LLVM_UTILS_TABLEGEN_SEQUENCETOOFFSETTABLE_H1617#include "llvm/Support/CommandLine.h"18#include "llvm/Support/raw_ostream.h"19#include <algorithm>20#include <cassert>21#include <cctype>22#include <functional>23#include <map>2425namespace llvm {26extern llvm::cl::opt<bool> EmitLongStrLiterals;2728static inline void printChar(raw_ostream &OS, char C) {29unsigned char UC(C);30if (isalnum(UC) || ispunct(UC)) {31OS << '\'';32if (C == '\\' || C == '\'')33OS << '\\';34OS << C << '\'';35} else {36OS << unsigned(UC);37}38}3940/// SequenceToOffsetTable - Collect a number of terminated sequences of T.41/// Compute the layout of a table that contains all the sequences, possibly by42/// reusing entries.43///44/// @tparam SeqT The sequence container. (vector or string).45/// @tparam Less A stable comparator for SeqT elements.46template <typename SeqT, typename Less = std::less<typename SeqT::value_type>>47class SequenceToOffsetTable {48typedef typename SeqT::value_type ElemT;4950// Define a comparator for SeqT that sorts a suffix immediately before a51// sequence with that suffix.52struct SeqLess {53Less L;54bool operator()(const SeqT &A, const SeqT &B) const {55return std::lexicographical_compare(A.rbegin(), A.rend(), B.rbegin(),56B.rend(), L);57}58};5960// Keep sequences ordered according to SeqLess so suffixes are easy to find.61// Map each sequence to its offset in the table.62typedef std::map<SeqT, unsigned, SeqLess> SeqMap;6364// Sequences added so far, with suffixes removed.65SeqMap Seqs;6667// Entries in the final table, or 0 before layout was called.68unsigned Entries;6970// isSuffix - Returns true if A is a suffix of B.71static bool isSuffix(const SeqT &A, const SeqT &B) {72return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin());73}7475public:76SequenceToOffsetTable() : Entries(0) {}7778/// add - Add a sequence to the table.79/// This must be called before layout().80void add(const SeqT &Seq) {81assert(Entries == 0 && "Cannot call add() after layout()");82typename SeqMap::iterator I = Seqs.lower_bound(Seq);8384// If SeqMap contains a sequence that has Seq as a suffix, I will be85// pointing to it.86if (I != Seqs.end() && isSuffix(Seq, I->first))87return;8889I = Seqs.insert(I, std::pair(Seq, 0u));9091// The entry before I may be a suffix of Seq that can now be erased.92if (I != Seqs.begin() && isSuffix((--I)->first, Seq))93Seqs.erase(I);94}9596bool empty() const { return Seqs.empty(); }9798unsigned size() const {99assert((empty() || Entries) && "Call layout() before size()");100return Entries;101}102103/// layout - Computes the final table layout.104void layout() {105assert(Entries == 0 && "Can only call layout() once");106// Lay out the table in Seqs iteration order.107for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;108++I) {109I->second = Entries;110// Include space for a terminator.111Entries += I->first.size() + 1;112}113}114115/// get - Returns the offset of Seq in the final table.116unsigned get(const SeqT &Seq) const {117assert(Entries && "Call layout() before get()");118typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);119assert(I != Seqs.end() && isSuffix(Seq, I->first) &&120"get() called with sequence that wasn't added first");121return I->second + (I->first.size() - Seq.size());122}123124/// `emitStringLiteralDef` - Print out the table as the body of an array125/// initializer, where each element is a C string literal terminated by126/// `\0`. Falls back to emitting a comma-separated integer list if127/// `EmitLongStrLiterals` is false128void emitStringLiteralDef(raw_ostream &OS, const llvm::Twine &Decl) const {129assert(Entries && "Call layout() before emitStringLiteralDef()");130if (!EmitLongStrLiterals) {131OS << Decl << " = {\n";132emit(OS, printChar, "0");133OS << " 0\n};\n\n";134return;135}136137OS << "\n#ifdef __GNUC__\n"138<< "#pragma GCC diagnostic push\n"139<< "#pragma GCC diagnostic ignored \"-Woverlength-strings\"\n"140<< "#endif\n"141<< Decl << " = {\n";142for (auto I : Seqs) {143OS << " /* " << I.second << " */ \"";144OS.write_escaped(I.first);145OS << "\\0\"\n";146}147OS << "};\n"148<< "#ifdef __GNUC__\n"149<< "#pragma GCC diagnostic pop\n"150<< "#endif\n\n";151}152153/// emit - Print out the table as the body of an array initializer.154/// Use the Print function to print elements.155void emit(raw_ostream &OS, void (*Print)(raw_ostream &, ElemT),156const char *Term = "0") const {157assert((empty() || Entries) && "Call layout() before emit()");158for (typename SeqMap::const_iterator I = Seqs.begin(), E = Seqs.end();159I != E; ++I) {160OS << " /* " << I->second << " */ ";161for (typename SeqT::const_iterator SI = I->first.begin(),162SE = I->first.end();163SI != SE; ++SI) {164Print(OS, *SI);165OS << ", ";166}167OS << Term << ",\n";168}169}170};171172} // end namespace llvm173174#endif175176177