Path: blob/main/contrib/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h
39638 views
//===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//
// !! FIXME FIXME FIXME !!
//
// Nearly all Python APIs can return an exception.  They do this
// by returning NULL, or -1, or some such value and setting
// the exception state with PyErr_Set*().  Exceptions must be
// handled before further python API functions are called.  Failure
// to do so will result in asserts on debug builds of python.
// It will also sometimes, but not usually, result in crashes of
// release builds.
//
// Nearly all the code in this header does not handle python exceptions
// correctly.  It should all be converted to return Expected<> or
// Error types to capture the exception.
//
// Everything in this file except functions that return Error or
// Expected<> is considered deprecated and should not be
// used in new code.  If you need to use it, fix it first.
//
//
// TODOs for this file
//
// * Make all methods safe for exceptions.
//
// * Eliminate method signatures that must translate exceptions into
//   empty objects or NULLs.  Almost everything here should return
//   Expected<>.
It should be acceptable for certain operations that35// can never fail to assert instead, such as the creation of36// PythonString from a string literal.37//38// * Eliminate Reset(), and make all non-default constructors private.39// Python objects should be created with Retain<> or Take<>, and they40// should be assigned with operator=41//42// * Eliminate default constructors, make python objects always43// nonnull, and use optionals where necessary.44//454647#ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H48#define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H4950#include "lldb/Host/Config.h"5152#if LLDB_ENABLE_PYTHON5354// LLDB Python header must be included first55#include "lldb-python.h"5657#include "lldb/Host/File.h"58#include "lldb/Utility/StructuredData.h"5960#include "llvm/ADT/ArrayRef.h"6162namespace lldb_private {63namespace python {6465class PythonObject;66class PythonBytes;67class PythonString;68class PythonList;69class PythonDictionary;70class PythonInteger;71class PythonException;7273class GIL {74public:75GIL() {76m_state = PyGILState_Ensure();77assert(!PyErr_Occurred());78}79~GIL() { PyGILState_Release(m_state); }8081protected:82PyGILState_STATE m_state;83};8485enum class PyObjectType {86Unknown,87None,88Boolean,89Integer,90Dictionary,91List,92String,93Bytes,94ByteArray,95Module,96Callable,97Tuple,98File99};100101enum class PyRefType {102Borrowed, // We are not given ownership of the incoming PyObject.103// We cannot safely hold it without calling Py_INCREF.104Owned // We have ownership of the incoming PyObject. We should105// not call Py_INCREF.106};107108109// Take a reference that you already own, and turn it into110// a PythonObject.111//112// Most python API methods will return a +1 reference113// if they succeed or NULL if and only if114// they set an exception. 
Use this to collect such return115// values, after checking for NULL.116//117// If T is not just PythonObject, then obj must be already be118// checked to be of the correct type.119template <typename T> T Take(PyObject *obj) {120assert(obj);121assert(!PyErr_Occurred());122T thing(PyRefType::Owned, obj);123assert(thing.IsValid());124return thing;125}126127// Retain a reference you have borrowed, and turn it into128// a PythonObject.129//130// A minority of python APIs return a borrowed reference131// instead of a +1. They will also return NULL if and only132// if they set an exception. Use this to collect such return133// values, after checking for NULL.134//135// If T is not just PythonObject, then obj must be already be136// checked to be of the correct type.137template <typename T> T Retain(PyObject *obj) {138assert(obj);139assert(!PyErr_Occurred());140T thing(PyRefType::Borrowed, obj);141assert(thing.IsValid());142return thing;143}144145// This class can be used like a utility function to convert from146// a llvm-friendly Twine into a null-terminated const char *,147// which is the form python C APIs want their strings in.148//149// Example:150// const llvm::Twine &some_twine;151// PyFoo_Bar(x, y, z, NullTerminated(some_twine));152//153// Why a class instead of a function? If the twine isn't already null154// terminated, it will need a temporary buffer to copy the string155// into. 
We need that buffer to stick around for the lifetime of the156// statement.157class NullTerminated {158const char *str;159llvm::SmallString<32> storage;160161public:162NullTerminated(const llvm::Twine &twine) {163llvm::StringRef ref = twine.toNullTerminatedStringRef(storage);164str = ref.begin();165}166operator const char *() { return str; }167};168169inline llvm::Error nullDeref() {170return llvm::createStringError(llvm::inconvertibleErrorCode(),171"A NULL PyObject* was dereferenced");172}173174inline llvm::Error exception(const char *s = nullptr) {175return llvm::make_error<PythonException>(s);176}177178inline llvm::Error keyError() {179return llvm::createStringError(llvm::inconvertibleErrorCode(),180"key not in dict");181}182183inline const char *py2_const_cast(const char *s) { return s; }184185enum class PyInitialValue { Invalid, Empty };186187// DOC: https://docs.python.org/3/c-api/arg.html#building-values188template <typename T, typename Enable = void> struct PythonFormat;189190template <typename T, char F> struct PassthroughFormat {191static constexpr char format = F;192static constexpr T get(T t) { return t; }193};194195template <> struct PythonFormat<char *> : PassthroughFormat<char *, 's'> {};196template <> struct PythonFormat<const char *> :197PassthroughFormat<const char *, 's'> {};198template <> struct PythonFormat<char> : PassthroughFormat<char, 'b'> {};199template <>200struct PythonFormat<unsigned char> : PassthroughFormat<unsigned char, 'B'> {};201template <> struct PythonFormat<short> : PassthroughFormat<short, 'h'> {};202template <>203struct PythonFormat<unsigned short> : PassthroughFormat<unsigned short, 'H'> {};204template <> struct PythonFormat<int> : PassthroughFormat<int, 'i'> {};205template <> struct PythonFormat<bool> : PassthroughFormat<bool, 'p'> {};206template <>207struct PythonFormat<unsigned int> : PassthroughFormat<unsigned int, 'I'> {};208template <> struct PythonFormat<long> : PassthroughFormat<long, 'l'> {};209template <>210struct 
PythonFormat<unsigned long> : PassthroughFormat<unsigned long, 'k'> {};211template <>212struct PythonFormat<long long> : PassthroughFormat<long long, 'L'> {};213template <>214struct PythonFormat<unsigned long long>215: PassthroughFormat<unsigned long long, 'K'> {};216template <>217struct PythonFormat<PyObject *> : PassthroughFormat<PyObject *, 'O'> {};218219template <typename T>220struct PythonFormat<221T, typename std::enable_if<std::is_base_of<PythonObject, T>::value>::type> {222static constexpr char format = 'O';223static auto get(const T &value) { return value.get(); }224};225226class PythonObject {227public:228PythonObject() = default;229230PythonObject(PyRefType type, PyObject *py_obj) {231m_py_obj = py_obj;232// If this is a borrowed reference, we need to convert it to233// an owned reference by incrementing it. If it is an owned234// reference (for example the caller allocated it with PyDict_New()235// then we must *not* increment it.236if (m_py_obj && Py_IsInitialized() && type == PyRefType::Borrowed)237Py_XINCREF(m_py_obj);238}239240PythonObject(const PythonObject &rhs)241: PythonObject(PyRefType::Borrowed, rhs.m_py_obj) {}242243PythonObject(PythonObject &&rhs) {244m_py_obj = rhs.m_py_obj;245rhs.m_py_obj = nullptr;246}247248~PythonObject() { Reset(); }249250void Reset();251252void Dump() const {253if (m_py_obj)254_PyObject_Dump(m_py_obj);255else256puts("NULL");257}258259void Dump(Stream &strm) const;260261PyObject *get() const { return m_py_obj; }262263PyObject *release() {264PyObject *result = m_py_obj;265m_py_obj = nullptr;266return result;267}268269PythonObject &operator=(PythonObject other) {270Reset();271m_py_obj = std::exchange(other.m_py_obj, nullptr);272return *this;273}274275PyObjectType GetObjectType() const;276277PythonString Repr() const;278279PythonString Str() const;280281static PythonObject ResolveNameWithDictionary(llvm::StringRef name,282const PythonDictionary &dict);283284template <typename T>285static T 
ResolveNameWithDictionary(llvm::StringRef name,286const PythonDictionary &dict) {287return ResolveNameWithDictionary(name, dict).AsType<T>();288}289290PythonObject ResolveName(llvm::StringRef name) const;291292template <typename T> T ResolveName(llvm::StringRef name) const {293return ResolveName(name).AsType<T>();294}295296bool HasAttribute(llvm::StringRef attribute) const;297298PythonObject GetAttributeValue(llvm::StringRef attribute) const;299300bool IsNone() const { return m_py_obj == Py_None; }301302bool IsValid() const { return m_py_obj != nullptr; }303304bool IsAllocated() const { return IsValid() && !IsNone(); }305306explicit operator bool() const { return IsValid() && !IsNone(); }307308template <typename T> T AsType() const {309if (!T::Check(m_py_obj))310return T();311return T(PyRefType::Borrowed, m_py_obj);312}313314StructuredData::ObjectSP CreateStructuredObject() const;315316template <typename... T>317llvm::Expected<PythonObject> CallMethod(const char *name,318const T &... t) const {319const char format[] = {'(', PythonFormat<T>::format..., ')', 0};320PyObject *obj =321PyObject_CallMethod(m_py_obj, py2_const_cast(name),322py2_const_cast(format), PythonFormat<T>::get(t)...);323if (!obj)324return exception();325return python::Take<PythonObject>(obj);326}327328template <typename... T>329llvm::Expected<PythonObject> Call(const T &... 
t) const {330const char format[] = {'(', PythonFormat<T>::format..., ')', 0};331PyObject *obj = PyObject_CallFunction(m_py_obj, py2_const_cast(format),332PythonFormat<T>::get(t)...);333if (!obj)334return exception();335return python::Take<PythonObject>(obj);336}337338llvm::Expected<PythonObject> GetAttribute(const llvm::Twine &name) const {339if (!m_py_obj)340return nullDeref();341PyObject *obj = PyObject_GetAttrString(m_py_obj, NullTerminated(name));342if (!obj)343return exception();344return python::Take<PythonObject>(obj);345}346347llvm::Expected<PythonObject> GetType() const {348if (!m_py_obj)349return nullDeref();350PyObject *obj = PyObject_Type(m_py_obj);351if (!obj)352return exception();353return python::Take<PythonObject>(obj);354}355356llvm::Expected<bool> IsTrue() {357if (!m_py_obj)358return nullDeref();359int r = PyObject_IsTrue(m_py_obj);360if (r < 0)361return exception();362return !!r;363}364365llvm::Expected<long long> AsLongLong() const;366367llvm::Expected<unsigned long long> AsUnsignedLongLong() const;368369// wraps on overflow, instead of raising an error.370llvm::Expected<unsigned long long> AsModuloUnsignedLongLong() const;371372llvm::Expected<bool> IsInstance(const PythonObject &cls) {373if (!m_py_obj || !cls.IsValid())374return nullDeref();375int r = PyObject_IsInstance(m_py_obj, cls.get());376if (r < 0)377return exception();378return !!r;379}380381protected:382PyObject *m_py_obj = nullptr;383};384385386// This is why C++ needs monads.387template <typename T> llvm::Expected<T> As(llvm::Expected<PythonObject> &&obj) {388if (!obj)389return obj.takeError();390if (!T::Check(obj.get().get()))391return llvm::createStringError(llvm::inconvertibleErrorCode(),392"type error");393return T(PyRefType::Borrowed, std::move(obj.get().get()));394}395396template <> llvm::Expected<bool> As<bool>(llvm::Expected<PythonObject> &&obj);397398template <>399llvm::Expected<long long> As<long long>(llvm::Expected<PythonObject> &&obj);400401template 
<>402llvm::Expected<unsigned long long>403As<unsigned long long>(llvm::Expected<PythonObject> &&obj);404405template <>406llvm::Expected<std::string> As<std::string>(llvm::Expected<PythonObject> &&obj);407408409template <class T> class TypedPythonObject : public PythonObject {410public:411TypedPythonObject(PyRefType type, PyObject *py_obj) {412if (!py_obj)413return;414if (T::Check(py_obj))415PythonObject::operator=(PythonObject(type, py_obj));416else if (type == PyRefType::Owned)417Py_DECREF(py_obj);418}419420TypedPythonObject() = default;421};422423class PythonBytes : public TypedPythonObject<PythonBytes> {424public:425using TypedPythonObject::TypedPythonObject;426explicit PythonBytes(llvm::ArrayRef<uint8_t> bytes);427PythonBytes(const uint8_t *bytes, size_t length);428429static bool Check(PyObject *py_obj);430431llvm::ArrayRef<uint8_t> GetBytes() const;432433size_t GetSize() const;434435void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);436437StructuredData::StringSP CreateStructuredString() const;438};439440class PythonByteArray : public TypedPythonObject<PythonByteArray> {441public:442using TypedPythonObject::TypedPythonObject;443explicit PythonByteArray(llvm::ArrayRef<uint8_t> bytes);444PythonByteArray(const uint8_t *bytes, size_t length);445PythonByteArray(const PythonBytes &object);446447static bool Check(PyObject *py_obj);448449llvm::ArrayRef<uint8_t> GetBytes() const;450451size_t GetSize() const;452453void SetBytes(llvm::ArrayRef<uint8_t> stringbytes);454455StructuredData::StringSP CreateStructuredString() const;456};457458class PythonString : public TypedPythonObject<PythonString> {459public:460using TypedPythonObject::TypedPythonObject;461static llvm::Expected<PythonString> FromUTF8(llvm::StringRef string);462463PythonString() : TypedPythonObject() {} // MSVC requires this for some reason464465explicit PythonString(llvm::StringRef string); // safe, null on error466467static bool Check(PyObject *py_obj);468469llvm::StringRef GetString() const; // safe, 
empty string on error470471llvm::Expected<llvm::StringRef> AsUTF8() const;472473size_t GetSize() const;474475void SetString(llvm::StringRef string); // safe, null on error476477StructuredData::StringSP CreateStructuredString() const;478};479480class PythonInteger : public TypedPythonObject<PythonInteger> {481public:482using TypedPythonObject::TypedPythonObject;483484PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason485486explicit PythonInteger(int64_t value);487488static bool Check(PyObject *py_obj);489490void SetInteger(int64_t value);491492StructuredData::IntegerSP CreateStructuredInteger() const;493494StructuredData::UnsignedIntegerSP CreateStructuredUnsignedInteger() const;495496StructuredData::SignedIntegerSP CreateStructuredSignedInteger() const;497};498499class PythonBoolean : public TypedPythonObject<PythonBoolean> {500public:501using TypedPythonObject::TypedPythonObject;502503explicit PythonBoolean(bool value);504505static bool Check(PyObject *py_obj);506507bool GetValue() const;508509void SetValue(bool value);510511StructuredData::BooleanSP CreateStructuredBoolean() const;512};513514class PythonList : public TypedPythonObject<PythonList> {515public:516using TypedPythonObject::TypedPythonObject;517518PythonList() : TypedPythonObject() {} // MSVC requires this for some reason519520explicit PythonList(PyInitialValue value);521explicit PythonList(int list_size);522523static bool Check(PyObject *py_obj);524525uint32_t GetSize() const;526527PythonObject GetItemAtIndex(uint32_t index) const;528529void SetItemAtIndex(uint32_t index, const PythonObject &object);530531void AppendItem(const PythonObject &object);532533StructuredData::ArraySP CreateStructuredArray() const;534};535536class PythonTuple : public TypedPythonObject<PythonTuple> {537public:538using TypedPythonObject::TypedPythonObject;539540explicit PythonTuple(PyInitialValue value);541explicit PythonTuple(int tuple_size);542PythonTuple(std::initializer_list<PythonObject> 
objects);543PythonTuple(std::initializer_list<PyObject *> objects);544545static bool Check(PyObject *py_obj);546547uint32_t GetSize() const;548549PythonObject GetItemAtIndex(uint32_t index) const;550551void SetItemAtIndex(uint32_t index, const PythonObject &object);552553StructuredData::ArraySP CreateStructuredArray() const;554};555556class PythonDictionary : public TypedPythonObject<PythonDictionary> {557public:558using TypedPythonObject::TypedPythonObject;559560PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason561562explicit PythonDictionary(PyInitialValue value);563564static bool Check(PyObject *py_obj);565566bool HasKey(const llvm::Twine &key) const;567568uint32_t GetSize() const;569570PythonList GetKeys() const;571572PythonObject GetItemForKey(const PythonObject &key) const; // DEPRECATED573void SetItemForKey(const PythonObject &key,574const PythonObject &value); // DEPRECATED575576llvm::Expected<PythonObject> GetItem(const PythonObject &key) const;577llvm::Expected<PythonObject> GetItem(const llvm::Twine &key) const;578llvm::Error SetItem(const PythonObject &key, const PythonObject &value) const;579llvm::Error SetItem(const llvm::Twine &key, const PythonObject &value) const;580581StructuredData::DictionarySP CreateStructuredDictionary() const;582};583584class PythonModule : public TypedPythonObject<PythonModule> {585public:586using TypedPythonObject::TypedPythonObject;587588static bool Check(PyObject *py_obj);589590static PythonModule BuiltinsModule();591592static PythonModule MainModule();593594static PythonModule AddModule(llvm::StringRef module);595596// safe, returns invalid on error;597static PythonModule ImportModule(llvm::StringRef name) {598std::string s = std::string(name);599auto mod = Import(s.c_str());600if (!mod) {601llvm::consumeError(mod.takeError());602return PythonModule();603}604return std::move(mod.get());605}606607static llvm::Expected<PythonModule> Import(const llvm::Twine 
&name);608609llvm::Expected<PythonObject> Get(const llvm::Twine &name);610611PythonDictionary GetDictionary() const;612};613614class PythonCallable : public TypedPythonObject<PythonCallable> {615public:616using TypedPythonObject::TypedPythonObject;617618struct ArgInfo {619/* the largest number of positional arguments this callable620* can accept, or UNBOUNDED, ie UINT_MAX if it's a varargs621* function and can accept an arbitrary number */622unsigned max_positional_args;623static constexpr unsigned UNBOUNDED = UINT_MAX; // FIXME c++17 inline624};625626static bool Check(PyObject *py_obj);627628llvm::Expected<ArgInfo> GetArgInfo() const;629630PythonObject operator()();631632PythonObject operator()(std::initializer_list<PyObject *> args);633634PythonObject operator()(std::initializer_list<PythonObject> args);635636template <typename Arg, typename... Args>637PythonObject operator()(const Arg &arg, Args... args) {638return operator()({arg, args...});639}640};641642class PythonFile : public TypedPythonObject<PythonFile> {643public:644using TypedPythonObject::TypedPythonObject;645646PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason647648static bool Check(PyObject *py_obj);649650static llvm::Expected<PythonFile> FromFile(File &file,651const char *mode = nullptr);652653llvm::Expected<lldb::FileSP> ConvertToFile(bool borrowed = false);654llvm::Expected<lldb::FileSP>655ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed = false);656};657658class PythonException : public llvm::ErrorInfo<PythonException> {659private:660PyObject *m_exception_type, *m_exception, *m_traceback;661PyObject *m_repr_bytes;662663public:664static char ID;665const char *toCString() const;666PythonException(const char *caller = nullptr);667void Restore();668~PythonException() override;669void log(llvm::raw_ostream &OS) const override;670std::error_code convertToErrorCode() const override;671bool Matches(PyObject *exc) const;672std::string ReadBacktrace() const;673};674675// 
This extracts the underlying T out of an Expected<T> and returns it.676// If the Expected is an Error instead of a T, that error will be converted677// into a python exception, and this will return a default-constructed T.678//679// This is appropriate for use right at the boundary of python calling into680// C++, such as in a SWIG typemap. In such a context you should simply681// check if the returned T is valid, and if it is, return a NULL back682// to python. This will result in the Error being raised as an exception683// from python code's point of view.684//685// For example:686// ```687// Expected<Foo *> efoop = some_cpp_function();688// Foo *foop = unwrapOrSetPythonException(efoop);689// if (!foop)690// return NULL;691// do_something(*foop);692//693// If the Error returned was itself created because a python exception was694// raised when C++ code called into python, then the original exception695// will be restored. Otherwise a simple string exception will be raised.696template <typename T> T unwrapOrSetPythonException(llvm::Expected<T> expected) {697if (expected)698return expected.get();699llvm::handleAllErrors(700expected.takeError(), [](PythonException &E) { E.Restore(); },701[](const llvm::ErrorInfoBase &E) {702PyErr_SetString(PyExc_Exception, E.message().c_str());703});704return T();705}706707// This is only here to help incrementally migrate old, exception-unsafe708// code.709template <typename T> T unwrapIgnoringErrors(llvm::Expected<T> expected) {710if (expected)711return std::move(expected.get());712llvm::consumeError(expected.takeError());713return T();714}715716llvm::Expected<PythonObject> runStringOneLine(const llvm::Twine &string,717const PythonDictionary &globals,718const PythonDictionary &locals);719720llvm::Expected<PythonObject> runStringMultiLine(const llvm::Twine &string,721const PythonDictionary &globals,722const PythonDictionary &locals);723724// Sometimes the best way to interact with a python interpreter is725// to run some python 
code. You construct a PythonScript with726// script string. The script assigns some function to `_function_`727// and you get a C++ callable object that calls the python function.728//729// Example:730//731// const char script[] = R"(732// def main(x, y):733// ....734// )";735//736// Expected<PythonObject> cpp_foo_wrapper(PythonObject x, PythonObject y) {737// // no need to synchronize access to this global, we already have the GIL738// static PythonScript foo(script)739// return foo(x, y);740// }741class PythonScript {742const char *script;743PythonCallable function;744745llvm::Error Init();746747public:748PythonScript(const char *script) : script(script), function() {}749750template <typename... Args>751llvm::Expected<PythonObject> operator()(Args &&... args) {752if (llvm::Error error = Init())753return std::move(error);754return function.Call(std::forward<Args>(args)...);755}756};757758class StructuredPythonObject : public StructuredData::Generic {759public:760StructuredPythonObject() : StructuredData::Generic() {}761762// Take ownership of the object we received.763StructuredPythonObject(PythonObject obj)764: StructuredData::Generic(obj.release()) {}765766~StructuredPythonObject() override {767// Hand ownership back to a (temporary) PythonObject instance and let it768// take care of releasing it.769PythonObject(PyRefType::Owned, static_cast<PyObject *>(GetValue()));770}771772bool IsValid() const override { return GetValue() && GetValue() != Py_None; }773774void Serialize(llvm::json::OStream &s) const override;775776private:777StructuredPythonObject(const StructuredPythonObject &) = delete;778const StructuredPythonObject &779operator=(const StructuredPythonObject &) = delete;780};781782} // namespace python783} // namespace lldb_private784785#endif786787#endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H788789790