#include "Python.h"
#include "structmember.h"
#include <stdlib.h>
#include <string.h>
#include <lzma.h>
#include "pycore_blocks_output_buffer.h"
/* Compile-time guard: refuse to build when the output buffer's maximum block
   size exceeds SIZE_MAX, since block sizes are handed to liblzma as size_t
   (see the #error text). */
#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
#error "The maximum block size accepted by liblzma is SIZE_MAX."
#endif
/* Initialise `buffer` via _BlocksOutputBuffer_InitAndGrow() and expose the
 * first block to liblzma.
 *
 * buffer      output buffer to initialise
 * max_length  forwarded unchanged to _BlocksOutputBuffer_InitAndGrow()
 * next_out    receives the address where output may be written
 * avail_out   receives the size of the block, converted to size_t
 *
 * Returns the value reported by _BlocksOutputBuffer_InitAndGrow(); callers in
 * this file treat a negative result as failure.
 */
static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
                         uint8_t **next_out, size_t *avail_out)
{
    const Py_ssize_t block_size = _BlocksOutputBuffer_InitAndGrow(
        buffer, max_length, (void **)next_out);

    /* The store is unconditional: it also happens when block_size is
       negative. */
    *avail_out = (size_t)block_size;
    return block_size;
}
/* Add another block to `buffer` via _BlocksOutputBuffer_Grow().
 *
 * buffer     output buffer to extend
 * next_out   receives the address where output may be written
 * avail_out  in: the current free-space count, passed to the underlying call;
 *            out: the size of the new block, converted to size_t
 *
 * Returns the value reported by _BlocksOutputBuffer_Grow(); callers in this
 * file treat a negative result as failure.
 */
static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
                  uint8_t **next_out, size_t *avail_out)
{
    const Py_ssize_t remaining = (Py_ssize_t)*avail_out;
    const Py_ssize_t block_size = _BlocksOutputBuffer_Grow(
        buffer, (void **)next_out, remaining);

    /* The store is unconditional: it also happens when block_size is
       negative. */
    *avail_out = (size_t)block_size;
    return block_size;
}
/* Size_t-friendly wrapper around _BlocksOutputBuffer_GetDataSize().
 * `avail_out` is the free-space count taken from the lzma_stream; it is
 * converted to Py_ssize_t before being forwarded. Returns the wrapped call's
 * result unchanged.
 */
static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
{
    const Py_ssize_t unused_space = (Py_ssize_t)avail_out;

    return _BlocksOutputBuffer_GetDataSize(buffer, unused_space);
}
/* Size_t-friendly wrapper around _BlocksOutputBuffer_Finish().
 * `avail_out` is the free-space count taken from the lzma_stream; it is
 * converted to Py_ssize_t before being forwarded. Returns the object produced
 * by the wrapped call; callers in this file treat NULL as failure.
 */
static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
{
    const Py_ssize_t unused_space = (Py_ssize_t)avail_out;

    return _BlocksOutputBuffer_Finish(buffer, unused_space);
}
/* Abandon `buffer` after a failure by delegating to
   _BlocksOutputBuffer_OnError(). Used on the error paths of compress() and
   decompress_buf(). */
static inline void
OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
{
_BlocksOutputBuffer_OnError(buffer);
}
/* ACQUIRE_LOCK(obj): take obj->lock.
   A non-blocking attempt (wait flag 0) is made first. Only when that fails
   does the macro enter a Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS section
   and block on the lock (wait flag 1).
   RELEASE_LOCK(obj): release obj->lock. */
#define ACQUIRE_LOCK(obj) do { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
Py_BEGIN_ALLOW_THREADS \
PyThread_acquire_lock((obj)->lock, 1); \
Py_END_ALLOW_THREADS \
} } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
/* Per-module state; retrieved with get_lzma_state() and populated by
   lzma_exec(). */
typedef struct {
/* Type object created from lzma_compressor_type_spec. */
PyTypeObject *lzma_compressor_type;
/* Type object created from lzma_decompressor_type_spec. */
PyTypeObject *lzma_decompressor_type;
/* The _lzma.LZMAError exception class. */
PyObject *error;
/* Cached empty tuple, passed as the positional-args tuple when filter-spec
   mappings are parsed with PyArg_ParseTupleAndKeywords(). */
PyObject *empty_tuple;
} _lzma_state;
/* Return the _lzma_state attached to `module`.
 * The result of PyModule_GetState() is asserted to be non-NULL (debug builds
 * only) and returned as-is.
 */
static inline _lzma_state*
get_lzma_state(PyObject *module)
{
    _lzma_state *module_state = (_lzma_state *)PyModule_GetState(module);

    assert(module_state != NULL);
    return module_state;
}
/* Container formats understood by this module; exported as module-level
   integer constants by lzma_exec(). */
enum {
FORMAT_AUTO,
FORMAT_XZ,
FORMAT_ALONE,
FORMAT_RAW,
};
/* Value one past liblzma's highest check ID; used as the initial value of
   Decompressor.check. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
/* Instance layout of _lzma.LZMACompressor. */
typedef struct {
PyObject_HEAD
/* Allocator callbacks (PyLzma_Malloc / PyLzma_Free) that Compressor_new()
   installs into lzs.allocator. */
lzma_allocator alloc;
/* The liblzma stream that carries the encoder state. */
lzma_stream lzs;
/* Set to 1 by flush(); compress() and flush() raise ValueError afterwards. */
int flushed;
/* Taken around compress()/flush() via ACQUIRE_LOCK / RELEASE_LOCK. */
PyThread_type_lock lock;
} Compressor;
/* Instance layout of _lzma.LZMADecompressor. */
typedef struct {
PyObject_HEAD
/* Allocator callbacks (PyLzma_Malloc / PyLzma_Free) installed into
   lzs.allocator by the constructor. */
lzma_allocator alloc;
/* The liblzma stream that carries the decoder state. */
lzma_stream lzs;
/* Integrity-check ID; starts as LZMA_CHECK_UNKNOWN (or LZMA_CHECK_NONE for
   FORMAT_ALONE / FORMAT_RAW) and is updated from lzma_get_check() by
   decompress_buf(). Exposed as the read-only "check" member. */
int check;
/* Set to 1 once lzma_code() returns LZMA_STREAM_END. */
char eof;
/* Bytes left in the input after end of stream; starts as an empty bytes
   object. */
PyObject *unused_data;
/* Updated by decompress() after every call; exposed as "needs_input". */
char needs_input;
/* Heap buffer holding input that was supplied but not yet consumed, and its
   allocated size. NULL / 0 until first needed. */
uint8_t *input_buffer;
size_t input_buffer_size;
/* Taken around decompress() via ACQUIRE_LOCK / RELEASE_LOCK. */
PyThread_type_lock lock;
} Decompressor;
/* Translate a liblzma return code into a Python exception.
 *
 * state  module state; state->error is the exception class used for most
 *        failures
 * lzret  return code from a liblzma call
 *
 * Returns 0 (no exception set) for LZMA_OK, LZMA_GET_CHECK, LZMA_NO_CHECK and
 * LZMA_STREAM_END. For every other code an exception is set and 1 is
 * returned: MemoryError for LZMA_MEM_ERROR, state->error otherwise.
 */
static int
catch_lzma_error(_lzma_state *state, lzma_ret lzret)
{
    const char *message = NULL;

    switch (lzret) {
    case LZMA_OK:
    case LZMA_GET_CHECK:
    case LZMA_NO_CHECK:
    case LZMA_STREAM_END:
        return 0;

    case LZMA_MEM_ERROR:
        PyErr_NoMemory();
        return 1;

    case LZMA_UNSUPPORTED_CHECK:
        message = "Unsupported integrity check";
        break;
    case LZMA_MEMLIMIT_ERROR:
        message = "Memory usage limit exceeded";
        break;
    case LZMA_FORMAT_ERROR:
        message = "Input format not supported by decoder";
        break;
    case LZMA_OPTIONS_ERROR:
        message = "Invalid or unsupported options";
        break;
    case LZMA_DATA_ERROR:
        message = "Corrupt input data";
        break;
    case LZMA_BUF_ERROR:
        message = "Insufficient buffer space";
        break;
    case LZMA_PROG_ERROR:
        message = "Internal error";
        break;

    default:
        PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
        return 1;
    }

    /* Every code that reaches this point maps to a fixed-text LZMAError. */
    PyErr_SetString(state->error, message);
    return 1;
}
/* lzma_allocator.alloc callback: allocate `items * size` bytes with
 * PyMem_RawMalloc().
 * Returns NULL without allocating when the product would exceed
 * PY_SSIZE_T_MAX. `opaque` is unused.
 */
static void*
PyLzma_Malloc(void *opaque, size_t items, size_t size)
{
    /* A zero `size` cannot overflow and must not be used as a divisor. */
    if (size == 0 || items <= (size_t)PY_SSIZE_T_MAX / size) {
        return PyMem_RawMalloc(items * size);
    }
    return NULL;
}
/* lzma_allocator.free callback: release memory obtained from PyLzma_Malloc()
   with PyMem_RawFree(). `opaque` is unused. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
PyMem_RawFree(ptr);
}
/* INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) defines a converter function
   `int FUNCNAME(PyObject *obj, void *ptr)` suitable for the "O&" format unit.

   The Python object is read with PyLong_AsUnsignedLongLong(). If the value
   does not survive a round trip through TYPE, OverflowError is raised.
   On success the value is stored through `ptr` (interpreted as TYPE *) and 1
   is returned; on failure 0 is returned with an exception set.

   Four converters are instantiated below; the macro is then undefined. */
#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
static int \
FUNCNAME(PyObject *obj, void *ptr) \
{ \
unsigned long long val; \
\
val = PyLong_AsUnsignedLongLong(obj); \
if (PyErr_Occurred()) \
return 0; \
if ((unsigned long long)(TYPE)val != val) { \
PyErr_SetString(PyExc_OverflowError, \
"Value too large for " #TYPE " type"); \
return 0; \
} \
*(TYPE *)ptr = (TYPE)val; \
return 1; \
}
INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
#undef INT_TYPE_CONVERTER_FUNC
/* Build an lzma_options_lzma from the filter-spec mapping `spec`.
 *
 * The options are first filled from a preset: spec["preset"] when present,
 * LZMA_PRESET_DEFAULT otherwise. Any explicit per-field entries in the
 * mapping then override those defaults.
 *
 * Returns a block allocated with PyMem_Calloc() that the caller releases with
 * PyMem_Free(), or NULL with an exception set.
 */
static void *
parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
{
    static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
                               "pb", "mode", "nice_len", "mf", "depth", NULL};
    PyObject *id;
    PyObject *preset_obj;
    uint32_t preset = LZMA_PRESET_DEFAULT;
    lzma_options_lzma *options;

    /* Step 1: look up the optional preset. A missing key is not an error. */
    preset_obj = PyMapping_GetItemString(spec, "preset");
    if (preset_obj != NULL) {
        int converted = uint32_converter(preset_obj, &preset);
        Py_DECREF(preset_obj);
        if (!converted) {
            return NULL;
        }
    }
    else if (PyErr_ExceptionMatches(PyExc_KeyError)) {
        PyErr_Clear();
    }
    else {
        return NULL;
    }

    /* Step 2: seed the options from the preset. */
    options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
    if (options == NULL) {
        return PyErr_NoMemory();
    }
    if (lzma_lzma_preset(options, preset)) {
        PyErr_Format(state->error, "Invalid compression preset: %u", preset);
        goto fail;
    }

    /* Step 3: apply explicit overrides. "id" and "preset" are accepted as
       keywords but their parsed values are not used here. */
    if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
                                     "|OOO&O&O&O&O&O&O&O&", optnames,
                                     &id, &preset_obj,
                                     uint32_converter, &options->dict_size,
                                     uint32_converter, &options->lc,
                                     uint32_converter, &options->lp,
                                     uint32_converter, &options->pb,
                                     lzma_mode_converter, &options->mode,
                                     uint32_converter, &options->nice_len,
                                     lzma_mf_converter, &options->mf,
                                     uint32_converter, &options->depth)) {
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for LZMA filter");
        goto fail;
    }
    return options;

fail:
    PyMem_Free(options);
    return NULL;
}
/* Build an lzma_options_delta from the filter-spec mapping `spec`.
 *
 * Recognised keys are "id" (accepted, not used here) and "dist", which
 * defaults to 1. The filter type is always LZMA_DELTA_TYPE_BYTE.
 *
 * Returns a block allocated with PyMem_Calloc() that the caller releases with
 * PyMem_Free(), or NULL with an exception set.
 */
static void *
parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
{
    static char *optnames[] = {"id", "dist", NULL};
    PyObject *id;
    uint32_t dist = 1;

    int parsed = PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&",
                                             optnames, &id,
                                             uint32_converter, &dist);
    if (!parsed) {
        /* Replace whatever the parser raised with a uniform ValueError. */
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for delta filter");
        return NULL;
    }

    lzma_options_delta *options =
        (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
    if (options == NULL) {
        return PyErr_NoMemory();
    }
    options->type = LZMA_DELTA_TYPE_BYTE;
    options->dist = dist;
    return options;
}
/* Build an lzma_options_bcj from the filter-spec mapping `spec`.
 *
 * Recognised keys are "id" (accepted, not used here) and "start_offset",
 * which defaults to 0.
 *
 * Returns a block allocated with PyMem_Calloc() that the caller releases with
 * PyMem_Free(), or NULL with an exception set.
 */
static void *
parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
{
    static char *optnames[] = {"id", "start_offset", NULL};
    PyObject *id;
    uint32_t start_offset = 0;

    int parsed = PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&",
                                             optnames, &id,
                                             uint32_converter, &start_offset);
    if (!parsed) {
        /* Replace whatever the parser raised with a uniform ValueError. */
        PyErr_SetString(PyExc_ValueError,
                        "Invalid filter specifier for BCJ filter");
        return NULL;
    }

    lzma_options_bcj *options =
        (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
    if (options == NULL) {
        return PyErr_NoMemory();
    }
    options->start_offset = start_offset;
    return options;
}
/* Convert a Python filter specifier into an lzma_filter.
 *
 * state  module state (source of the exception class and the empty tuple)
 * spec   mapping with an "id" entry plus filter-specific options
 * ptr    points at the lzma_filter to fill in
 *
 * On success f->id holds the filter ID, f->options holds a PyMem-allocated
 * options block owned by the caller, and 1 is returned. On failure 0 is
 * returned with an exception set. f->id may already have been overwritten in
 * that case.
 */
static int
lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
{
    lzma_filter *f = (lzma_filter *)ptr;
    void *(*parse_options)(_lzma_state *, PyObject *);

    if (!PyMapping_Check(spec)) {
        PyErr_SetString(PyExc_TypeError,
                        "Filter specifier must be a dict or dict-like object");
        return 0;
    }

    PyObject *id_obj = PyMapping_GetItemString(spec, "id");
    if (id_obj == NULL) {
        /* Only a missing key is rewritten; other lookup errors propagate. */
        if (PyErr_ExceptionMatches(PyExc_KeyError)) {
            PyErr_SetString(PyExc_ValueError,
                            "Filter specifier must have an \"id\" entry");
        }
        return 0;
    }
    f->id = PyLong_AsUnsignedLongLong(id_obj);
    Py_DECREF(id_obj);
    if (PyErr_Occurred()) {
        return 0;
    }

    /* Choose the options parser that matches the filter family. */
    switch (f->id) {
    case LZMA_FILTER_LZMA1:
    case LZMA_FILTER_LZMA2:
        parse_options = parse_filter_spec_lzma;
        break;
    case LZMA_FILTER_DELTA:
        parse_options = parse_filter_spec_delta;
        break;
    case LZMA_FILTER_X86:
    case LZMA_FILTER_POWERPC:
    case LZMA_FILTER_IA64:
    case LZMA_FILTER_ARM:
    case LZMA_FILTER_ARMTHUMB:
    case LZMA_FILTER_SPARC:
        parse_options = parse_filter_spec_bcj;
        break;
    default:
        PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
        return 0;
    }

    f->options = parse_options(state, spec);
    return f->options != NULL;
}
/* Release the options block of every filter in `filters`, stopping at the
 * LZMA_VLI_UNKNOWN terminator. The array itself is not freed.
 */
static void
free_filter_chain(lzma_filter filters[])
{
    lzma_filter *entry = filters;

    while (entry->id != LZMA_VLI_UNKNOWN) {
        PyMem_Free(entry->options);
        entry++;
    }
}
/* Convert a Python sequence of filter specifiers into a terminated
 * lzma_filter array.
 *
 * state        module state
 * filters      output array; the callers in this file size it
 *              LZMA_FILTERS_MAX + 1
 * filterspecs  sequence of filter-spec mappings
 *
 * Returns 0 on success; `filters` is then terminated with LZMA_VLI_UNKNOWN
 * and the caller must eventually call free_filter_chain(). Returns -1 with an
 * exception set on failure, after freeing any options already parsed.
 */
static int
parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
{
    Py_ssize_t count = PySequence_Length(filterspecs);

    if (count == -1) {
        return -1;
    }
    if (count > LZMA_FILTERS_MAX) {
        PyErr_Format(PyExc_ValueError,
                     "Too many filters - liblzma supports a maximum of %d",
                     LZMA_FILTERS_MAX);
        return -1;
    }

    for (Py_ssize_t idx = 0; idx < count; idx++) {
        int converted = 0;
        PyObject *item = PySequence_GetItem(filterspecs, idx);

        if (item != NULL) {
            converted = lzma_filter_converter(state, item, &filters[idx]);
            Py_DECREF(item);
        }
        if (converted) {
            continue;
        }

        /* Terminate at the failing slot so only the fully parsed entries
           before it are freed. */
        filters[idx].id = LZMA_VLI_UNKNOWN;
        free_filter_chain(filters);
        return -1;
    }

    filters[count].id = LZMA_VLI_UNKNOWN;
    return 0;
}
/* Store `value` in the dict `spec` under the string `key`.
 * The key is created as an interned unicode object and the value as a Python
 * int. Returns the result of PyDict_SetItem(), or -1 with an exception set
 * when either object cannot be created.
 */
static int
spec_add_field(PyObject *spec, const char *key, unsigned long long value)
{
    int status = -1;
    PyObject *key_object = NULL;
    PyObject *value_object = PyLong_FromUnsignedLongLong(value);

    if (value_object == NULL) {
        goto done;
    }
    key_object = PyUnicode_InternFromString(key);
    if (key_object == NULL) {
        goto done;
    }
    status = PyDict_SetItem(spec, key_object, value_object);

done:
    /* The dict holds its own references; drop ours on every path. */
    Py_XDECREF(key_object);
    Py_XDECREF(value_object);
    return status;
}
/* Build a filter-spec dict describing the decoded filter `f`.
 *
 * The dict always has an "id" entry. The remaining entries depend on the
 * filter family:
 *   LZMA1  -> lc, lp, pb, dict_size
 *   LZMA2  -> dict_size
 *   delta  -> dist
 *   BCJ    -> start_offset, only when an options block is present
 *
 * Returns a new dict, or NULL with an exception set (ValueError for an
 * unknown filter ID).
 */
static PyObject *
build_filter_spec(const lzma_filter *f)
{
    PyObject *spec;

    spec = PyDict_New();
    if (spec == NULL) {
        return NULL;
    }

#define ADD_FIELD(SOURCE, FIELD) \
    do { \
        if (spec_add_field(spec, #FIELD, SOURCE->FIELD) == -1) \
            goto error;\
    } while (0)

    ADD_FIELD(f, id);

    switch (f->id) {
        case LZMA_FILTER_LZMA1: {
            lzma_options_lzma *options = f->options;
            ADD_FIELD(options, lc);
            ADD_FIELD(options, lp);
            ADD_FIELD(options, pb);
            ADD_FIELD(options, dict_size);
            break;
        }
        case LZMA_FILTER_LZMA2: {
            lzma_options_lzma *options = f->options;
            ADD_FIELD(options, dict_size);
            break;
        }
        case LZMA_FILTER_DELTA: {
            lzma_options_delta *options = f->options;
            ADD_FIELD(options, dist);
            break;
        }
        case LZMA_FILTER_X86:
        case LZMA_FILTER_POWERPC:
        case LZMA_FILTER_IA64:
        case LZMA_FILTER_ARM:
        case LZMA_FILTER_ARMTHUMB:
        case LZMA_FILTER_SPARC: {
            lzma_options_bcj *options = f->options;
            /* lzma_properties_decode() succeeds with options == NULL when a
               BCJ filter is given empty properties. Dereferencing it would
               crash, so the field is simply omitted in that case. */
            if (options != NULL) {
                ADD_FIELD(options, start_offset);
            }
            break;
        }
        default:
            PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
            goto error;
    }

#undef ADD_FIELD

    return spec;

error:
    Py_DECREF(spec);
    return NULL;
}
#include "clinic/_lzmamodule.c.h"
/* Run `len` bytes at `data` through the compressor's liblzma stream with the
   given `action` (LZMA_RUN or LZMA_FINISH in this file) and collect the
   output.

   Returns the object built by OutputBuffer_Finish() (presumably a bytes
   object; confirm against pycore_blocks_output_buffer.h), or NULL with an
   exception set. The callers in this file hold c->lock around the call. */
static PyObject *
compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
{
PyObject *result;
_BlocksOutputBuffer buffer = {.list = NULL};
_lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
assert(state != NULL);
/* max_length of -1: no output limit is requested for compression. */
if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
goto error;
}
c->lzs.next_in = data;
c->lzs.avail_in = len;
for (;;) {
lzma_ret lzret;
/* liblzma does the real work without the GIL held. */
Py_BEGIN_ALLOW_THREADS
lzret = lzma_code(&c->lzs, action);
Py_END_ALLOW_THREADS
/* A buffer error with no input supplied and output space still free is
   not treated as a failure. */
if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
lzret = LZMA_OK;
}
if (catch_lzma_error(state, lzret)) {
goto error;
}
/* Done when all input is consumed (LZMA_RUN) or the stream has ended
   (LZMA_FINISH). Otherwise grow the output buffer once it is full. */
if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
(action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
break;
} else if (c->lzs.avail_out == 0) {
if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
goto error;
}
}
}
result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
if (result != NULL) {
return result;
}
/* A NULL result from OutputBuffer_Finish() falls through to the error path. */
error:
OutputBuffer_OnError(&buffer);
return NULL;
}
/* LZMACompressor.compress(data): compress `data` with action LZMA_RUN while
 * holding the object's lock. Raises ValueError once the compressor has been
 * flushed. Returns the result of compress(), or NULL with an exception set.
 */
static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
{
    PyObject *result = NULL;

    ACQUIRE_LOCK(self);
    if (!self->flushed) {
        result = compress(self, data->buf, data->len, LZMA_RUN);
    }
    else {
        PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
    }
    RELEASE_LOCK(self);
    return result;
}
/* LZMACompressor.flush(): finish the stream (action LZMA_FINISH) while
 * holding the object's lock. The compressor is marked flushed before the
 * final compress() call, so it stays unusable even if that call fails.
 * A second call raises ValueError. Returns the result of compress(), or NULL
 * with an exception set.
 */
static PyObject *
_lzma_LZMACompressor_flush_impl(Compressor *self)
{
    PyObject *result = NULL;

    ACQUIRE_LOCK(self);
    if (!self->flushed) {
        self->flushed = 1;
        result = compress(self, NULL, 0, LZMA_FINISH);
    }
    else {
        PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
    }
    RELEASE_LOCK(self);
    return result;
}
/* Initialise `lzs` as an .xz encoder.
 * With filterspecs == None the preset-based lzma_easy_encoder() is used;
 * otherwise the filter chain is parsed and handed to lzma_stream_encoder().
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
                   int check, uint32_t preset, PyObject *filterspecs)
{
    lzma_ret lzret;

    if (filterspecs != Py_None) {
        lzma_filter filters[LZMA_FILTERS_MAX + 1];

        if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
            return -1;
        }
        lzret = lzma_stream_encoder(lzs, filters, check);
        /* The parsed options are released right after the encoder call. */
        free_filter_chain(filters);
    }
    else {
        lzret = lzma_easy_encoder(lzs, preset, check);
    }

    return catch_lzma_error(state, lzret) ? -1 : 0;
}
/* Initialise `lzs` as a legacy .lzma ("alone") encoder.
 * With filterspecs == None the options come from `preset`. Otherwise the
 * filter chain must consist of exactly one LZMA1 filter, whose options are
 * used. Returns 0 on success, -1 with an exception set on failure.
 */
static int
Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
{
    lzma_ret lzret;

    if (filterspecs != Py_None) {
        lzma_filter filters[LZMA_FILTERS_MAX + 1];

        if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
            return -1;
        }
        /* filters[1] is only inspected when filters[0] is an LZMA1 entry. */
        if (filters[0].id != LZMA_FILTER_LZMA1 ||
            filters[1].id != LZMA_VLI_UNKNOWN) {
            PyErr_SetString(PyExc_ValueError,
                            "Invalid filter chain for FORMAT_ALONE - "
                            "must be a single LZMA1 filter");
            lzret = LZMA_PROG_ERROR;
        }
        else {
            lzret = lzma_alone_encoder(lzs, filters[0].options);
        }
        free_filter_chain(filters);
    }
    else {
        lzma_options_lzma options;

        if (lzma_lzma_preset(&options, preset)) {
            PyErr_Format(state->error, "Invalid compression preset: %u", preset);
            return -1;
        }
        lzret = lzma_alone_encoder(lzs, &options);
    }

    /* PyErr_Occurred() is tested first so the ValueError raised above is not
       replaced by the generic message for LZMA_PROG_ERROR. */
    return (PyErr_Occurred() || catch_lzma_error(state, lzret)) ? -1 : 0;
}
/* Initialise `lzs` as a raw encoder using the filter chain in `filterspecs`.
 * A filter chain is mandatory: None raises ValueError.
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
{
    lzma_filter filters[LZMA_FILTERS_MAX + 1];

    if (filterspecs == Py_None) {
        PyErr_SetString(PyExc_ValueError,
                        "Must specify filters for FORMAT_RAW");
        return -1;
    }
    if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
        return -1;
    }

    lzma_ret lzret = lzma_raw_encoder(lzs, filters);
    /* The parsed options are released right after the encoder call. */
    free_filter_chain(filters);

    return catch_lzma_error(state, lzret) ? -1 : 0;
}
/* tp_new for LZMACompressor(format=FORMAT_XZ, check=-1, preset=None,
 * filters=None).
 *
 * Validates the argument combination, allocates the object, installs the
 * custom allocator and the lock, then initialises the liblzma encoder for
 * the requested container format.
 *
 * Returns the new object, or NULL with an exception set.
 */
static PyObject *
Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
    int format = FORMAT_XZ;
    int check = -1;
    uint32_t preset = LZMA_PRESET_DEFAULT;
    PyObject *preset_obj = Py_None;
    PyObject *filterspecs = Py_None;
    Compressor *self;
    int status;

    _lzma_state *state = PyType_GetModuleState(type);
    assert(state != NULL);

    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|iiOO:LZMACompressor", arg_names,
                                     &format, &check, &preset_obj,
                                     &filterspecs)) {
        return NULL;
    }

    /* Only the .xz container carries an integrity check. */
    if (format != FORMAT_XZ) {
        if (check != -1 && check != LZMA_CHECK_NONE) {
            PyErr_SetString(PyExc_ValueError,
                            "Integrity checks are only supported by FORMAT_XZ");
            return NULL;
        }
    }

    /* preset and filters are mutually exclusive. */
    if (preset_obj != Py_None) {
        if (filterspecs != Py_None) {
            PyErr_SetString(PyExc_ValueError,
                            "Cannot specify both preset and filter chain");
            return NULL;
        }
        if (!uint32_converter(preset_obj, &preset)) {
            return NULL;
        }
    }

    assert(type != NULL && type->tp_alloc != NULL);
    self = (Compressor *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    self->alloc.opaque = NULL;
    self->alloc.alloc = PyLzma_Malloc;
    self->alloc.free = PyLzma_Free;
    self->lzs.allocator = &self->alloc;

    self->lock = PyThread_allocate_lock();
    if (self->lock == NULL) {
        Py_DECREF(self);
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
        return NULL;
    }

    self->flushed = 0;

    switch (format) {
    case FORMAT_XZ:
        if (check == -1) {
            check = LZMA_CHECK_CRC64;
        }
        status = Compressor_init_xz(state, &self->lzs, check, preset,
                                    filterspecs);
        break;
    case FORMAT_ALONE:
        status = Compressor_init_alone(state, &self->lzs, preset, filterspecs);
        break;
    case FORMAT_RAW:
        status = Compressor_init_raw(state, &self->lzs, filterspecs);
        break;
    default:
        PyErr_Format(PyExc_ValueError,
                     "Invalid container format: %d", format);
        status = -1;
        break;
    }

    if (status != 0) {
        Py_DECREF(self);
        return NULL;
    }
    return (PyObject *)self;
}
/* tp_dealloc for LZMACompressor: end the liblzma stream, free the lock if one
   was allocated, free the object through tp_free, and finally drop the
   reference to the type object. */
static void
Compressor_dealloc(Compressor *self)
{
lzma_end(&self->lzs);
if (self->lock != NULL) {
PyThread_free_lock(self->lock);
}
/* Fetch the type before the object is freed; it is released last. */
PyTypeObject *tp = Py_TYPE(self);
tp->tp_free((PyObject *)self);
Py_DECREF(tp);
}
/* Method table for LZMACompressor (compress and flush); the entries come from
   macros that are not defined in the visible part of this file, presumably in
   the included clinic header. */
static PyMethodDef Compressor_methods[] = {
_LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
_LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
{NULL}
};
/* tp_traverse for LZMACompressor: the only object visited is the instance's
   type. */
static int
Compressor_traverse(Compressor *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
return 0;
}
/* Docstring for LZMACompressor; installed through the Py_tp_doc slot. */
PyDoc_STRVAR(Compressor_doc,
"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
"\n"
"Create a compressor object for compressing data incrementally.\n"
"\n"
"format specifies the container format to use for the output. This can\n"
"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
"\n"
"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
"\n"
"The settings used by the compressor can be specified either as a\n"
"preset compression level (with the 'preset' argument), or in detail\n"
"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
"the raw compressor does not support preset compression levels.\n"
"\n"
"preset (if provided) should be an integer in the range 0-9, optionally\n"
"OR-ed with the constant PRESET_EXTREME.\n"
"\n"
"filters (if provided) should be a sequence of dicts. Each dict should\n"
"have an entry for \"id\" indicating the ID of the filter, plus\n"
"additional entries for options to the filter.\n"
"\n"
"For one-shot compression, use the compress() function instead.\n");
/* Slot table for the LZMACompressor heap type: deallocator, methods,
   constructor, docstring and traverse function. */
static PyType_Slot lzma_compressor_type_slots[] = {
{Py_tp_dealloc, Compressor_dealloc},
{Py_tp_methods, Compressor_methods},
{Py_tp_new, Compressor_new},
{Py_tp_doc, (char *)Compressor_doc},
{Py_tp_traverse, Compressor_traverse},
{0, 0}
};
/* Type spec for _lzma.LZMACompressor. Py_TPFLAGS_BASETYPE is not among the
   flags, so the type cannot be subclassed; the code in this file calls
   PyType_GetModuleState() directly on the instance's type. */
static PyType_Spec lzma_compressor_type_spec = {
.name = "_lzma.LZMACompressor",
.basicsize = sizeof(Compressor),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
.slots = lzma_compressor_type_slots,
};
/* Decode the input currently queued on d->lzs (next_in / avail_in) into a
   fresh output buffer.

   max_length is forwarded to the output buffer and compared against the
   amount of data produced: decoding stops once the buffer is full and holds
   exactly max_length bytes. decompress() asserts that max_length >= 0
   whenever the buffer was left full.

   Side effects: updates d->check when liblzma reports the check type and sets
   d->eof on LZMA_STREAM_END.

   Returns the object built by OutputBuffer_Finish(), or NULL with an
   exception set. */
static PyObject*
decompress_buf(Decompressor *d, Py_ssize_t max_length)
{
PyObject *result;
lzma_stream *lzs = &d->lzs;
_BlocksOutputBuffer buffer = {.list = NULL};
_lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
assert(state != NULL);
if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
goto error;
}
for (;;) {
lzma_ret lzret;
/* liblzma does the real work without the GIL held. */
Py_BEGIN_ALLOW_THREADS
lzret = lzma_code(lzs, LZMA_RUN);
Py_END_ALLOW_THREADS
/* A buffer error with the input exhausted and output space still free is
   not treated as a failure. */
if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
lzret = LZMA_OK;
}
if (catch_lzma_error(state, lzret)) {
goto error;
}
if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
d->check = lzma_get_check(&d->lzs);
}
if (lzret == LZMA_STREAM_END) {
d->eof = 1;
break;
} else if (lzs->avail_out == 0) {
/* Output space is tested before input: with a full output block, either
   stop (the max_length limit has been reached) or grow and keep decoding. */
if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
break;
}
if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
goto error;
}
} else if (lzs->avail_in == 0) {
/* All input consumed and output space remains: nothing more to do. */
break;
}
}
result = OutputBuffer_Finish(&buffer, lzs->avail_out);
if (result != NULL) {
return result;
}
/* A NULL result from OutputBuffer_Finish() falls through to the error path. */
error:
OutputBuffer_OnError(&buffer);
return NULL;
}
/* Feed `len` bytes at `data` to the decompressor and return the decoded
   output, producing at most max_length bytes when max_length >= 0.

   Input that liblzma did not consume is kept in d->input_buffer between
   calls; a non-NULL lzs->next_in on entry means such leftover input exists.
   After the call d->needs_input, and d->unused_data at end of stream, reflect
   the new state.

   Returns the result of decompress_buf(), or NULL with an exception set. */
static PyObject *
decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
{
char input_buffer_in_use;
PyObject *result;
lzma_stream *lzs = &d->lzs;
/* Leftover input from an earlier call: append the new data behind it. */
if (lzs->next_in != NULL) {
size_t avail_now, avail_total;
/* Free bytes after the pending data, at the tail of the buffer. */
avail_now = (d->input_buffer + d->input_buffer_size)
- (lzs->next_in + lzs->avail_in);
/* Free bytes if the pending data were moved to the start of the buffer. */
avail_total = d->input_buffer_size - lzs->avail_in;
if (avail_total < len) {
/* Even compaction would not make room: enlarge the buffer. */
size_t offset = lzs->next_in - d->input_buffer;
uint8_t *tmp;
size_t new_size = d->input_buffer_size + len - avail_now;
/* Go through a temporary so the old buffer is not lost when realloc fails. */
tmp = PyMem_Realloc(d->input_buffer, new_size);
if (tmp == NULL) {
PyErr_SetNone(PyExc_MemoryError);
return NULL;
}
d->input_buffer = tmp;
d->input_buffer_size = new_size;
lzs->next_in = d->input_buffer + offset;
}
else if (avail_now < len) {
/* Enough room overall, but not at the tail: compact first. */
memmove(d->input_buffer, lzs->next_in,
lzs->avail_in);
lzs->next_in = d->input_buffer;
}
memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
lzs->avail_in += len;
input_buffer_in_use = 1;
}
else {
/* No leftover input: decode straight from the caller's buffer. */
lzs->next_in = data;
lzs->avail_in = len;
input_buffer_in_use = 0;
}
result = decompress_buf(d, max_length);
if (result == NULL) {
lzs->next_in = NULL;
return NULL;
}
if (d->eof) {
d->needs_input = 0;
/* Whatever follows the end of the stream becomes unused_data. */
if (lzs->avail_in > 0) {
Py_XSETREF(d->unused_data,
PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
if (d->unused_data == NULL) {
goto error;
}
}
}
else if (lzs->avail_in == 0) {
/* All input consumed; NULL marks "no leftover input" for the next call. */
lzs->next_in = NULL;
if (lzs->avail_out == 0) {
/* The output buffer is full as well, so more output may still be pending
   inside liblzma: do not ask for more input yet. */
d->needs_input = 0;
assert(max_length >= 0);
} else {
d->needs_input = 1;
}
}
else {
/* Input remains unconsumed. */
d->needs_input = 0;
/* If decoding ran straight from the caller's buffer, the unconsumed tail
   must be copied into our own buffer to survive until the next call. */
if (!input_buffer_in_use) {
/* Drop an existing buffer that is too small rather than resizing it. */
if (d->input_buffer != NULL &&
d->input_buffer_size < lzs->avail_in) {
PyMem_Free(d->input_buffer);
d->input_buffer = NULL;
}
if (d->input_buffer == NULL) {
d->input_buffer = PyMem_Malloc(lzs->avail_in);
if (d->input_buffer == NULL) {
PyErr_SetNone(PyExc_MemoryError);
goto error;
}
d->input_buffer_size = lzs->avail_in;
}
memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
lzs->next_in = d->input_buffer;
}
}
return result;
error:
Py_XDECREF(result);
return NULL;
}
/* LZMADecompressor.decompress(data, max_length): decode `data` while holding
 * the object's lock. Raises EOFError when the end of the stream was already
 * reached. Returns the result of decompress(), or NULL with an exception set.
 */
static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
                                       Py_ssize_t max_length)
{
    PyObject *result = NULL;

    ACQUIRE_LOCK(self);
    if (!self->eof) {
        result = decompress(self, data->buf, data->len, max_length);
    }
    else {
        PyErr_SetString(PyExc_EOFError, "Already at end of stream");
    }
    RELEASE_LOCK(self);
    return result;
}
/* Initialise `lzs` as a raw decoder using the filter chain in `filterspecs`.
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
{
    lzma_filter filters[LZMA_FILTERS_MAX + 1];

    if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
        return -1;
    }

    lzma_ret lzret = lzma_raw_decoder(lzs, filters);
    /* The parsed options are released right after the decoder call. */
    free_filter_chain(filters);

    return catch_lzma_error(state, lzret) ? -1 : 0;
}
/* Constructor body for LZMADecompressor(format, memlimit, filters).
 *
 * Argument rules enforced here:
 *   - memlimit may not be combined with FORMAT_RAW;
 *   - filters are required for FORMAT_RAW and rejected for every other
 *     format.
 *
 * Allocates the object, installs the custom allocator and the lock, resets
 * the bookkeeping fields and initialises the liblzma decoder for `format`.
 *
 * Returns the new object, or NULL with an exception set.
 */
static PyObject *
_lzma_LZMADecompressor_impl(PyTypeObject *type, int format,
                            PyObject *memlimit, PyObject *filters)
{
    Decompressor *self;
    const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
    uint64_t memlimit_ = UINT64_MAX;
    int failed;

    _lzma_state *state = PyType_GetModuleState(type);
    assert(state != NULL);

    if (memlimit != Py_None) {
        if (format == FORMAT_RAW) {
            PyErr_SetString(PyExc_ValueError,
                            "Cannot specify memory limit with FORMAT_RAW");
            return NULL;
        }
        memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
        if (PyErr_Occurred()) {
            return NULL;
        }
    }

    if (format == FORMAT_RAW) {
        if (filters == Py_None) {
            PyErr_SetString(PyExc_ValueError,
                            "Must specify filters for FORMAT_RAW");
            return NULL;
        }
    }
    else if (filters != Py_None) {
        PyErr_SetString(PyExc_ValueError,
                        "Cannot specify filters except with FORMAT_RAW");
        return NULL;
    }

    assert(type != NULL && type->tp_alloc != NULL);
    self = (Decompressor *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    self->alloc.opaque = NULL;
    self->alloc.alloc = PyLzma_Malloc;
    self->alloc.free = PyLzma_Free;
    self->lzs.allocator = &self->alloc;
    /* NULL next_in tells decompress() that no leftover input is buffered. */
    self->lzs.next_in = NULL;

    self->lock = PyThread_allocate_lock();
    if (self->lock == NULL) {
        Py_DECREF(self);
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
        return NULL;
    }

    self->check = LZMA_CHECK_UNKNOWN;
    self->needs_input = 1;
    self->input_buffer = NULL;
    self->input_buffer_size = 0;
    Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
    if (self->unused_data == NULL) {
        goto error;
    }

    switch (format) {
    case FORMAT_AUTO:
        failed = catch_lzma_error(
            state, lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags));
        break;
    case FORMAT_XZ:
        failed = catch_lzma_error(
            state, lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags));
        break;
    case FORMAT_ALONE:
        /* This format is given LZMA_CHECK_NONE up front. */
        self->check = LZMA_CHECK_NONE;
        failed = catch_lzma_error(
            state, lzma_alone_decoder(&self->lzs, memlimit_));
        break;
    case FORMAT_RAW:
        self->check = LZMA_CHECK_NONE;
        failed = (Decompressor_init_raw(state, &self->lzs, filters) == -1);
        break;
    default:
        PyErr_Format(PyExc_ValueError,
                     "Invalid container format: %d", format);
        failed = 1;
        break;
    }
    if (failed) {
        goto error;
    }
    return (PyObject *)self;

error:
    Py_DECREF(self);
    return NULL;
}
/* tp_dealloc for LZMADecompressor: free the saved-input buffer, end the
   liblzma stream, drop unused_data, free the lock if one was allocated, free
   the object through tp_free, and finally drop the reference to the type
   object. */
static void
Decompressor_dealloc(Decompressor *self)
{
if(self->input_buffer != NULL)
PyMem_Free(self->input_buffer);
lzma_end(&self->lzs);
Py_CLEAR(self->unused_data);
if (self->lock != NULL) {
PyThread_free_lock(self->lock);
}
/* Fetch the type before the object is freed; it is released last. */
PyTypeObject *tp = Py_TYPE(self);
tp->tp_free((PyObject *)self);
Py_DECREF(tp);
}
/* tp_traverse for LZMADecompressor: the only object visited is the instance's
   type. */
static int
Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
return 0;
}
/* Method table for LZMADecompressor (decompress); the entry comes from a
   macro that is not defined in the visible part of this file, presumably in
   the included clinic header. */
static PyMethodDef Decompressor_methods[] = {
_LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
{NULL}
};
/* Docstrings for the read-only attributes of LZMADecompressor. */
PyDoc_STRVAR(Decompressor_check_doc,
"ID of the integrity check used by the input stream.");
PyDoc_STRVAR(Decompressor_eof_doc,
"True if the end-of-stream marker has been reached.");
PyDoc_STRVAR(Decompressor_needs_input_doc,
"True if more input is needed before more decompressed data can be produced.");
PyDoc_STRVAR(Decompressor_unused_data_doc,
"Data found after the end of the compressed stream.");
/* Read-only attributes that map directly onto fields of the Decompressor
   struct. */
static PyMemberDef Decompressor_members[] = {
{"check", T_INT, offsetof(Decompressor, check), READONLY,
Decompressor_check_doc},
{"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
Decompressor_eof_doc},
{"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
Decompressor_needs_input_doc},
{"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
Decompressor_unused_data_doc},
{NULL}
};
/* Slot table for the LZMADecompressor heap type: deallocator, methods,
   constructor, docstring, traverse function and members. The constructor and
   docstring symbols are not defined in the visible part of this file. */
static PyType_Slot lzma_decompressor_type_slots[] = {
{Py_tp_dealloc, Decompressor_dealloc},
{Py_tp_methods, Decompressor_methods},
{Py_tp_new, _lzma_LZMADecompressor},
{Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
{Py_tp_traverse, Decompressor_traverse},
{Py_tp_members, Decompressor_members},
{0, 0}
};
/* Type spec for _lzma.LZMADecompressor. Py_TPFLAGS_BASETYPE is not among the
   flags, so the type cannot be subclassed; the code in this file calls
   PyType_GetModuleState() directly on the instance's type. */
static PyType_Spec lzma_decompressor_type_spec = {
.name = "_lzma.LZMADecompressor",
.basicsize = sizeof(Decompressor),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
.slots = lzma_decompressor_type_slots,
};
/* is_check_supported(check_id): return a Python bool built from
   lzma_check_is_supported(check_id). */
static PyObject *
_lzma_is_check_supported_impl(PyObject *module, int check_id)
{
return PyBool_FromLong(lzma_check_is_supported(check_id));
}
/* Hand-written docstring, method-table entry and forward declaration for
   _encode_filter_properties(). The wrapper below takes a single object
   argument (METH_O) and converts it with lzma_filter_converter(). */
PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
"_encode_filter_properties($module, filter, /)\n"
"--\n"
"\n"
"Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
"\n"
"The result does not include the filter ID itself, only the options.");
#define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF \
{"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
static PyObject *
_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
/* Argument-parsing wrapper for _encode_filter_properties(filter).
 * Converts `arg` to an lzma_filter, calls the implementation, and frees the
 * filter's options block afterwards. Returns the implementation's result, or
 * NULL with an exception set.
 */
static PyObject *
_lzma__encode_filter_properties(PyObject *module, PyObject *arg)
{
    PyObject *return_value = NULL;
    lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
    _lzma_state *state = get_lzma_state(module);
    assert(state != NULL);

    if (lzma_filter_converter(state, arg, &filter)) {
        return_value = _lzma__encode_filter_properties_impl(module, filter);
    }

    /* The id is still LZMA_VLI_UNKNOWN only when conversion failed before an
       id could be read; in that case there is nothing to free. */
    if (filter.id != LZMA_VLI_UNKNOWN) {
        PyMem_Free(filter.options);
    }
    return return_value;
}
/* Encode the options of `filter` into a new bytes object.
 * The required size is obtained from lzma_properties_size() and the bytes are
 * filled in by lzma_properties_encode(). Returns the bytes object, or NULL
 * with an exception set.
 */
static PyObject *
_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
{
    uint32_t encoded_size;
    _lzma_state *state = get_lzma_state(module);
    assert(state != NULL);

    if (catch_lzma_error(state,
                         lzma_properties_size(&encoded_size, &filter))) {
        return NULL;
    }

    PyObject *encoded = PyBytes_FromStringAndSize(NULL, encoded_size);
    if (encoded == NULL) {
        return NULL;
    }

    lzma_ret lzret = lzma_properties_encode(
        &filter, (uint8_t *)PyBytes_AS_STRING(encoded));
    if (catch_lzma_error(state, lzret)) {
        Py_DECREF(encoded);
        return NULL;
    }
    return encoded;
}
/* _decode_filter_properties(filter_id, encoded_props): decode the encoded
 * properties for the filter `filter_id` and return them as a filter-spec
 * dict built by build_filter_spec(). Returns NULL with an exception set on
 * failure.
 */
static PyObject *
_lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
                                     Py_buffer *encoded_props)
{
    lzma_filter filter;
    _lzma_state *state;
    lzma_ret lzret;
    PyObject *spec;

    filter.id = filter_id;
    state = get_lzma_state(module);
    assert(state != NULL);

    lzret = lzma_properties_decode(
        &filter, NULL, encoded_props->buf, encoded_props->len);
    if (catch_lzma_error(state, lzret)) {
        return NULL;
    }

    spec = build_filter_spec(&filter);

    /* The decode call was given a NULL allocator, so the options block is
       released with plain free() rather than PyMem_Free(). */
    free(filter.options);
    return spec;
}
/* Add the integer `value` to module `m` under `name`.
 * Returns 0 on success and -1 with an exception set on failure. The
 * temporary int object is released here only when PyModule_AddObject()
 * fails.
 */
static int
module_add_int_constant(PyObject *m, const char *name, long long value)
{
    PyObject *constant = PyLong_FromLongLong(value);

    if (constant == NULL) {
        return -1;
    }
    if (PyModule_AddObject(m, name, constant) != 0) {
        Py_DECREF(constant);
        return -1;
    }
    return 0;
}
/* Py_mod_exec slot: populate a freshly created _lzma module.
   Creates the cached empty tuple, adds the integer constants, creates the
   LZMAError exception and the two heap types, and stores the objects in the
   module state. Returns 0 on success, -1 with an exception set on failure. */
static int
lzma_exec(PyObject *module)
{
/* Adds LZMA_<macro> to the module under the name <macro>. */
#define ADD_INT_PREFIX_MACRO(module, macro) \
do { \
if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) { \
return -1; \
} \
} while(0)
/* Adds <macro> to the module under its own name. */
#define ADD_INT_MACRO(module, macro) \
do { \
if (PyModule_AddIntMacro(module, macro) < 0) { \
return -1; \
} \
} while (0)
_lzma_state *state = get_lzma_state(module);
state->empty_tuple = PyTuple_New(0);
if (state->empty_tuple == NULL) {
return -1;
}
/* Container formats. */
ADD_INT_MACRO(module, FORMAT_AUTO);
ADD_INT_MACRO(module, FORMAT_XZ);
ADD_INT_MACRO(module, FORMAT_ALONE);
ADD_INT_MACRO(module, FORMAT_RAW);
/* Integrity checks. */
ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
/* Filter IDs. */
ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
ADD_INT_PREFIX_MACRO(module, FILTER_X86);
ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
/* Match finders. */
ADD_INT_PREFIX_MACRO(module, MF_HC3);
ADD_INT_PREFIX_MACRO(module, MF_HC4);
ADD_INT_PREFIX_MACRO(module, MF_BT2);
ADD_INT_PREFIX_MACRO(module, MF_BT3);
ADD_INT_PREFIX_MACRO(module, MF_BT4);
/* Compression modes. */
ADD_INT_PREFIX_MACRO(module, MODE_FAST);
ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
/* Presets. */
ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
if (state->error == NULL) {
return -1;
}
if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
return -1;
}
state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
&lzma_compressor_type_spec, NULL);
if (state->lzma_compressor_type == NULL) {
return -1;
}
if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
return -1;
}
state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
&lzma_decompressor_type_spec, NULL);
if (state->lzma_decompressor_type == NULL) {
return -1;
}
if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
return -1;
}
return 0;
}
/* Module-level function table. The first and third entries come from macros
   not defined in the visible part of this file; the second is defined
   above. */
static PyMethodDef lzma_methods[] = {
_LZMA_IS_CHECK_SUPPORTED_METHODDEF
_LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
_LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
{NULL}
};
/* Module slots: lzma_exec() runs at module execution, and the module declares
   support for per-interpreter GIL. */
static PyModuleDef_Slot lzma_slots[] = {
{Py_mod_exec, lzma_exec},
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
{0, NULL}
};
/* m_traverse: visit every object held in the module state. */
static int
lzma_traverse(PyObject *module, visitproc visit, void *arg)
{
_lzma_state *state = get_lzma_state(module);
Py_VISIT(state->lzma_compressor_type);
Py_VISIT(state->lzma_decompressor_type);
Py_VISIT(state->error);
Py_VISIT(state->empty_tuple);
return 0;
}
/* m_clear: drop every reference held in the module state. Py_CLEAR sets each
   field to NULL, so a repeated call is harmless. */
static int
lzma_clear(PyObject *module)
{
_lzma_state *state = get_lzma_state(module);
Py_CLEAR(state->lzma_compressor_type);
Py_CLEAR(state->lzma_decompressor_type);
Py_CLEAR(state->error);
Py_CLEAR(state->empty_tuple);
return 0;
}
/* m_free: release the module state by delegating to lzma_clear(). */
static void
lzma_free(void *module)
{
lzma_clear((PyObject *)module);
}
/* Module definition for _lzma. A non-zero m_size reserves room for one
   _lzma_state per module object; initialisation is driven by lzma_slots. */
static PyModuleDef _lzmamodule = {
PyModuleDef_HEAD_INIT,
.m_name = "_lzma",
.m_size = sizeof(_lzma_state),
.m_methods = lzma_methods,
.m_slots = lzma_slots,
.m_traverse = lzma_traverse,
.m_clear = lzma_clear,
.m_free = lzma_free,
};
/* Module entry point: hands the module definition to PyModuleDef_Init(). */
PyMODINIT_FUNC
PyInit__lzma(void)
{
return PyModuleDef_Init(&_lzmamodule);
}