#include "Python.h"
#include "structmember.h"
#include <bzlib.h>
#include <stdio.h>
#include "pycore_blocks_output_buffer.h"
/* Compile-time guard: the OutputBuffer_* helpers below store the size of the
   current output block in a uint32_t (bz_stream.avail_out), so refuse to build
   if a block could be larger than UINT32_MAX. */
#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
#error "The maximum block size accepted by libbzip2 is UINT32_MAX."
#endif
/* Per-module state: the two heap types that _bz2_exec() creates and that
   _bz2_traverse()/_bz2_clear() visit and release. */
typedef struct {
PyTypeObject *bz2_compressor_type;
PyTypeObject *bz2_decompressor_type;
} _bz2_state;
/* Return the _bz2_state stored in `module`.
   The state pointer is only validated with assert(), i.e. callers are
   expected to pass a module whose state has been allocated. */
static inline _bz2_state *
get_module_state(PyObject *module)
{
    _bz2_state *st = (_bz2_state *)PyModule_GetState(module);

    assert(st != NULL);
    return st;
}
/* Forward declaration: find_module_state_by_def() needs the address of the
   module definition, which is initialized near the end of this file. */
static struct PyModuleDef _bz2module;
/* Look up the _bz2 module that `type` belongs to (via the module definition)
   and return that module's state.  The lookup result is only checked with
   assert(). */
static inline _bz2_state *
find_module_state_by_def(PyTypeObject *type)
{
    PyObject *owner = PyType_GetModuleByDef(type, &_bz2module);

    assert(owner != NULL);
    return get_module_state(owner);
}
/* Initialize `buffer` and obtain its first output block.

   Thin adapter over _BlocksOutputBuffer_InitAndGrow() for bz_stream-style
   fields: *next_out receives the block pointer and *avail_out the block size
   narrowed to uint32_t.  The helper's return value is passed through
   unchanged; callers in this file treat a negative value as failure.  Note
   that *avail_out is written even in that case. */
static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
                         char **next_out, uint32_t *avail_out)
{
    const Py_ssize_t block_size =
        _BlocksOutputBuffer_InitAndGrow(buffer, max_length, (void **)next_out);

    *avail_out = (uint32_t)block_size;
    return block_size;
}
/* Append a new output block to `buffer`.

   Thin adapter over _BlocksOutputBuffer_Grow(): the current *avail_out is
   forwarded to the helper, then *next_out / *avail_out are updated to describe
   the new block.  The helper's return value is passed through unchanged;
   callers in this file treat a negative value as failure. */
static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
                  char **next_out, uint32_t *avail_out)
{
    const Py_ssize_t unused_now = (Py_ssize_t)*avail_out;
    const Py_ssize_t block_size =
        _BlocksOutputBuffer_Grow(buffer, (void **)next_out, unused_now);

    *avail_out = (uint32_t)block_size;
    return block_size;
}
/* Return _BlocksOutputBuffer_GetDataSize() for `buffer`, given that
   `avail_out` bytes of the current block are still unused. */
static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out)
{
    const Py_ssize_t unused_now = (Py_ssize_t)avail_out;

    return _BlocksOutputBuffer_GetDataSize(buffer, unused_now);
}
/* Finish `buffer` and return the object produced by
   _BlocksOutputBuffer_Finish(); `avail_out` is the number of unused bytes in
   the current block.  Callers in this file treat NULL as failure. */
static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out)
{
    const Py_ssize_t unused_now = (Py_ssize_t)avail_out;

    return _BlocksOutputBuffer_Finish(buffer, unused_now);
}
/* Error-path cleanup for `buffer`: forwards to _BlocksOutputBuffer_OnError().
   Called from the `error:` labels of compress() and decompress_buf(). */
static inline void
OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
{
_BlocksOutputBuffer_OnError(buffer);
}
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
   function names used in this file onto the unprefixed ones.
   NOTE(review): presumably this targets old libbzip2 releases that predate
   the BZ2_ prefix -- confirm against the libbzip2 changelog. */
#ifndef BZ_CONFIG_ERROR
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd
#endif
/* ACQUIRE_LOCK(obj): take obj->lock.  A non-blocking attempt (waitflag 0) is
   made first; only if that fails is the blocking acquire (waitflag 1)
   performed, between Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so the GIL
   is not held while waiting.
   RELEASE_LOCK(obj): release obj->lock. */
#define ACQUIRE_LOCK(obj) do { \
if (!PyThread_acquire_lock((obj)->lock, 0)) { \
Py_BEGIN_ALLOW_THREADS \
PyThread_acquire_lock((obj)->lock, 1); \
Py_END_ALLOW_THREADS \
} } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
/* Instance layout of _bz2.BZ2Compressor. */
typedef struct {
PyObject_HEAD
/* libbzip2 stream state, initialized by BZ2_bzCompressInit(). */
bz_stream bzs;
/* Set to 1 by flush(); afterwards compress() and flush() raise ValueError. */
int flushed;
/* Guards the stream state; taken via ACQUIRE_LOCK / RELEASE_LOCK. */
PyThread_type_lock lock;
} BZ2Compressor;
/* Instance layout of _bz2.BZ2Decompressor. */
typedef struct {
PyObject_HEAD
/* libbzip2 stream state, initialized by BZ2_bzDecompressInit(). */
bz_stream bzs;
/* Set to 1 once BZ2_bzDecompress() returns BZ_STREAM_END; exposed as the
   read-only T_BOOL member "eof". */
char eof;
/* Bytes object holding input found after the end of the stream; starts out
   as an empty bytes object. */
PyObject *unused_data;
/* Exposed as the read-only T_BOOL member "needs_input"; set to 1 when all
   pending input has been consumed without reaching end of stream. */
char needs_input;
/* Private heap copy of input that was supplied but not yet consumed
   (NULL until first needed), and its allocated size. */
char *input_buffer;
size_t input_buffer_size;
/* Real number of unconsumed input bytes at bzs.next_in.  Kept separately
   because bzs.avail_in is an unsigned int and is fed at most UINT_MAX
   bytes at a time by decompress_buf(). */
size_t bzs_avail_in_real;
/* Guards the stream state; taken via ACQUIRE_LOCK / RELEASE_LOCK. */
PyThread_type_lock lock;
} BZ2Decompressor;
/* Translate a libbzip2 status code into a Python exception.

   Returns 0 (and sets no exception) for the success codes BZ_OK, BZ_RUN_OK,
   BZ_FLUSH_OK, BZ_FINISH_OK and BZ_STREAM_END.  For every other code an
   exception is set and 1 is returned. */
static int
catch_bz2_error(int bzerror)
{
    switch (bzerror) {
    /* Success codes: nothing to report. */
    case BZ_OK:
    case BZ_RUN_OK:
    case BZ_FLUSH_OK:
    case BZ_FINISH_OK:
    case BZ_STREAM_END:
        return 0;

#ifdef BZ_CONFIG_ERROR
    case BZ_CONFIG_ERROR:
        PyErr_SetString(PyExc_SystemError,
                        "libbzip2 was not compiled correctly");
        break;
#endif

    case BZ_PARAM_ERROR:
        PyErr_SetString(PyExc_ValueError,
                        "Internal error - "
                        "invalid parameters passed to libbzip2");
        break;

    case BZ_MEM_ERROR:
        PyErr_NoMemory();
        break;

    case BZ_DATA_ERROR:
    case BZ_DATA_ERROR_MAGIC:
        PyErr_SetString(PyExc_OSError, "Invalid data stream");
        break;

    case BZ_IO_ERROR:
        PyErr_SetString(PyExc_OSError, "Unknown I/O error");
        break;

    case BZ_UNEXPECTED_EOF:
        PyErr_SetString(PyExc_EOFError,
                        "Compressed file ended before the logical "
                        "end-of-stream was detected");
        break;

    case BZ_SEQUENCE_ERROR:
        PyErr_SetString(PyExc_RuntimeError,
                        "Internal error - "
                        "Invalid sequence of commands sent to libbzip2");
        break;

    default:
        PyErr_Format(PyExc_OSError,
                     "Unrecognized error from libbzip2: %d", bzerror);
        break;
    }

    /* Every path that reaches here has set an exception. */
    return 1;
}
/* Run BZ2_bzCompress() with the given `action` over `len` bytes at `data`
   and return the produced output (result of OutputBuffer_Finish()), or NULL
   with an exception set.

   With action == BZ_RUN the loop ends once all input has been handed to
   libbzip2 and consumed; with action == BZ_FINISH it ends when libbzip2
   reports BZ_STREAM_END.  The caller is expected to hold c->lock. */
static PyObject *
compress(BZ2Compressor *c, char *data, size_t len, int action)
{
    PyObject *result;
    _BlocksOutputBuffer buffer = {.list = NULL};
    bz_stream *strm = &c->bzs;

    if (OutputBuffer_InitAndGrow(&buffer, -1,
                                 &strm->next_out, &strm->avail_out) < 0) {
        goto error;
    }
    strm->next_in = data;
    strm->avail_in = 0;

    while (1) {
        int status;

        /* avail_in is an unsigned int, so `len` may not fit: hand the input
           over in chunks of at most UINT_MAX bytes. */
        if (strm->avail_in == 0 && len > 0) {
            strm->avail_in = (unsigned int)Py_MIN(len, UINT_MAX);
            len -= strm->avail_in;
        }

        /* Regular compression: done once the input has run out. */
        if (action == BZ_RUN && strm->avail_in == 0) {
            break;
        }

        /* Current output block is full: get another one. */
        if (strm->avail_out == 0 &&
            OutputBuffer_Grow(&buffer,
                              &strm->next_out, &strm->avail_out) < 0) {
            goto error;
        }

        Py_BEGIN_ALLOW_THREADS
        status = BZ2_bzCompress(strm, action);
        Py_END_ALLOW_THREADS

        if (catch_bz2_error(status)) {
            goto error;
        }

        /* Flushing: done once everything buffered has been written out. */
        if (action == BZ_FINISH && status == BZ_STREAM_END) {
            break;
        }
    }

    result = OutputBuffer_Finish(&buffer, strm->avail_out);
    if (result == NULL) {
        goto error;
    }
    return result;

error:
    OutputBuffer_OnError(&buffer);
    return NULL;
}
/* While the clinic header is being included, clinic_state() expands to the
   module state looked up from a variable named `type`.
   NOTE(review): the header is not visible here; presumably it is Argument
   Clinic output providing the _BZ2_*_METHODDEF macros, the __doc__ strings
   and the _bz2_BZ2Compressor / _bz2_BZ2Decompressor wrappers used below. */
#define clinic_state() (find_module_state_by_def(type))
#include "clinic/_bz2module.c.h"
#undef clinic_state
/* BZ2Compressor.compress(data): feed `data` to the compressor under the
   instance lock and return whatever compress() produces.
   Raises ValueError (returns NULL) if flush() has already been called. */
static PyObject *
_bz2_BZ2Compressor_compress_impl(BZ2Compressor *self, Py_buffer *data)
{
    PyObject *result;

    ACQUIRE_LOCK(self);
    if (!self->flushed) {
        result = compress(self, data->buf, data->len, BZ_RUN);
    }
    else {
        PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
        result = NULL;
    }
    RELEASE_LOCK(self);

    return result;
}
/* BZ2Compressor.flush(): finish the stream under the instance lock and
   return the remaining output from compress(..., BZ_FINISH).
   The `flushed` flag is set before compress() is called, so it stays set even
   if that call fails.  A second call raises ValueError (returns NULL). */
static PyObject *
_bz2_BZ2Compressor_flush_impl(BZ2Compressor *self)
{
    PyObject *result;

    ACQUIRE_LOCK(self);
    if (!self->flushed) {
        self->flushed = 1;
        result = compress(self, NULL, 0, BZ_FINISH);
    }
    else {
        PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
        result = NULL;
    }
    RELEASE_LOCK(self);

    return result;
}
/* Allocation callback installed as bz_stream.bzalloc for the compressor.

   Returns a block of items * size bytes from PyMem_RawMalloc(), or NULL if
   either argument is negative or the product would exceed PY_SSIZE_T_MAX.
   `ctx` is unused.
   NOTE(review): the Raw allocator is presumably chosen because libbzip2 is
   called with the GIL released in compress() -- confirm. */
static void*
BZ2_Malloc(void* ctx, int items, int size)
{
    if (items < 0 || size < 0) {
        return NULL;
    }

    const size_t count = (size_t)items;
    const size_t width = (size_t)size;

    /* Reject count * width > PY_SSIZE_T_MAX without overflowing. */
    if (width != 0 && count > (size_t)PY_SSIZE_T_MAX / width) {
        return NULL;
    }
    return PyMem_RawMalloc(count * width);
}
/* Deallocation callback installed as bz_stream.bzfree for the compressor;
   releases memory obtained from BZ2_Malloc().  `ctx` is unused. */
static void
BZ2_Free(void* ctx, void *ptr)
{
PyMem_RawFree(ptr);
}
/* Constructor body for BZ2Compressor.

   Validates `compresslevel` (1..9, else ValueError), allocates the instance
   through type->tp_alloc, creates its lock, installs the BZ2_Malloc/BZ2_Free
   allocator callbacks and initializes the libbzip2 stream with
   BZ2_bzCompressInit(&bzs, compresslevel, 0, 0).
   Returns the new object, or NULL with an exception set. */
static PyObject *
_bz2_BZ2Compressor_impl(PyTypeObject *type, int compresslevel)
{
    if (compresslevel < 1 || compresslevel > 9) {
        PyErr_SetString(PyExc_ValueError,
                        "compresslevel must be between 1 and 9");
        return NULL;
    }

    assert(type != NULL && type->tp_alloc != NULL);
    BZ2Compressor *self = (BZ2Compressor *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    self->lock = PyThread_allocate_lock();
    if (self->lock == NULL) {
        Py_DECREF(self);
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
        return NULL;
    }

    self->bzs.opaque = NULL;
    self->bzs.bzalloc = BZ2_Malloc;
    self->bzs.bzfree = BZ2_Free;

    int bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
    if (catch_bz2_error(bzerror)) {
        /* Exception already set by catch_bz2_error(). */
        Py_DECREF(self);
        return NULL;
    }
    return (PyObject *)self;
}
/* tp_dealloc for BZ2Compressor: end the libbzip2 stream, free the lock if one
   was allocated, release the object's memory through tp_free, and finally
   drop the reference to the type. */
static void
BZ2Compressor_dealloc(BZ2Compressor *self)
{
    /* Fetch the type up front: `self` is gone after tp_free(). */
    PyTypeObject *tp = Py_TYPE(self);

    BZ2_bzCompressEnd(&self->bzs);
    if (self->lock != NULL) {
        PyThread_free_lock(self->lock);
    }

    tp->tp_free((PyObject *)self);
    Py_DECREF(tp);
}
/* tp_traverse for BZ2Compressor: the only object visited is the instance's
   type.  Py_VISIT relies on the parameter names `visit` and `arg`. */
static int
BZ2Compressor_traverse(BZ2Compressor *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
return 0;
}
/* Method table for BZ2Compressor: compress() and flush().  The *_METHODDEF
   macros come from the included clinic header; {NULL} terminates the table. */
static PyMethodDef BZ2Compressor_methods[] = {
_BZ2_BZ2COMPRESSOR_COMPRESS_METHODDEF
_BZ2_BZ2COMPRESSOR_FLUSH_METHODDEF
{NULL}
};
/* Type slots for the BZ2Compressor heap type; {0, 0} terminates the list.
   _bz2_BZ2Compressor and _bz2_BZ2Compressor__doc__ are not defined in the
   visible part of this file (presumably provided by the clinic header). */
static PyType_Slot bz2_compressor_type_slots[] = {
{Py_tp_dealloc, BZ2Compressor_dealloc},
{Py_tp_methods, BZ2Compressor_methods},
{Py_tp_new, _bz2_BZ2Compressor},
{Py_tp_doc, (char *)_bz2_BZ2Compressor__doc__},
{Py_tp_traverse, BZ2Compressor_traverse},
{0, 0}
};
/* Spec from which _bz2_exec() creates the BZ2Compressor heap type.
   The flags do not include Py_TPFLAGS_BASETYPE, so the type cannot be
   subclassed, and Py_TPFLAGS_IMMUTABLETYPE makes it immutable. */
static PyType_Spec bz2_compressor_type_spec = {
.name = "_bz2.BZ2Compressor",
.basicsize = sizeof(BZ2Compressor),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
.slots = bz2_compressor_type_slots,
};
/* Decompress the pending input described by d->bzs.next_in and
   d->bzs_avail_in_real, collecting output in a blocks output buffer that is
   initialized with `max_length`.

   The loop ends when libbzip2 reports end of stream (d->eof is set), when all
   pending input has been consumed, or when the current output block is full
   and the collected data size equals `max_length`.  On return,
   d->bzs_avail_in_real holds the number of input bytes still unconsumed.

   Returns the object produced by OutputBuffer_Finish(), or NULL with an
   exception set. */
static PyObject*
decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
{
PyObject *result;
_BlocksOutputBuffer buffer = {.list = NULL};
bz_stream *bzs = &d->bzs;
if (OutputBuffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
goto error;
}
for (;;) {
int bzret;
/* avail_in is an unsigned int, so hand libbzip2 at most UINT_MAX bytes per
   call and temporarily take them out of the real pending count. */
bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
d->bzs_avail_in_real -= bzs->avail_in;
/* The GIL is released around the libbzip2 call. */
Py_BEGIN_ALLOW_THREADS
bzret = BZ2_bzDecompress(bzs);
Py_END_ALLOW_THREADS
/* Add back whatever libbzip2 left unconsumed from this chunk. */
d->bzs_avail_in_real += bzs->avail_in;
if (catch_bz2_error(bzret))
goto error;
if (bzret == BZ_STREAM_END) {
d->eof = 1;
break;
} else if (d->bzs_avail_in_real == 0) {
/* All pending input consumed. */
break;
} else if (bzs->avail_out == 0) {
/* Output block full: stop if the size limit has been reached, otherwise
   append another block and keep going. */
if (OutputBuffer_GetDataSize(&buffer, bzs->avail_out) == max_length) {
break;
}
if (OutputBuffer_Grow(&buffer, &bzs->next_out, &bzs->avail_out) < 0) {
goto error;
}
}
}
result = OutputBuffer_Finish(&buffer, bzs->avail_out);
if (result != NULL) {
return result;
}
error:
OutputBuffer_OnError(&buffer);
return NULL;
}
/* Feed `len` bytes at `data` to the decompressor and return the output
   produced by decompress_buf(d, max_length), or NULL with an exception set.

   Input handling:
   - If earlier input is still pending (bzs->next_in != NULL), it lives in
     d->input_buffer; the new data is appended there, growing the buffer or
     moving the pending bytes to its start when needed.
   - Otherwise libbzip2 reads directly from the caller's `data`.

   After decompression:
   - at end of stream, any unconsumed input is stored in d->unused_data;
   - if all input was consumed, d->needs_input is set to 1;
   - otherwise the unconsumed tail is kept in d->input_buffer for the next
     call and d->needs_input is set to 0.

   The caller is expected to hold d->lock. */
static PyObject *
decompress(BZ2Decompressor *d, char *data, size_t len, Py_ssize_t max_length)
{
    char input_buffer_in_use;
    PyObject *result;
    bz_stream *bzs = &d->bzs;

    /* Prepend unconsumed input if necessary */
    if (bzs->next_in != NULL) {
        size_t avail_now, avail_total;

        /* Number of bytes that can be appended after the pending data
           without touching the buffer layout. */
        avail_now = (d->input_buffer + d->input_buffer_size)
            - (bzs->next_in + d->bzs_avail_in_real);

        /* Number of bytes that could be appended if the pending data were
           moved to the beginning of the buffer (overwriting input that has
           already been consumed). */
        avail_total = d->input_buffer_size - d->bzs_avail_in_real;

        if (avail_total < len) {
            size_t offset = bzs->next_in - d->input_buffer;
            char *tmp;
            size_t new_size = d->input_buffer_size + len - avail_now;

            /* Assign to a temporary first so the existing buffer is not
               lost if the reallocation fails. */
            tmp = PyMem_Realloc(d->input_buffer, new_size);
            if (tmp == NULL) {
                PyErr_SetNone(PyExc_MemoryError);
                return NULL;
            }
            d->input_buffer = tmp;
            d->input_buffer_size = new_size;

            /* The buffer may have moved: re-derive next_in from its offset. */
            bzs->next_in = d->input_buffer + offset;
        }
        else if (avail_now < len) {
            /* Enough room overall, but not at the tail: compact. */
            memmove(d->input_buffer, bzs->next_in,
                    d->bzs_avail_in_real);
            bzs->next_in = d->input_buffer;
        }
        memcpy((void*)(bzs->next_in + d->bzs_avail_in_real), data, len);
        d->bzs_avail_in_real += len;
        input_buffer_in_use = 1;
    }
    else {
        bzs->next_in = data;
        d->bzs_avail_in_real = len;
        input_buffer_in_use = 0;
    }

    result = decompress_buf(d, max_length);
    if (result == NULL) {
        bzs->next_in = NULL;
        return NULL;
    }

    if (d->eof) {
        d->needs_input = 0;
        if (d->bzs_avail_in_real > 0) {
            /* Build the new bytes object first and only replace
               d->unused_data on success.  Storing the result directly would
               leave the member NULL on allocation failure, and the
               T_OBJECT_EX "unused_data" attribute would then raise
               AttributeError. */
            PyObject *leftover = PyBytes_FromStringAndSize(
                bzs->next_in, d->bzs_avail_in_real);
            if (leftover == NULL) {
                goto error;
            }
            Py_XSETREF(d->unused_data, leftover);
        }
    }
    else if (d->bzs_avail_in_real == 0) {
        bzs->next_in = NULL;
        d->needs_input = 1;
    }
    else {
        d->needs_input = 0;

        /* If libbzip2 was reading straight from the caller's buffer, the
           unconsumed tail must be copied into our own input buffer, because
           the caller's buffer is not ours to keep. */
        if (!input_buffer_in_use) {

            /* Discard the buffer if it is too small (resizing it could
               needlessly copy its current contents). */
            if (d->input_buffer != NULL &&
                d->input_buffer_size < d->bzs_avail_in_real) {
                PyMem_Free(d->input_buffer);
                d->input_buffer = NULL;
            }

            /* Allocate if necessary */
            if (d->input_buffer == NULL) {
                d->input_buffer = PyMem_Malloc(d->bzs_avail_in_real);
                if (d->input_buffer == NULL) {
                    PyErr_SetNone(PyExc_MemoryError);
                    goto error;
                }
                d->input_buffer_size = d->bzs_avail_in_real;
            }

            /* Copy tail */
            memcpy(d->input_buffer, bzs->next_in, d->bzs_avail_in_real);
            bzs->next_in = d->input_buffer;
        }
    }

    return result;

error:
    Py_XDECREF(result);
    return NULL;
}
/* BZ2Decompressor.decompress(data, max_length): run decompress() under the
   instance lock and return its result.
   Raises EOFError (returns NULL) if the end of the stream was already
   reached by an earlier call. */
static PyObject *
_bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
                                     Py_ssize_t max_length)
{
    PyObject *result;

    ACQUIRE_LOCK(self);
    if (!self->eof) {
        result = decompress(self, data->buf, data->len, max_length);
    }
    else {
        PyErr_SetString(PyExc_EOFError, "End of stream already reached");
        result = NULL;
    }
    RELEASE_LOCK(self);

    return result;
}
/* Constructor body for BZ2Decompressor.

   Allocates the instance through type->tp_alloc, creates its lock, resets the
   input bookkeeping (needs_input = 1, no input buffer, nothing pending),
   sets unused_data to an empty bytes object and initializes the libbzip2
   stream with BZ2_bzDecompressInit(&bzs, 0, 0).
   Returns the new object, or NULL with an exception set. */
static PyObject *
_bz2_BZ2Decompressor_impl(PyTypeObject *type)
{
    assert(type != NULL && type->tp_alloc != NULL);
    BZ2Decompressor *self = (BZ2Decompressor *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }

    self->lock = PyThread_allocate_lock();
    if (self->lock == NULL) {
        Py_DECREF(self);
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
        return NULL;
    }

    self->needs_input = 1;
    self->bzs_avail_in_real = 0;
    self->input_buffer = NULL;
    self->input_buffer_size = 0;

    self->unused_data = PyBytes_FromStringAndSize(NULL, 0);
    if (self->unused_data != NULL) {
        int bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
        if (!catch_bz2_error(bzerror)) {
            return (PyObject *)self;
        }
    }

    /* Either the bytes allocation or the stream initialization failed; an
       exception is already set. */
    Py_DECREF(self);
    return NULL;
}
/* tp_dealloc for BZ2Decompressor: free the private input buffer, end the
   libbzip2 stream, drop unused_data, free the lock if one was allocated,
   release the object's memory through tp_free, and finally drop the reference
   to the type. */
static void
BZ2Decompressor_dealloc(BZ2Decompressor *self)
{
    /* Fetch the type up front: `self` is gone after tp_free(). */
    PyTypeObject *tp = Py_TYPE(self);

    if (self->input_buffer != NULL) {
        PyMem_Free(self->input_buffer);
    }
    BZ2_bzDecompressEnd(&self->bzs);
    Py_CLEAR(self->unused_data);
    if (self->lock != NULL) {
        PyThread_free_lock(self->lock);
    }

    tp->tp_free((PyObject *)self);
    Py_DECREF(tp);
}
/* tp_traverse for BZ2Decompressor: the only object visited is the instance's
   type.  Py_VISIT relies on the parameter names `visit` and `arg`. */
static int
BZ2Decompressor_traverse(BZ2Decompressor *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
return 0;
}
/* Method table for BZ2Decompressor: decompress().  The *_METHODDEF macro
   comes from the included clinic header; {NULL} terminates the table. */
static PyMethodDef BZ2Decompressor_methods[] = {
_BZ2_BZ2DECOMPRESSOR_DECOMPRESS_METHODDEF
{NULL}
};
/* Docstrings for the read-only attributes listed in
   BZ2Decompressor_members. */
PyDoc_STRVAR(BZ2Decompressor_eof__doc__,
"True if the end-of-stream marker has been reached.");
PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__,
"Data found after the end of the compressed stream.");
PyDoc_STRVAR(BZ2Decompressor_needs_input_doc,
"True if more input is needed before more decompressed data can be produced.");
/* Read-only attributes exposed directly from the BZ2Decompressor struct.
   eof and needs_input are `char` fields published as T_BOOL; unused_data is
   an object pointer published as T_OBJECT_EX.  {NULL} terminates the table. */
static PyMemberDef BZ2Decompressor_members[] = {
{"eof", T_BOOL, offsetof(BZ2Decompressor, eof),
READONLY, BZ2Decompressor_eof__doc__},
{"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data),
READONLY, BZ2Decompressor_unused_data__doc__},
{"needs_input", T_BOOL, offsetof(BZ2Decompressor, needs_input), READONLY,
BZ2Decompressor_needs_input_doc},
{NULL}
};
/* Type slots for the BZ2Decompressor heap type; {0, 0} terminates the list.
   _bz2_BZ2Decompressor and _bz2_BZ2Decompressor__doc__ are not defined in the
   visible part of this file (presumably provided by the clinic header). */
static PyType_Slot bz2_decompressor_type_slots[] = {
{Py_tp_dealloc, BZ2Decompressor_dealloc},
{Py_tp_methods, BZ2Decompressor_methods},
{Py_tp_doc, (char *)_bz2_BZ2Decompressor__doc__},
{Py_tp_members, BZ2Decompressor_members},
{Py_tp_new, _bz2_BZ2Decompressor},
{Py_tp_traverse, BZ2Decompressor_traverse},
{0, 0}
};
/* Spec from which _bz2_exec() creates the BZ2Decompressor heap type.
   The flags do not include Py_TPFLAGS_BASETYPE, so the type cannot be
   subclassed, and Py_TPFLAGS_IMMUTABLETYPE makes it immutable. */
static PyType_Spec bz2_decompressor_type_spec = {
.name = "_bz2.BZ2Decompressor",
.basicsize = sizeof(BZ2Decompressor),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
.slots = bz2_decompressor_type_slots,
};
/* Py_mod_exec slot: create the BZ2Compressor and BZ2Decompressor heap types
   from their specs, store them in the module state and add them to the
   module.  Returns 0 on success, -1 (with an exception set) on the first
   failure. */
static int
_bz2_exec(PyObject *module)
{
    _bz2_state *state = get_module_state(module);
    PyTypeObject *tp;

    tp = (PyTypeObject *)PyType_FromModuleAndSpec(
        module, &bz2_compressor_type_spec, NULL);
    state->bz2_compressor_type = tp;
    if (tp == NULL || PyModule_AddType(module, tp) < 0) {
        return -1;
    }

    tp = (PyTypeObject *)PyType_FromModuleAndSpec(
        module, &bz2_decompressor_type_spec, NULL);
    state->bz2_decompressor_type = tp;
    if (tp == NULL || PyModule_AddType(module, tp) < 0) {
        return -1;
    }

    return 0;
}
/* m_traverse: visit the two type objects held in the module state.
   Py_VISIT relies on the parameter names `visit` and `arg`. */
static int
_bz2_traverse(PyObject *module, visitproc visit, void *arg)
{
_bz2_state *state = get_module_state(module);
Py_VISIT(state->bz2_compressor_type);
Py_VISIT(state->bz2_decompressor_type);
return 0;
}
/* m_clear: drop the references to the two type objects held in the module
   state (Py_CLEAR also resets the pointers to NULL).  Always returns 0. */
static int
_bz2_clear(PyObject *module)
{
_bz2_state *state = get_module_state(module);
Py_CLEAR(state->bz2_compressor_type);
Py_CLEAR(state->bz2_decompressor_type);
return 0;
}
/* m_free: release the module state by delegating to _bz2_clear(); its return
   value is deliberately ignored. */
static void
_bz2_free(void *module)
{
(void)_bz2_clear((PyObject *)module);
}
/* Module slots: _bz2_exec runs at module execution, and the module declares
   support for interpreters with their own GIL.  {0, NULL} terminates. */
static struct PyModuleDef_Slot _bz2_slots[] = {
{Py_mod_exec, _bz2_exec},
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
{0, NULL}
};
/* Module definition for "_bz2".  m_size reserves a _bz2_state per module
   instance; the traverse/clear/free hooks manage the types stored in it. */
static struct PyModuleDef _bz2module = {
.m_base = PyModuleDef_HEAD_INIT,
.m_name = "_bz2",
.m_size = sizeof(_bz2_state),
.m_traverse = _bz2_traverse,
.m_clear = _bz2_clear,
.m_free = _bz2_free,
.m_slots = _bz2_slots,
};
/* Import entry point: returns the result of PyModuleDef_Init(&_bz2module).
   The module body itself runs later through the Py_mod_exec slot
   (_bz2_exec). */
PyMODINIT_FUNC
PyInit__bz2(void)
{
return PyModuleDef_Init(&_bz2module);
}