CoCalc -- _lzmamodule.c

GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_lzmamodule.c
¹² views
1
/* _lzma - Low-level Python interface to liblzma.
2

3
   Initial implementation by Per Øyvind Karlsen.
4
   Rewritten by Nadeem Vawda.
5

6
*/
7

8
#include "Python.h"
9
#include "structmember.h"         // PyMemberDef
10

11
#include <stdlib.h>               // free()
12
#include <string.h>
13

14
#include <lzma.h>
15

16
// Blocks output buffer wrappers
17
#include "pycore_blocks_output_buffer.h"
18

19
#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
20
    #error "The maximum block size accepted by liblzma is SIZE_MAX."
21
#endif
22

23
/* On success, return value >= 0
24
   On failure, return -1 */
25
static inline Py_ssize_t
26
OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
27
                         uint8_t **next_out, size_t *avail_out)
28
{
29
    Py_ssize_t allocated;
30

31
    allocated = _BlocksOutputBuffer_InitAndGrow(
32
                    buffer, max_length, (void**) next_out);
33
    *avail_out = (size_t) allocated;
34
    return allocated;
35
}
36

37
/* On success, return value >= 0
38
   On failure, return -1 */
39
static inline Py_ssize_t
40
OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
41
                  uint8_t **next_out, size_t *avail_out)
42
{
43
    Py_ssize_t allocated;
44

45
    allocated = _BlocksOutputBuffer_Grow(
46
                    buffer, (void**) next_out, (Py_ssize_t) *avail_out);
47
    *avail_out = (size_t) allocated;
48
    return allocated;
49
}
50

51
static inline Py_ssize_t
52
OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
53
{
54
    return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
55
}
56

57
static inline PyObject *
58
OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
59
{
60
    return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
61
}
62

63
static inline void
64
OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
65
{
66
    _BlocksOutputBuffer_OnError(buffer);
67
}
68

69

70
/* Take (obj)->lock.  A non-blocking attempt is made first; only if that
   fails is a blocking acquire performed, bracketed by
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS. */
#define ACQUIRE_LOCK(obj) \
    do { \
        if (!PyThread_acquire_lock((obj)->lock, 0)) { \
            Py_BEGIN_ALLOW_THREADS \
            PyThread_acquire_lock((obj)->lock, 1); \
            Py_END_ALLOW_THREADS \
        } \
    } while (0)

/* Release (obj)->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
77

78
typedef struct {
79
    PyTypeObject *lzma_compressor_type;
80
    PyTypeObject *lzma_decompressor_type;
81
    PyObject *error;
82
    PyObject *empty_tuple;
83
} _lzma_state;
84

85
static inline _lzma_state*
86
get_lzma_state(PyObject *module)
87
{
88
    void *state = PyModule_GetState(module);
89
    assert(state != NULL);
90
    return (_lzma_state *)state;
91
}
92

93
/* Container formats: */
enum {
    FORMAT_AUTO = 0,
    FORMAT_XZ = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW = 3,
};

/* A check ID one past liblzma's LZMA_CHECK_ID_MAX, i.e. outside the range
   of IDs liblzma itself defines. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
102

103

104
typedef struct {
105
    PyObject_HEAD
106
    lzma_allocator alloc;
107
    lzma_stream lzs;
108
    int flushed;
109
    PyThread_type_lock lock;
110
} Compressor;
111

112
typedef struct {
113
    PyObject_HEAD
114
    lzma_allocator alloc;
115
    lzma_stream lzs;
116
    int check;
117
    char eof;
118
    PyObject *unused_data;
119
    char needs_input;
120
    uint8_t *input_buffer;
121
    size_t input_buffer_size;
122
    PyThread_type_lock lock;
123
} Decompressor;
124

125
/* Helper functions. */
126

127
static int
128
catch_lzma_error(_lzma_state *state, lzma_ret lzret)
129
{
130
    switch (lzret) {
131
        case LZMA_OK:
132
        case LZMA_GET_CHECK:
133
        case LZMA_NO_CHECK:
134
        case LZMA_STREAM_END:
135
            return 0;
136
        case LZMA_UNSUPPORTED_CHECK:
137
            PyErr_SetString(state->error, "Unsupported integrity check");
138
            return 1;
139
        case LZMA_MEM_ERROR:
140
            PyErr_NoMemory();
141
            return 1;
142
        case LZMA_MEMLIMIT_ERROR:
143
            PyErr_SetString(state->error, "Memory usage limit exceeded");
144
            return 1;
145
        case LZMA_FORMAT_ERROR:
146
            PyErr_SetString(state->error, "Input format not supported by decoder");
147
            return 1;
148
        case LZMA_OPTIONS_ERROR:
149
            PyErr_SetString(state->error, "Invalid or unsupported options");
150
            return 1;
151
        case LZMA_DATA_ERROR:
152
            PyErr_SetString(state->error, "Corrupt input data");
153
            return 1;
154
        case LZMA_BUF_ERROR:
155
            PyErr_SetString(state->error, "Insufficient buffer space");
156
            return 1;
157
        case LZMA_PROG_ERROR:
158
            PyErr_SetString(state->error, "Internal error");
159
            return 1;
160
        default:
161
            PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
162
            return 1;
163
    }
164
}
165

166
static void*
167
PyLzma_Malloc(void *opaque, size_t items, size_t size)
168
{
169
    if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
170
        return NULL;
171
    }
172
    /* PyMem_Malloc() cannot be used:
173
       the GIL is not held when lzma_code() is called */
174
    return PyMem_RawMalloc(items * size);
175
}
176

177
/* Deallocation hook installed in lzma_allocator.free; releases `ptr` with
   PyMem_RawFree(), the counterpart of the allocator used by
   PyLzma_Malloc().  `opaque` is unused. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    PyMem_RawFree(ptr);
    return;
}
182

183

184
/* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
185
   since the predefined conversion specifiers do not suit our needs:
186

187
      uint32_t - the "I" (unsigned int) specifier is the right size, but
188
      silently ignores overflows on conversion.
189

190
      lzma_vli - the "K" (unsigned long long) specifier is the right
191
      size, but like "I" it silently ignores overflows on conversion.
192

193
      lzma_mode and lzma_match_finder - these are enumeration types, and
194
      so the size of each is implementation-defined. Worse, different
195
      enum types can be of different sizes within the same program, so
196
      to be strictly correct, we need to define two separate converters.
197
 */
198

199
#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
200
    static int \
201
    FUNCNAME(PyObject *obj, void *ptr) \
202
    { \
203
        unsigned long long val; \
204
        \
205
        val = PyLong_AsUnsignedLongLong(obj); \
206
        if (PyErr_Occurred()) \
207
            return 0; \
208
        if ((unsigned long long)(TYPE)val != val) { \
209
            PyErr_SetString(PyExc_OverflowError, \
210
                            "Value too large for " #TYPE " type"); \
211
            return 0; \
212
        } \
213
        *(TYPE *)ptr = (TYPE)val; \
214
        return 1; \
215
    }
216

217
INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
218
INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
219
INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
220
INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
221

222
#undef INT_TYPE_CONVERTER_FUNC
223

224

225
/* Filter specifier parsing.
226

227
   This code handles converting filter specifiers (Python dicts) into
228
   the C lzma_filter structs expected by liblzma. */
229

230
static void *
231
parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
232
{
233
    static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
234
                               "pb", "mode", "nice_len", "mf", "depth", NULL};
235
    PyObject *id;
236
    PyObject *preset_obj;
237
    uint32_t preset = LZMA_PRESET_DEFAULT;
238
    lzma_options_lzma *options;
239

240
    /* First, fill in default values for all the options using a preset.
241
       Then, override the defaults with any values given by the caller. */
242

243
    preset_obj = PyMapping_GetItemString(spec, "preset");
244
    if (preset_obj == NULL) {
245
        if (PyErr_ExceptionMatches(PyExc_KeyError)) {
246
            PyErr_Clear();
247
        }
248
        else {
249
            return NULL;
250
        }
251
    } else {
252
        int ok = uint32_converter(preset_obj, &preset);
253
        Py_DECREF(preset_obj);
254
        if (!ok) {
255
            return NULL;
256
        }
257
    }
258

259
    options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
260
    if (options == NULL) {
261
        return PyErr_NoMemory();
262
    }
263

264
    if (lzma_lzma_preset(options, preset)) {
265
        PyMem_Free(options);
266
        PyErr_Format(state->error, "Invalid compression preset: %u", preset);
267
        return NULL;
268
    }
269

270
    if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
271
                                     "|OOO&O&O&O&O&O&O&O&", optnames,
272
                                     &id, &preset_obj,
273
                                     uint32_converter, &options->dict_size,
274
                                     uint32_converter, &options->lc,
275
                                     uint32_converter, &options->lp,
276
                                     uint32_converter, &options->pb,
277
                                     lzma_mode_converter, &options->mode,
278
                                     uint32_converter, &options->nice_len,
279
                                     lzma_mf_converter, &options->mf,
280
                                     uint32_converter, &options->depth)) {
281
        PyErr_SetString(PyExc_ValueError,
282
                        "Invalid filter specifier for LZMA filter");
283
        PyMem_Free(options);
284
        return NULL;
285
    }
286

287
    return options;
288
}
289

290
static void *
291
parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
292
{
293
    static char *optnames[] = {"id", "dist", NULL};
294
    PyObject *id;
295
    uint32_t dist = 1;
296
    lzma_options_delta *options;
297

298
    if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
299
                                     &id, uint32_converter, &dist)) {
300
        PyErr_SetString(PyExc_ValueError,
301
                        "Invalid filter specifier for delta filter");
302
        return NULL;
303
    }
304

305
    options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
306
    if (options == NULL) {
307
        return PyErr_NoMemory();
308
    }
309
    options->type = LZMA_DELTA_TYPE_BYTE;
310
    options->dist = dist;
311
    return options;
312
}
313

314
static void *
315
parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
316
{
317
    static char *optnames[] = {"id", "start_offset", NULL};
318
    PyObject *id;
319
    uint32_t start_offset = 0;
320
    lzma_options_bcj *options;
321

322
    if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
323
                                     &id, uint32_converter, &start_offset)) {
324
        PyErr_SetString(PyExc_ValueError,
325
                        "Invalid filter specifier for BCJ filter");
326
        return NULL;
327
    }
328

329
    options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
330
    if (options == NULL) {
331
        return PyErr_NoMemory();
332
    }
333
    options->start_offset = start_offset;
334
    return options;
335
}
336

337
static int
338
lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
339
{
340
    lzma_filter *f = (lzma_filter *)ptr;
341
    PyObject *id_obj;
342

343
    if (!PyMapping_Check(spec)) {
344
        PyErr_SetString(PyExc_TypeError,
345
                        "Filter specifier must be a dict or dict-like object");
346
        return 0;
347
    }
348
    id_obj = PyMapping_GetItemString(spec, "id");
349
    if (id_obj == NULL) {
350
        if (PyErr_ExceptionMatches(PyExc_KeyError))
351
            PyErr_SetString(PyExc_ValueError,
352
                            "Filter specifier must have an \"id\" entry");
353
        return 0;
354
    }
355
    f->id = PyLong_AsUnsignedLongLong(id_obj);
356
    Py_DECREF(id_obj);
357
    if (PyErr_Occurred()) {
358
        return 0;
359
    }
360

361
    switch (f->id) {
362
        case LZMA_FILTER_LZMA1:
363
        case LZMA_FILTER_LZMA2:
364
            f->options = parse_filter_spec_lzma(state, spec);
365
            return f->options != NULL;
366
        case LZMA_FILTER_DELTA:
367
            f->options = parse_filter_spec_delta(state, spec);
368
            return f->options != NULL;
369
        case LZMA_FILTER_X86:
370
        case LZMA_FILTER_POWERPC:
371
        case LZMA_FILTER_IA64:
372
        case LZMA_FILTER_ARM:
373
        case LZMA_FILTER_ARMTHUMB:
374
        case LZMA_FILTER_SPARC:
375
            f->options = parse_filter_spec_bcj(state, spec);
376
            return f->options != NULL;
377
        default:
378
            PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
379
            return 0;
380
    }
381
}
382

383
static void
384
free_filter_chain(lzma_filter filters[])
385
{
386
    for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
387
        PyMem_Free(filters[i].options);
388
    }
389
}
390

391
static int
392
parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
393
{
394
    Py_ssize_t i, num_filters;
395

396
    num_filters = PySequence_Length(filterspecs);
397
    if (num_filters == -1) {
398
        return -1;
399
    }
400
    if (num_filters > LZMA_FILTERS_MAX) {
401
        PyErr_Format(PyExc_ValueError,
402
                     "Too many filters - liblzma supports a maximum of %d",
403
                     LZMA_FILTERS_MAX);
404
        return -1;
405
    }
406

407
    for (i = 0; i < num_filters; i++) {
408
        int ok = 1;
409
        PyObject *spec = PySequence_GetItem(filterspecs, i);
410
        if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
411
            ok = 0;
412
        }
413
        Py_XDECREF(spec);
414
        if (!ok) {
415
            filters[i].id = LZMA_VLI_UNKNOWN;
416
            free_filter_chain(filters);
417
            return -1;
418
        }
419
    }
420
    filters[num_filters].id = LZMA_VLI_UNKNOWN;
421
    return 0;
422
}
423

424

425
/* Filter specifier construction.
426

427
   This code handles converting C lzma_filter structs into
428
   Python-level filter specifiers (represented as dicts). */
429

430
static int
431
spec_add_field(PyObject *spec, const char *key, unsigned long long value)
432
{
433
    PyObject *value_object = PyLong_FromUnsignedLongLong(value);
434
    if (value_object == NULL) {
435
        return -1;
436
    }
437
    PyObject *key_object = PyUnicode_InternFromString(key);
438
    if (key_object == NULL) {
439
        Py_DECREF(value_object);
440
        return -1;
441
    }
442
    int status = PyDict_SetItem(spec, key_object, value_object);
443
    Py_DECREF(key_object);
444
    Py_DECREF(value_object);
445
    return status;
446
}
447

448
static PyObject *
449
build_filter_spec(const lzma_filter *f)
450
{
451
    PyObject *spec;
452

453
    spec = PyDict_New();
454
    if (spec == NULL) {
455
        return NULL;
456
    }
457

458
#define ADD_FIELD(SOURCE, FIELD) \
459
    do { \
460
        if (spec_add_field(spec, #FIELD, SOURCE->FIELD) == -1) \
461
            goto error;\
462
    } while (0)
463

464
    ADD_FIELD(f, id);
465

466
    switch (f->id) {
467
        /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
468
           lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
469
           dict_size field is used. */
470
        case LZMA_FILTER_LZMA1: {
471
            lzma_options_lzma *options = f->options;
472
            ADD_FIELD(options, lc);
473
            ADD_FIELD(options, lp);
474
            ADD_FIELD(options, pb);
475
            ADD_FIELD(options, dict_size);
476
            break;
477
        }
478
        case LZMA_FILTER_LZMA2: {
479
            lzma_options_lzma *options = f->options;
480
            ADD_FIELD(options, dict_size);
481
            break;
482
        }
483
        case LZMA_FILTER_DELTA: {
484
            lzma_options_delta *options = f->options;
485
            ADD_FIELD(options, dist);
486
            break;
487
        }
488
        case LZMA_FILTER_X86:
489
        case LZMA_FILTER_POWERPC:
490
        case LZMA_FILTER_IA64:
491
        case LZMA_FILTER_ARM:
492
        case LZMA_FILTER_ARMTHUMB:
493
        case LZMA_FILTER_SPARC: {
494
            lzma_options_bcj *options = f->options;
495
            ADD_FIELD(options, start_offset);
496
            break;
497
        }
498
        default:
499
            PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
500
            goto error;
501
    }
502

503
#undef ADD_FIELD
504

505
    return spec;
506

507
error:
508
    Py_DECREF(spec);
509
    return NULL;
510
}
511

512

513
/*[clinic input]
514
module _lzma
515
class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
516
class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
517
[clinic start generated code]*/
518
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
519

520
#include "clinic/_lzmamodule.c.h"
521

522
/*[python input]
523

524
class lzma_vli_converter(CConverter):
525
    type = 'lzma_vli'
526
    converter = 'lzma_vli_converter'
527

528
class lzma_filter_converter(CConverter):
529
    type = 'lzma_filter'
530
    converter = 'lzma_filter_converter'
531
    c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
532

533
    def cleanup(self):
534
        name = ensure_legal_c_identifier(self.name)
535
        return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
536
                '   PyMem_Free(%(name)s.options);\n') % {'name': name}
537

538
[python start generated code]*/
539
/*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
540

541

542
/* LZMACompressor class. */
543

544
static PyObject *
545
compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
546
{
547
    PyObject *result;
548
    _BlocksOutputBuffer buffer = {.list = NULL};
549
    _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
550
    assert(state != NULL);
551

552
    if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
553
        goto error;
554
    }
555
    c->lzs.next_in = data;
556
    c->lzs.avail_in = len;
557

558
    for (;;) {
559
        lzma_ret lzret;
560

561
        Py_BEGIN_ALLOW_THREADS
562
        lzret = lzma_code(&c->lzs, action);
563
        Py_END_ALLOW_THREADS
564

565
        if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
566
            lzret = LZMA_OK; /* That wasn't a real error */
567
        }
568
        if (catch_lzma_error(state, lzret)) {
569
            goto error;
570
        }
571
        if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
572
            (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
573
            break;
574
        } else if (c->lzs.avail_out == 0) {
575
            if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
576
                goto error;
577
            }
578
        }
579
    }
580

581
    result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
582
    if (result != NULL) {
583
        return result;
584
    }
585

586
error:
587
    OutputBuffer_OnError(&buffer);
588
    return NULL;
589
}
590

591
/*[clinic input]
592
_lzma.LZMACompressor.compress
593

594
    data: Py_buffer
595
    /
596

597
Provide data to the compressor object.
598

599
Returns a chunk of compressed data if possible, or b'' otherwise.
600

601
When you have finished providing data to the compressor, call the
602
flush() method to finish the compression process.
603
[clinic start generated code]*/
604

605
static PyObject *
606
_lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
607
/*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
608
{
609
    PyObject *result = NULL;
610

611
    ACQUIRE_LOCK(self);
612
    if (self->flushed) {
613
        PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
614
    }
615
    else {
616
        result = compress(self, data->buf, data->len, LZMA_RUN);
617
    }
618
    RELEASE_LOCK(self);
619
    return result;
620
}
621

622
/*[clinic input]
623
_lzma.LZMACompressor.flush
624

625
Finish the compression process.
626

627
Returns the compressed data left in internal buffers.
628

629
The compressor object may not be used after this method is called.
630
[clinic start generated code]*/
631

632
static PyObject *
633
_lzma_LZMACompressor_flush_impl(Compressor *self)
634
/*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
635
{
636
    PyObject *result = NULL;
637

638
    ACQUIRE_LOCK(self);
639
    if (self->flushed) {
640
        PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
641
    } else {
642
        self->flushed = 1;
643
        result = compress(self, NULL, 0, LZMA_FINISH);
644
    }
645
    RELEASE_LOCK(self);
646
    return result;
647
}
648

649
static int
650
Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
651
                   int check, uint32_t preset, PyObject *filterspecs)
652
{
653
    lzma_ret lzret;
654

655
    if (filterspecs == Py_None) {
656
        lzret = lzma_easy_encoder(lzs, preset, check);
657
    } else {
658
        lzma_filter filters[LZMA_FILTERS_MAX + 1];
659

660
        if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
661
            return -1;
662
        lzret = lzma_stream_encoder(lzs, filters, check);
663
        free_filter_chain(filters);
664
    }
665
    if (catch_lzma_error(state, lzret)) {
666
        return -1;
667
    }
668
    else {
669
        return 0;
670
    }
671
}
672

673
static int
674
Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
675
{
676
    lzma_ret lzret;
677

678
    if (filterspecs == Py_None) {
679
        lzma_options_lzma options;
680

681
        if (lzma_lzma_preset(&options, preset)) {
682
            PyErr_Format(state->error, "Invalid compression preset: %u", preset);
683
            return -1;
684
        }
685
        lzret = lzma_alone_encoder(lzs, &options);
686
    } else {
687
        lzma_filter filters[LZMA_FILTERS_MAX + 1];
688

689
        if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
690
            return -1;
691
        if (filters[0].id == LZMA_FILTER_LZMA1 &&
692
            filters[1].id == LZMA_VLI_UNKNOWN) {
693
            lzret = lzma_alone_encoder(lzs, filters[0].options);
694
        } else {
695
            PyErr_SetString(PyExc_ValueError,
696
                            "Invalid filter chain for FORMAT_ALONE - "
697
                            "must be a single LZMA1 filter");
698
            lzret = LZMA_PROG_ERROR;
699
        }
700
        free_filter_chain(filters);
701
    }
702
    if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
703
        return -1;
704
    }
705
    else {
706
        return 0;
707
    }
708
}
709

710
static int
711
Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
712
{
713
    lzma_filter filters[LZMA_FILTERS_MAX + 1];
714
    lzma_ret lzret;
715

716
    if (filterspecs == Py_None) {
717
        PyErr_SetString(PyExc_ValueError,
718
                        "Must specify filters for FORMAT_RAW");
719
        return -1;
720
    }
721
    if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
722
        return -1;
723
    }
724
    lzret = lzma_raw_encoder(lzs, filters);
725
    free_filter_chain(filters);
726
    if (catch_lzma_error(state, lzret)) {
727
        return -1;
728
    }
729
    else {
730
        return 0;
731
    }
732
}
733

734
/*[-clinic input]
735
@classmethod
736
_lzma.LZMACompressor.__new__
737

738
    format: int(c_default="FORMAT_XZ") = FORMAT_XZ
739
        The container format to use for the output.  This can
740
        be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
741

742
    check: int(c_default="-1") = unspecified
743
        The integrity check to use.  For FORMAT_XZ, the default
744
        is CHECK_CRC64.  FORMAT_ALONE and FORMAT_RAW do not support integrity
745
        checks; for these formats, check must be omitted, or be CHECK_NONE.
746

747
    preset: object = None
748
        If provided should be an integer in the range 0-9, optionally
749
        OR-ed with the constant PRESET_EXTREME.
750

751
    filters: object = None
752
        If provided should be a sequence of dicts.  Each dict should
753
        have an entry for "id" indicating the ID of the filter, plus
754
        additional entries for options to the filter.
755

756
Create a compressor object for compressing data incrementally.
757

758
The settings used by the compressor can be specified either as a
759
preset compression level (with the 'preset' argument), or in detail
760
as a custom filter chain (with the 'filters' argument).  For FORMAT_XZ
761
and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
762
level.  For FORMAT_RAW, the caller must always specify a filter chain;
763
the raw compressor does not support preset compression levels.
764

765
For one-shot compression, use the compress() function instead.
766
[-clinic start generated code]*/
767
static PyObject *
768
Compressor_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
769
{
770
    static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
771
    int format = FORMAT_XZ;
772
    int check = -1;
773
    uint32_t preset = LZMA_PRESET_DEFAULT;
774
    PyObject *preset_obj = Py_None;
775
    PyObject *filterspecs = Py_None;
776
    Compressor *self;
777

778
    _lzma_state *state = PyType_GetModuleState(type);
779
    assert(state != NULL);
780
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
781
                                     "|iiOO:LZMACompressor", arg_names,
782
                                     &format, &check, &preset_obj,
783
                                     &filterspecs)) {
784
        return NULL;
785
    }
786

787
    if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
788
        PyErr_SetString(PyExc_ValueError,
789
                        "Integrity checks are only supported by FORMAT_XZ");
790
        return NULL;
791
    }
792

793
    if (preset_obj != Py_None && filterspecs != Py_None) {
794
        PyErr_SetString(PyExc_ValueError,
795
                        "Cannot specify both preset and filter chain");
796
        return NULL;
797
    }
798

799
    if (preset_obj != Py_None && !uint32_converter(preset_obj, &preset)) {
800
        return NULL;
801
    }
802

803
    assert(type != NULL && type->tp_alloc != NULL);
804
    self = (Compressor *)type->tp_alloc(type, 0);
805
    if (self == NULL) {
806
        return NULL;
807
    }
808

809
    self->alloc.opaque = NULL;
810
    self->alloc.alloc = PyLzma_Malloc;
811
    self->alloc.free = PyLzma_Free;
812
    self->lzs.allocator = &self->alloc;
813

814
    self->lock = PyThread_allocate_lock();
815
    if (self->lock == NULL) {
816
        Py_DECREF(self);
817
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
818
        return NULL;
819
    }
820

821
    self->flushed = 0;
822
    switch (format) {
823
        case FORMAT_XZ:
824
            if (check == -1) {
825
                check = LZMA_CHECK_CRC64;
826
            }
827
            if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
828
                goto error;
829
            }
830
            break;
831

832
        case FORMAT_ALONE:
833
            if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
834
                goto error;
835
            }
836
            break;
837

838
        case FORMAT_RAW:
839
            if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
840
                goto error;
841
            }
842
            break;
843

844
        default:
845
            PyErr_Format(PyExc_ValueError,
846
                         "Invalid container format: %d", format);
847
            goto error;
848
    }
849

850
    return (PyObject *)self;
851

852
error:
853
    Py_DECREF(self);
854
    return NULL;
855
}
856

857
static void
858
Compressor_dealloc(Compressor *self)
859
{
860
    lzma_end(&self->lzs);
861
    if (self->lock != NULL) {
862
        PyThread_free_lock(self->lock);
863
    }
864
    PyTypeObject *tp = Py_TYPE(self);
865
    tp->tp_free((PyObject *)self);
866
    Py_DECREF(tp);
867
}
868

869
static PyMethodDef Compressor_methods[] = {
870
    _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
871
    _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
872
    {NULL}
873
};
874

875
static int
876
Compressor_traverse(Compressor *self, visitproc visit, void *arg)
877
{
878
    Py_VISIT(Py_TYPE(self));
879
    return 0;
880
}
881

882
PyDoc_STRVAR(Compressor_doc,
883
"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
884
"\n"
885
"Create a compressor object for compressing data incrementally.\n"
886
"\n"
887
"format specifies the container format to use for the output. This can\n"
888
"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
889
"\n"
890
"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
891
"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
892
"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
893
"\n"
894
"The settings used by the compressor can be specified either as a\n"
895
"preset compression level (with the 'preset' argument), or in detail\n"
896
"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
897
"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
898
"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
899
"the raw compressor does not support preset compression levels.\n"
900
"\n"
901
"preset (if provided) should be an integer in the range 0-9, optionally\n"
902
"OR-ed with the constant PRESET_EXTREME.\n"
903
"\n"
904
"filters (if provided) should be a sequence of dicts. Each dict should\n"
905
"have an entry for \"id\" indicating the ID of the filter, plus\n"
906
"additional entries for options to the filter.\n"
907
"\n"
908
"For one-shot compression, use the compress() function instead.\n");
909

910
static PyType_Slot lzma_compressor_type_slots[] = {
911
    {Py_tp_dealloc, Compressor_dealloc},
912
    {Py_tp_methods, Compressor_methods},
913
    {Py_tp_new, Compressor_new},
914
    {Py_tp_doc, (char *)Compressor_doc},
915
    {Py_tp_traverse, Compressor_traverse},
916
    {0, 0}
917
};
918

919
static PyType_Spec lzma_compressor_type_spec = {
920
    .name = "_lzma.LZMACompressor",
921
    .basicsize = sizeof(Compressor),
922
    // Calling PyType_GetModuleState() on a subclass is not safe.
923
    // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
924
    // which prevents to create a subclass.
925
    // So calling PyType_GetModuleState() in this file is always safe.
926
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
927
    .slots = lzma_compressor_type_slots,
928
};
929

930
/* LZMADecompressor class. */
931

932
/* Decompress data of length d->lzs.avail_in in d->lzs.next_in.  The output
933
   buffer is allocated dynamically and returned.  At most max_length bytes are
934
   returned, so some of the input may not be consumed. d->lzs.next_in and
935
   d->lzs.avail_in are updated to reflect the consumed input. */
936
static PyObject*
937
decompress_buf(Decompressor *d, Py_ssize_t max_length)
938
{
939
    PyObject *result;
940
    lzma_stream *lzs = &d->lzs;
941
    _BlocksOutputBuffer buffer = {.list = NULL};
942
    _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
943
    assert(state != NULL);
944

945
    if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
946
        goto error;
947
    }
948

949
    for (;;) {
950
        lzma_ret lzret;
951

952
        Py_BEGIN_ALLOW_THREADS
953
        lzret = lzma_code(lzs, LZMA_RUN);
954
        Py_END_ALLOW_THREADS
955

956
        if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
957
            lzret = LZMA_OK; /* That wasn't a real error */
958
        }
959
        if (catch_lzma_error(state, lzret)) {
960
            goto error;
961
        }
962
        if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
963
            d->check = lzma_get_check(&d->lzs);
964
        }
965
        if (lzret == LZMA_STREAM_END) {
966
            d->eof = 1;
967
            break;
968
        } else if (lzs->avail_out == 0) {
969
            /* Need to check lzs->avail_out before lzs->avail_in.
970
               Maybe lzs's internal state still have a few bytes
971
               can be output, grow the output buffer and continue
972
               if max_lengh < 0. */
973
            if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
974
                break;
975
            }
976
            if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
977
                goto error;
978
            }
979
        } else if (lzs->avail_in == 0) {
980
            break;
981
        }
982
    }
983

984
    result = OutputBuffer_Finish(&buffer, lzs->avail_out);
985
    if (result != NULL) {
986
        return result;
987
    }
988

989
error:
990
    OutputBuffer_OnError(&buffer);
991
    return NULL;
992
}
993

994
static PyObject *
995
decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
996
{
997
    char input_buffer_in_use;
998
    PyObject *result;
999
    lzma_stream *lzs = &d->lzs;
1000

1001
    /* Prepend unconsumed input if necessary */
1002
    if (lzs->next_in != NULL) {
1003
        size_t avail_now, avail_total;
1004

1005
        /* Number of bytes we can append to input buffer */
1006
        avail_now = (d->input_buffer + d->input_buffer_size)
1007
            - (lzs->next_in + lzs->avail_in);
1008

1009
        /* Number of bytes we can append if we move existing
1010
           contents to beginning of buffer (overwriting
1011
           consumed input) */
1012
        avail_total = d->input_buffer_size - lzs->avail_in;
1013

1014
        if (avail_total < len) {
1015
            size_t offset = lzs->next_in - d->input_buffer;
1016
            uint8_t *tmp;
1017
            size_t new_size = d->input_buffer_size + len - avail_now;
1018

1019
            /* Assign to temporary variable first, so we don't
1020
               lose address of allocated buffer if realloc fails */
1021
            tmp = PyMem_Realloc(d->input_buffer, new_size);
1022
            if (tmp == NULL) {
1023
                PyErr_SetNone(PyExc_MemoryError);
1024
                return NULL;
1025
            }
1026
            d->input_buffer = tmp;
1027
            d->input_buffer_size = new_size;
1028

1029
            lzs->next_in = d->input_buffer + offset;
1030
        }
1031
        else if (avail_now < len) {
1032
            memmove(d->input_buffer, lzs->next_in,
1033
                    lzs->avail_in);
1034
            lzs->next_in = d->input_buffer;
1035
        }
1036
        memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1037
        lzs->avail_in += len;
1038
        input_buffer_in_use = 1;
1039
    }
1040
    else {
1041
        lzs->next_in = data;
1042
        lzs->avail_in = len;
1043
        input_buffer_in_use = 0;
1044
    }
1045

1046
    result = decompress_buf(d, max_length);
1047
    if (result == NULL) {
1048
        lzs->next_in = NULL;
1049
        return NULL;
1050
    }
1051

1052
    if (d->eof) {
1053
        d->needs_input = 0;
1054
        if (lzs->avail_in > 0) {
1055
            Py_XSETREF(d->unused_data,
1056
                      PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1057
            if (d->unused_data == NULL) {
1058
                goto error;
1059
            }
1060
        }
1061
    }
1062
    else if (lzs->avail_in == 0) {
1063
        lzs->next_in = NULL;
1064

1065
        if (lzs->avail_out == 0) {
1066
            /* (avail_in==0 && avail_out==0)
1067
               Maybe lzs's internal state still have a few bytes can
1068
               be output, try to output them next time. */
1069
            d->needs_input = 0;
1070

1071
            /* If max_length < 0, lzs->avail_out always > 0 */
1072
            assert(max_length >= 0);
1073
        } else {
1074
            /* Input buffer exhausted, output buffer has space. */
1075
            d->needs_input = 1;
1076
        }
1077
    }
1078
    else {
1079
        d->needs_input = 0;
1080

1081
        /* If we did not use the input buffer, we now have
1082
           to copy the tail from the caller's buffer into the
1083
           input buffer */
1084
        if (!input_buffer_in_use) {
1085

1086
            /* Discard buffer if it's too small
1087
               (resizing it may needlessly copy the current contents) */
1088
            if (d->input_buffer != NULL &&
1089
                d->input_buffer_size < lzs->avail_in) {
1090
                PyMem_Free(d->input_buffer);
1091
                d->input_buffer = NULL;
1092
            }
1093

1094
            /* Allocate if necessary */
1095
            if (d->input_buffer == NULL) {
1096
                d->input_buffer = PyMem_Malloc(lzs->avail_in);
1097
                if (d->input_buffer == NULL) {
1098
                    PyErr_SetNone(PyExc_MemoryError);
1099
                    goto error;
1100
                }
1101
                d->input_buffer_size = lzs->avail_in;
1102
            }
1103

1104
            /* Copy tail */
1105
            memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1106
            lzs->next_in = d->input_buffer;
1107
        }
1108
    }
1109

1110
    return result;
1111

1112
error:
1113
    Py_XDECREF(result);
1114
    return NULL;
1115
}
1116

1117
/*[clinic input]
1118
_lzma.LZMADecompressor.decompress
1119

1120
    data: Py_buffer
1121
    max_length: Py_ssize_t=-1
1122

1123
Decompress *data*, returning uncompressed data as bytes.
1124

1125
If *max_length* is nonnegative, returns at most *max_length* bytes of
1126
decompressed data. If this limit is reached and further output can be
1127
produced, *self.needs_input* will be set to ``False``. In this case, the next
1128
call to *decompress()* may provide *data* as b'' to obtain more of the output.
1129

1130
If all of the input data was decompressed and returned (either because this
1131
was less than *max_length* bytes, or because *max_length* was negative),
1132
*self.needs_input* will be set to True.
1133

1134
Attempting to decompress data after the end of stream is reached raises an
1135
EOFError.  Any data found after the end of the stream is ignored and saved in
1136
the unused_data attribute.
1137
[clinic start generated code]*/
1138

1139
static PyObject *
1140
_lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1141
                                       Py_ssize_t max_length)
1142
/*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
1143
{
1144
    PyObject *result = NULL;
1145

1146
    ACQUIRE_LOCK(self);
1147
    if (self->eof)
1148
        PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1149
    else
1150
        result = decompress(self, data->buf, data->len, max_length);
1151
    RELEASE_LOCK(self);
1152
    return result;
1153
}
1154

1155
static int
1156
Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
1157
{
1158
    lzma_filter filters[LZMA_FILTERS_MAX + 1];
1159
    lzma_ret lzret;
1160

1161
    if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
1162
        return -1;
1163
    }
1164
    lzret = lzma_raw_decoder(lzs, filters);
1165
    free_filter_chain(filters);
1166
    if (catch_lzma_error(state, lzret)) {
1167
        return -1;
1168
    }
1169
    else {
1170
        return 0;
1171
    }
1172
}
1173

1174
/*[clinic input]
1175
@classmethod
1176
_lzma.LZMADecompressor.__new__
1177

1178
    format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1179
        Specifies the container format of the input stream.  If this is
1180
        FORMAT_AUTO (the default), the decompressor will automatically detect
1181
        whether the input is FORMAT_XZ or FORMAT_ALONE.  Streams created with
1182
        FORMAT_RAW cannot be autodetected.
1183

1184
    memlimit: object = None
1185
        Limit the amount of memory used by the decompressor.  This will cause
1186
        decompression to fail if the input cannot be decompressed within the
1187
        given limit.
1188

1189
    filters: object = None
1190
        A custom filter chain.  This argument is required for FORMAT_RAW, and
1191
        not accepted with any other format.  When provided, this should be a
1192
        sequence of dicts, each indicating the ID and options for a single
1193
        filter.
1194

1195
Create a decompressor object for decompressing data incrementally.
1196

1197
For one-shot decompression, use the decompress() function instead.
1198
[clinic start generated code]*/
1199

1200
static PyObject *
1201
_lzma_LZMADecompressor_impl(PyTypeObject *type, int format,
1202
                            PyObject *memlimit, PyObject *filters)
1203
/*[clinic end generated code: output=2d46d5e70f10bc7f input=ca40cd1cb1202b0d]*/
1204
{
1205
    Decompressor *self;
1206
    const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1207
    uint64_t memlimit_ = UINT64_MAX;
1208
    lzma_ret lzret;
1209
    _lzma_state *state = PyType_GetModuleState(type);
1210
    assert(state != NULL);
1211

1212
    if (memlimit != Py_None) {
1213
        if (format == FORMAT_RAW) {
1214
            PyErr_SetString(PyExc_ValueError,
1215
                            "Cannot specify memory limit with FORMAT_RAW");
1216
            return NULL;
1217
        }
1218
        memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1219
        if (PyErr_Occurred()) {
1220
            return NULL;
1221
        }
1222
    }
1223

1224
    if (format == FORMAT_RAW && filters == Py_None) {
1225
        PyErr_SetString(PyExc_ValueError,
1226
                        "Must specify filters for FORMAT_RAW");
1227
        return NULL;
1228
    } else if (format != FORMAT_RAW && filters != Py_None) {
1229
        PyErr_SetString(PyExc_ValueError,
1230
                        "Cannot specify filters except with FORMAT_RAW");
1231
        return NULL;
1232
    }
1233

1234
    assert(type != NULL && type->tp_alloc != NULL);
1235
    self = (Decompressor *)type->tp_alloc(type, 0);
1236
    if (self == NULL) {
1237
        return NULL;
1238
    }
1239
    self->alloc.opaque = NULL;
1240
    self->alloc.alloc = PyLzma_Malloc;
1241
    self->alloc.free = PyLzma_Free;
1242
    self->lzs.allocator = &self->alloc;
1243
    self->lzs.next_in = NULL;
1244

1245
    self->lock = PyThread_allocate_lock();
1246
    if (self->lock == NULL) {
1247
        Py_DECREF(self);
1248
        PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1249
        return NULL;
1250
    }
1251

1252
    self->check = LZMA_CHECK_UNKNOWN;
1253
    self->needs_input = 1;
1254
    self->input_buffer = NULL;
1255
    self->input_buffer_size = 0;
1256
    Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
1257
    if (self->unused_data == NULL) {
1258
        goto error;
1259
    }
1260

1261
    switch (format) {
1262
        case FORMAT_AUTO:
1263
            lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1264
            if (catch_lzma_error(state, lzret)) {
1265
                goto error;
1266
            }
1267
            break;
1268

1269
        case FORMAT_XZ:
1270
            lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1271
            if (catch_lzma_error(state, lzret)) {
1272
                goto error;
1273
            }
1274
            break;
1275

1276
        case FORMAT_ALONE:
1277
            self->check = LZMA_CHECK_NONE;
1278
            lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1279
            if (catch_lzma_error(state, lzret)) {
1280
                goto error;
1281
            }
1282
            break;
1283

1284
        case FORMAT_RAW:
1285
            self->check = LZMA_CHECK_NONE;
1286
            if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
1287
                goto error;
1288
            }
1289
            break;
1290

1291
        default:
1292
            PyErr_Format(PyExc_ValueError,
1293
                         "Invalid container format: %d", format);
1294
            goto error;
1295
    }
1296

1297
    return (PyObject *)self;
1298

1299
error:
1300
    Py_DECREF(self);
1301
    return NULL;
1302
}
1303

1304
static void
1305
Decompressor_dealloc(Decompressor *self)
1306
{
1307
    if(self->input_buffer != NULL)
1308
        PyMem_Free(self->input_buffer);
1309

1310
    lzma_end(&self->lzs);
1311
    Py_CLEAR(self->unused_data);
1312
    if (self->lock != NULL) {
1313
        PyThread_free_lock(self->lock);
1314
    }
1315
    PyTypeObject *tp = Py_TYPE(self);
1316
    tp->tp_free((PyObject *)self);
1317
    Py_DECREF(tp);
1318
}
1319

1320
static int
1321
Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
1322
{
1323
    Py_VISIT(Py_TYPE(self));
1324
    return 0;
1325
}
1326

1327
static PyMethodDef Decompressor_methods[] = {
1328
    _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
1329
    {NULL}
1330
};
1331

1332
PyDoc_STRVAR(Decompressor_check_doc,
1333
"ID of the integrity check used by the input stream.");
1334

1335
PyDoc_STRVAR(Decompressor_eof_doc,
1336
"True if the end-of-stream marker has been reached.");
1337

1338
PyDoc_STRVAR(Decompressor_needs_input_doc,
1339
"True if more input is needed before more decompressed data can be produced.");
1340

1341
PyDoc_STRVAR(Decompressor_unused_data_doc,
1342
"Data found after the end of the compressed stream.");
1343

1344
static PyMemberDef Decompressor_members[] = {
1345
    {"check", T_INT, offsetof(Decompressor, check), READONLY,
1346
     Decompressor_check_doc},
1347
    {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
1348
     Decompressor_eof_doc},
1349
    {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
1350
     Decompressor_needs_input_doc},
1351
    {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
1352
     Decompressor_unused_data_doc},
1353
    {NULL}
1354
};
1355

1356
static PyType_Slot lzma_decompressor_type_slots[] = {
1357
    {Py_tp_dealloc, Decompressor_dealloc},
1358
    {Py_tp_methods, Decompressor_methods},
1359
    {Py_tp_new, _lzma_LZMADecompressor},
1360
    {Py_tp_doc, (char *)_lzma_LZMADecompressor__doc__},
1361
    {Py_tp_traverse, Decompressor_traverse},
1362
    {Py_tp_members, Decompressor_members},
1363
    {0, 0}
1364
};
1365

1366
static PyType_Spec lzma_decompressor_type_spec = {
1367
    .name = "_lzma.LZMADecompressor",
1368
    .basicsize = sizeof(Decompressor),
1369
    // Calling PyType_GetModuleState() on a subclass is not safe.
1370
    // lzma_decompressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
1371
    // which prevents to create a subclass.
1372
    // So calling PyType_GetModuleState() in this file is always safe.
1373
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
1374
    .slots = lzma_decompressor_type_slots,
1375
};
1376

1377

1378
/* Module-level functions. */
1379

1380
/*[clinic input]
1381
_lzma.is_check_supported
1382
    check_id: int
1383
    /
1384

1385
Test whether the given integrity check is supported.
1386

1387
Always returns True for CHECK_NONE and CHECK_CRC32.
1388
[clinic start generated code]*/
1389

1390
static PyObject *
_lzma_is_check_supported_impl(PyObject *module, int check_id)
/*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
{
    /* Ask liblzma, then convert its answer into a Python bool. */
    long supported = lzma_check_is_supported(check_id);

    return PyBool_FromLong(supported);
}
1396

1397
PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
1398
"_encode_filter_properties($module, filter, /)\n"
1399
"--\n"
1400
"\n"
1401
"Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
1402
"\n"
1403
"The result does not include the filter ID itself, only the options.");
1404

1405
#define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF    \
1406
    {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},
1407

1408
static PyObject *
1409
_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
1410

1411
static PyObject *
1412
_lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1413
{
1414
    PyObject *return_value = NULL;
1415
    lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1416
    _lzma_state *state = get_lzma_state(module);
1417
    assert(state != NULL);
1418
    if (!lzma_filter_converter(state, arg, &filter)) {
1419
        goto exit;
1420
    }
1421
    return_value = _lzma__encode_filter_properties_impl(module, filter);
1422

1423
exit:
1424
    /* Cleanup for filter */
1425
    if (filter.id != LZMA_VLI_UNKNOWN) {
1426
       PyMem_Free(filter.options);
1427
    }
1428

1429
    return return_value;
1430
}
1431

1432
static PyObject *
1433
_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1434
{
1435
    lzma_ret lzret;
1436
    uint32_t encoded_size;
1437
    PyObject *result = NULL;
1438
    _lzma_state *state = get_lzma_state(module);
1439
    assert(state != NULL);
1440

1441
    lzret = lzma_properties_size(&encoded_size, &filter);
1442
    if (catch_lzma_error(state, lzret))
1443
        goto error;
1444

1445
    result = PyBytes_FromStringAndSize(NULL, encoded_size);
1446
    if (result == NULL)
1447
        goto error;
1448

1449
    lzret = lzma_properties_encode(
1450
            &filter, (uint8_t *)PyBytes_AS_STRING(result));
1451
    if (catch_lzma_error(state, lzret)) {
1452
        goto error;
1453
    }
1454

1455
    return result;
1456

1457
error:
1458
    Py_XDECREF(result);
1459
    return NULL;
1460
}
1461

1462

1463
/*[clinic input]
1464
_lzma._decode_filter_properties
1465
    filter_id: lzma_vli
1466
    encoded_props: Py_buffer
1467
    /
1468

1469
Decode the encoded options (properties) in *encoded_props* for the filter with ID *filter_id* and return the resulting filter specification.
1470

1471
The result does not include the filter ID itself, only the options.
1472
[clinic start generated code]*/
1473

1474
static PyObject *
1475
_lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
1476
                                     Py_buffer *encoded_props)
1477
/*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
1478
{
1479
    lzma_filter filter;
1480
    lzma_ret lzret;
1481
    PyObject *result = NULL;
1482
    filter.id = filter_id;
1483
    _lzma_state *state = get_lzma_state(module);
1484
    assert(state != NULL);
1485

1486
    lzret = lzma_properties_decode(
1487
            &filter, NULL, encoded_props->buf, encoded_props->len);
1488
    if (catch_lzma_error(state, lzret)) {
1489
        return NULL;
1490
    }
1491

1492
    result = build_filter_spec(&filter);
1493

1494
    /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1495
       allocated by lzma_properties_decode() using the default allocator. */
1496
    free(filter.options);
1497
    return result;
1498
}
1499

1500
/* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1501
   would not work correctly on platforms with 32-bit longs. */
1502
static int
1503
module_add_int_constant(PyObject *m, const char *name, long long value)
1504
{
1505
    PyObject *o = PyLong_FromLongLong(value);
1506
    if (o == NULL) {
1507
        return -1;
1508
    }
1509
    if (PyModule_AddObject(m, name, o) == 0) {
1510
        return 0;
1511
    }
1512
    Py_DECREF(o);
1513
    return -1;
1514
}
1515

1516
static int
1517
lzma_exec(PyObject *module)
1518
{
1519
#define ADD_INT_PREFIX_MACRO(module, macro)                                 \
1520
    do {                                                                    \
1521
        if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) {  \
1522
            return -1;                                                      \
1523
        }                                                                   \
1524
    } while(0)
1525

1526
#define ADD_INT_MACRO(module, macro)                                        \
1527
    do {                                                                    \
1528
        if (PyModule_AddIntMacro(module, macro) < 0) {                      \
1529
            return -1;                                                      \
1530
        }                                                                   \
1531
    } while (0)
1532

1533

1534
    _lzma_state *state = get_lzma_state(module);
1535

1536
    state->empty_tuple = PyTuple_New(0);
1537
    if (state->empty_tuple == NULL) {
1538
        return -1;
1539
    }
1540

1541
    ADD_INT_MACRO(module, FORMAT_AUTO);
1542
    ADD_INT_MACRO(module, FORMAT_XZ);
1543
    ADD_INT_MACRO(module, FORMAT_ALONE);
1544
    ADD_INT_MACRO(module, FORMAT_RAW);
1545
    ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
1546
    ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
1547
    ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
1548
    ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
1549
    ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
1550
    ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
1551
    ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
1552
    ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
1553
    ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
1554
    ADD_INT_PREFIX_MACRO(module, FILTER_X86);
1555
    ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
1556
    ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
1557
    ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
1558
    ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
1559
    ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
1560
    ADD_INT_PREFIX_MACRO(module, MF_HC3);
1561
    ADD_INT_PREFIX_MACRO(module, MF_HC4);
1562
    ADD_INT_PREFIX_MACRO(module, MF_BT2);
1563
    ADD_INT_PREFIX_MACRO(module, MF_BT3);
1564
    ADD_INT_PREFIX_MACRO(module, MF_BT4);
1565
    ADD_INT_PREFIX_MACRO(module, MODE_FAST);
1566
    ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
1567
    ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
1568
    ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
1569

1570
    state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1571
    if (state->error == NULL) {
1572
        return -1;
1573
    }
1574

1575
    if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
1576
        return -1;
1577
    }
1578

1579

1580
    state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1581
                                                            &lzma_compressor_type_spec, NULL);
1582
    if (state->lzma_compressor_type == NULL) {
1583
        return -1;
1584
    }
1585

1586
    if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
1587
        return -1;
1588
    }
1589

1590
    state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1591
                                                         &lzma_decompressor_type_spec, NULL);
1592
    if (state->lzma_decompressor_type == NULL) {
1593
        return -1;
1594
    }
1595

1596
    if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
1597
        return -1;
1598
    }
1599

1600
    return 0;
1601
}
1602

1603
static PyMethodDef lzma_methods[] = {
1604
    _LZMA_IS_CHECK_SUPPORTED_METHODDEF
1605
    _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
1606
    _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
1607
    {NULL}
1608
};
1609

1610
static PyModuleDef_Slot lzma_slots[] = {
1611
    {Py_mod_exec, lzma_exec},
1612
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1613
    {0, NULL}
1614
};
1615

1616
static int
1617
lzma_traverse(PyObject *module, visitproc visit, void *arg)
1618
{
1619
    _lzma_state *state = get_lzma_state(module);
1620
    Py_VISIT(state->lzma_compressor_type);
1621
    Py_VISIT(state->lzma_decompressor_type);
1622
    Py_VISIT(state->error);
1623
    Py_VISIT(state->empty_tuple);
1624
    return 0;
1625
}
1626

1627
static int
1628
lzma_clear(PyObject *module)
1629
{
1630
    _lzma_state *state = get_lzma_state(module);
1631
    Py_CLEAR(state->lzma_compressor_type);
1632
    Py_CLEAR(state->lzma_decompressor_type);
1633
    Py_CLEAR(state->error);
1634
    Py_CLEAR(state->empty_tuple);
1635
    return 0;
1636
}
1637

1638
static void
1639
lzma_free(void *module)
1640
{
1641
    lzma_clear((PyObject *)module);
1642
}
1643

1644
static PyModuleDef _lzmamodule = {
1645
    PyModuleDef_HEAD_INIT,
1646
    .m_name = "_lzma",
1647
    .m_size = sizeof(_lzma_state),
1648
    .m_methods = lzma_methods,
1649
    .m_slots = lzma_slots,
1650
    .m_traverse = lzma_traverse,
1651
    .m_clear = lzma_clear,
1652
    .m_free = lzma_free,
1653
};
1654

1655
PyMODINIT_FUNC
1656
PyInit__lzma(void)
1657
{
1658
    return PyModuleDef_Init(&_lzmamodule);
1659
}
1660

1661
Product

Resources

Company