/*
This code does smooth scaling of a pixmap.

This function returns a new pixmap representing the area starting at (0,0)
given by taking the source pixmap src, scaling it to width w, and height h,
and then positioning it at (frac(x),frac(y)).

This is a cut-down version of draw_scale.c that only copes with filters
that return values strictly in the 0..1 range, and uses bytes for
intermediate results rather than ints.
*/
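
/* Illustrative usage sketch (not part of the original file): fz_scale_pixmap,
 * defined at the bottom of this file, is the public entry point. The target
 * size used here is an arbitrary assumption and error handling is elided. */
#if 0
static fz_pixmap *
example_scale(fz_context *ctx, fz_pixmap *src)
{
	/* Scale src to 200x150 pixels; the fractional parts of the (x,y)
	 * position (here 0.5, 0.25) select the sub-pixel phase. */
	return fz_scale_pixmap(ctx, src, 10.5f, 20.25f, 200.0f, 150.0f, NULL);
}
#endif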

#include "mupdf/fitz.h"
#include "draw-imp.h"

/* Do we special case handling of single pixel high/wide images? The
 * 'purest' handling is given by not special casing them, but certain
 * files that use such images 'stack' them to give full images. Not
 * special casing them results in them being fainter and giving noticeable
 * rounding errors.
 */
#define SINGLE_PIXEL_SPECIALS

#ifdef DEBUG_SCALING
#ifdef WIN32
#include <windows.h>
static void debug_print(const char *fmt, ...)
{
	va_list args;
	char text[256];
	va_start(args, fmt);
	vsnprintf(text, sizeof(text), fmt, args); /* bounded; a plain vsprintf could overrun */
	va_end(args);
	OutputDebugStringA(text);
	printf("%s", text); /* never pass arbitrary text as a format string */
}
#else
static void debug_print(const char *fmt, ...)
{
	va_list args;
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}
#endif
#endif
#ifdef DEBUG_SCALING
#define DBUG(A) debug_print A
#else
#define DBUG(A) do {} while (0)
#endif

/*
Consider a row of source samples, src, of width src_w, positioned at x,
scaled to width dst_w.

src[i] is centred at: x + (i + 0.5)*dst_w/src_w

Therefore the distance between the centre of the jth output pixel and
the centre of the ith source sample is:

	dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)

When scaling up, therefore:

	dst[j] = SUM(filter(dist[j,i]) * src[i])
		(for all ints i)

This can be simplified by noticing that filters are only non-zero within
a given filter width (henceforth called W). So:

	dst[j] = SUM(filter(dist[j,i]) * src[i])
		(for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)

When scaling down, each filtered source sample is stretched to be wider
to avoid aliasing issues. This effectively reduces the distance between
centres.

	dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
		(where F = dst_w/src_w)
		(for ints i, s.t. (j-W)/F < i < (j+W)/F)

*/
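
/* A self-contained reference sketch (illustrative, not part of the original
 * file) of the formulas above, evaluated in floats with no optimisation.
 * 'triangle_w1' is a hypothetical stand-in for any filter with W = 1 that
 * returns values in 0..1; the real code below precomputes integer weight
 * tables rather than calling the filter per pixel. */
#if 0
static float
triangle_w1(float f)
{
	if (f < 0)
		f = -f;
	return (f >= 1) ? 0 : 1 - f;
}

static void
scale_row_reference(float *dst, int dst_w, const float *src, int src_w, float x)
{
	/* F stretches the filter when scaling down; it is 1 when scaling up. */
	float F = (dst_w < src_w) ? (float)dst_w / src_w : 1;
	int i, j;

	for (j = 0; j < dst_w; j++)
	{
		float sum = 0;
		for (i = 0; i < src_w; i++)
		{
			/* dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w) */
			float dist = j + 0.5f - (x + (i + 0.5f) * dst_w / src_w);
			sum += triangle_w1(dist * F) * F * src[i];
		}
		dst[j] = sum;
	}
}
#endif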

typedef struct fz_scale_filter_s fz_scale_filter;

struct fz_scale_filter_s
{
	int width;
	float (*fn)(fz_scale_filter *, float);
};

/* Image scale filters */

static float
triangle(fz_scale_filter *filter, float f)
{
	if (f >= 1)
		return 0;
	return 1-f;
}

static float
box(fz_scale_filter *filter, float f)
{
	if (f >= 0.5f)
		return 0;
	return 1;
}

static float
simple(fz_scale_filter *filter, float x)
{
	if (x >= 1)
		return 0;
	return 1 + (2*x - 3)*x*x;
}

fz_scale_filter fz_scale_filter_box = { 1, box };
fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
fz_scale_filter fz_scale_filter_simple = { 1, simple };
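
/* Filters are evaluated through the function pointer, with the (already
 * G-stretched) absolute distance as the argument; this mirrors the call
 * made in add_weight() below. A hedged sketch: */
#if 0
	fz_scale_filter *f = &fz_scale_filter_triangle;
	float v = f->fn(f, 0.25f); /* 0.75 for the triangle filter */
#endif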

/*
We build ourselves a set of tables to contain the precalculated weights
for a given set of scale settings.

The first dst_w entries in index are the indexes (within index itself) of
the sets of weights for each destination pixel.

Each of the sets of weights is a set of values consisting of:
	the minimum source pixel index used for this destination pixel
	the number of weights used for this destination pixel
	the weights themselves

So to calculate dst[i] we do the following:

	weights = &index[index[i]];
	min = *weights++;
	len = *weights++;
	dst[i] = 0;
	while (len-- > 0)
		dst[i] += src[min++] * *weights++

In addition, we guarantee that at the end of this process weights will
point to the weights value for dst pixel i+1.

In the simplest version of this algorithm, we would scale the whole image
horizontally first into a temporary buffer, then scale that temporary
buffer again vertically to give us our result. Using such a simple
algorithm would mean that we could use the same style of weights for both
horizontal and vertical scaling.

Unfortunately, this would also require a large temporary buffer,
particularly in the case where we are scaling up.

We therefore modify the algorithm as follows: we scale scanlines from the
source image horizontally into a temporary buffer, until we have all the
contributors for a given output scanline. We then produce that output
scanline from the temporary buffer. In this way we restrict the height
of the temporary buffer to a small fraction of the final size.

Unfortunately, this means that the pseudo-code for recombining a
scanline of fully scaled pixels is as follows:

	weights = &index[index[y]];
	min = *weights++;
	len = *weights++;
	for (x = 0 to dst_w)
		min2 = min
		len2 = len
		weights2 = weights
		dst[x] = 0;
		while (len2-- > 0)
			dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++

i.e. it requires a % operation for every source pixel - this is typically
expensive.

To avoid this, we alter the order in which the vertical weights are stored,
so that they are ordered in the same order as the temporary buffer lines
would appear. This simplifies the algorithm to:

	weights = &index[index[y]];
	min = *weights++;
	len = *weights++;
	for (x = 0 to dst_w)
		min2 = 0
		len2 = len
		weights2 = weights
		dst[x] = 0;
		while (len2-- > 0)
			dst[x] += temp[x][min2++] * *weights2++

This means that len may be larger than it needs to be (due to the
possible inclusion of a zero weight row or two), but in practice this
is only an increase of 1 or 2 at worst.

We implement this by generating the weights as normal (but ensuring we
leave enough space) and then reordering afterwards.

*/
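
/* A concrete (n == 1, greyscale) rendering of the first pseudo-code block
 * above, assuming a finished horizontal weights table. Illustrative only:
 * this is essentially scale_row_to_temp1() below, minus the flip handling
 * and the 8.8 fixed-point rounding bias. */
#if 0
static void
apply_weights_row(unsigned char *dst, const unsigned char *src, const fz_weights *weights)
{
	int i;

	for (i = 0; i < weights->count; i++)
	{
		const int *w = &weights->index[weights->index[i]];
		int min = *w++;	/* first contributing source pixel */
		int len = *w++;	/* number of weights that follow */
		int val = 0;

		while (len-- > 0)
			val += src[min++] * *w++;	/* weights sum to 256 */
		dst[i] = (unsigned char)(val >> 8);
	}
}
#endif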

typedef struct fz_weights_s fz_weights;

/* This structure is accessed from ARM code - bear this in mind before
 * altering it! */
struct fz_weights_s
{
	int flip;	/* true if outputting reversed */
	int count;	/* number of output pixels we have records for in this table */
	int max_len;	/* Maximum number of weights for any one output pixel */
	int n;		/* number of components (src->n) */
	int new_line;	/* True if no weights for the current output pixel */
	int patch_l;	/* How many output pixels we skip over */
	int index[1];
};

struct fz_scale_cache_s
{
	int src_w;
	float x;
	float dst_w;
	fz_scale_filter *filter;
	int vertical;
	int dst_w_int;
	int patch_l;
	int patch_r;
	int n;
	int flip;
	fz_weights *weights;
};

static fz_weights *
new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
{
	int max_len;
	fz_weights *weights;

	if (src_w > dst_w)
	{
		/* Scaling down, so there will be a maximum of
		 * 2*filterwidth*src_w/dst_w src pixels
		 * contributing to each dst pixel. */
		max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
		if (max_len > src_w)
			max_len = src_w;
	}
	else
	{
		/* Scaling up, so there will be a maximum of
		 * 2*filterwidth src pixels contributing to each dst pixel.
		 */
		max_len = 2 * filter->width;
	}
	/* We need the size of the struct,
	 * plus patch_w*sizeof(int) for the index
	 * plus (2+max_len)*sizeof(int) for the weights
	 * plus room for an extra set of weights for reordering.
	 */
	weights = fz_malloc(ctx, sizeof(*weights)+(max_len+3)*(patch_w+1)*sizeof(int));
	if (!weights)
		return NULL;
	weights->count = -1;
	weights->max_len = max_len;
	weights->index[0] = patch_w;
	weights->n = n;
	weights->patch_l = patch_l;
	weights->flip = flip;
	return weights;
}

/* j is destination pixel in the patch_l..patch_l+patch_w range */
static void
init_weights(fz_weights *weights, int j)
{
	int index;

	j -= weights->patch_l;
	assert(weights->count == j-1);
	weights->count++;
	weights->new_line = 1;
	if (j == 0)
		index = weights->index[0];
	else
	{
		index = weights->index[j-1];
		index += 2 + weights->index[index+1];
	}
	weights->index[j] = index;	/* row pointer */
	weights->index[index] = 0;	/* min */
	weights->index[index+1] = 0;	/* len */
}

static void
add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
	float x, float F, float G, int src_w, float dst_w)
{
	float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
	float f;
	int min, len, index, weight;

	dist *= G;
	if (dist < 0)
		dist = -dist;
	f = filter->fn(filter, dist)*F;
	weight = (int)(256*f+0.5f);

	/* Ensure i is in range */
	if (i < 0 || i >= src_w)
		return;
	if (weight == 0)
	{
		/* We add a fudge factor here to allow for extreme downscales
		 * where all the weights round to 0. Ensure that at least one
		 * (arbitrarily the first one) is non-zero. */
		if (weights->new_line && f > 0)
			weight = 1;
		else
			return;
	}

	DBUG(("add_weight[%d][%d] = %d(%g) dist=%g\n",j,i,weight,f,dist));

	/* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
	j -= weights->patch_l;
	if (weights->new_line)
	{
		/* New line */
		weights->new_line = 0;
		index = weights->index[j];	/* row pointer */
		weights->index[index] = i;	/* min */
		weights->index[index+1] = 0;	/* len */
	}
	index = weights->index[j];
	min = weights->index[index++];
	len = weights->index[index++];
	while (i < min)
	{
		/* This only happens in rare cases, but we need to insert
		 * one earlier. In exceedingly rare cases we may need to
		 * insert more than one earlier. */
		int k;

		for (k = len; k > 0; k--)
		{
			weights->index[index+k] = weights->index[index+k-1];
		}
		weights->index[index] = 0;
		min--;
		len++;
		weights->index[index-2] = min;
		weights->index[index-1] = len;
	}
	if (i-min >= len)
	{
		/* The usual case */
		while (i-min >= ++len)
		{
			weights->index[index+len-1] = 0;
		}
		assert(len-1 == i-min);
		weights->index[index+i-min] = weight;
		weights->index[index-1] = len;
		assert(len <= weights->max_len);
	}
	else
	{
		/* Infrequent case */
		weights->index[index+i-min] += weight;
	}
}

static void
reorder_weights(fz_weights *weights, int j, int src_w)
{
	int idx = weights->index[j - weights->patch_l];
	int min = weights->index[idx++];
	int len = weights->index[idx++];
	int max = weights->max_len;
	int tmp = idx+max;
	int i, off;

	/* Copy into the temporary area */
	memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);

	/* Pad out if required */
	assert(len <= max);
	assert(min+len <= src_w);
	off = 0;
	if (len < max)
	{
		memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
		len = max;
		if (min + len > src_w)
		{
			off = min + len - src_w;
			min = src_w - len;
			weights->index[idx-2] = min;
		}
		weights->index[idx-1] = len;
	}

	/* Copy back into the proper places */
	for (i = 0; i < len; i++)
	{
		weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
	}
}

/* Due to rounding and edge effects, the sums for the weights sometimes don't
 * add up to 256. This causes visible rendering effects. Therefore, we take
 * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
 * 256 for all pixels that are completely covered. See bug #691629. */
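/* Worked example (illustrative, not from the original source): with three
 * contributors whose ideal weights are each 256/3 = 85.33, all three round
 * to 85 and the row sums to 255, so a fully covered pixel would render
 * slightly dark; check_weights() below adds the shortfall (1) to the largest
 * weight, giving 85+86+85 = 256. */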
static void
check_weights(fz_weights *weights, int j, int w, float x, float wf)
{
	int idx, len;
	int sum = 0;
	int max = -256;
	int maxidx = 0;
	int i;

	idx = weights->index[j - weights->patch_l];
	idx++; /* min */
	len = weights->index[idx++];

	for (i = 0; i < len; i++)
	{
		int v = weights->index[idx++];
		sum += v;
		if (v > max)
		{
			max = v;
			maxidx = idx;
		}
	}
	/* If we aren't the first or last pixel, OR if the sum is too big
	 * then adjust it. */
	if (((j != 0) && (j != w-1)) || (sum > 256))
		weights->index[maxidx-1] += 256-sum;
	/* Otherwise, if we are the first pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == 0) && (x < 0.0001F) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
	/* Finally, if we are the last pixel, and it's fully covered, then
	 * adjust it. */
	else if ((j == w-1) && ((float)w-wf < 0.0001F) && (sum != 256))
		weights->index[maxidx-1] += 256-sum;
	DBUG(("total weight %d = %d\n", j, sum));
}

static fz_weights *
make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
{
	fz_weights *weights;
	float F, G;
	float window;
	int j;

	if (cache)
	{
		if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
			cache->filter == filter && cache->vertical == vertical &&
			cache->dst_w_int == dst_w_int &&
			cache->patch_l == patch_l && cache->patch_r == patch_r &&
			cache->n == n && cache->flip == flip)
		{
			return cache->weights;
		}
		cache->src_w = src_w;
		cache->x = x;
		cache->dst_w = dst_w;
		cache->filter = filter;
		cache->vertical = vertical;
		cache->dst_w_int = dst_w_int;
		cache->patch_l = patch_l;
		cache->patch_r = patch_r;
		cache->n = n;
		cache->flip = flip;
		fz_free(ctx, cache->weights);
		cache->weights = NULL;
	}

	if (dst_w < src_w)
	{
		/* Scaling down */
		F = dst_w / src_w;
		G = 1;
	}
	else
	{
		/* Scaling up */
		F = 1;
		G = src_w / dst_w;
	}
	window = filter->width / F;
	DBUG(("make_weights src_w=%d x=%g dst_w=%g patch_l=%d patch_r=%d F=%g window=%g\n", src_w, x, dst_w, patch_l, patch_r, F, window));
	weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
	if (!weights)
		return NULL;
	for (j = patch_l; j < patch_r; j++)
	{
		/* find the position of the centre of dst[j] in src space */
		float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
		int l, r;
		l = ceilf(centre - window);
		r = floorf(centre + window);
		DBUG(("%d: centre=%g l=%d r=%d\n", j, centre, l, r));
		init_weights(weights, j);
		for (; l <= r; l++)
		{
			add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
		}
		check_weights(weights, j, dst_w_int, x, dst_w);
		if (vertical)
		{
			reorder_weights(weights, j, src_w);
		}
	}
	weights->count++; /* weights->count = dst_w_int now */
	if (cache)
	{
		cache->weights = weights;
	}
	return weights;
}
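
/* Worked example (illustrative, using the triangle filter, width 1, for easy
 * arithmetic): halving a row with src_w=4, dst_w=2, x=0 gives F=0.5, G=1 and
 * window = 1/0.5 = 2. For j=0, centre = 0.5*4/2 - 0.5 = 0.5, so l=-1 and r=2;
 * i=-1 is rejected as out of range, and i=0,1,2 get weights
 * round(256 * 0.5 * triangle(|0.5 - (i+0.5)*0.5|)) = 96, 96, 32. These sum to
 * 224, so check_weights() tops the largest up to 128 to make the total 256. */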

static void
scale_row_to_temp(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
	int *contrib = &weights->index[weights->index[0]];
	int len, i, j, n;
	unsigned char *min;
	int tmp[FZ_MAX_COLORS];
	int *t = tmp;

	n = weights->n;
	for (j = 0; j < n; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		dst += (weights->count-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
			dst -= n*2;
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = &src[n * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				for (j = n; j > 0; j--)
					*t++ += *min++ * *contrib;
				t -= n;
				contrib++;
			}
			for (j = n; j > 0; j--)
			{
				*dst++ = (unsigned char)(*t>>8);
				*t++ = 128;
			}
			t -= n;
		}
	}
}

#ifdef ARCH_ARM

static void
scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights)
__attribute__((naked));

static void
scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
__attribute__((naked));

static void
scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
__attribute__((naked));

static void
scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row)
__attribute__((naked));

static void
scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r7,r9,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 5f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3 @ dst += count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = a = 128 \n"
	"add r4, r1, r4 @ r4 = min = &src[r4] \n"
	"subs r9, r9, #1 @ len-- \n"
	"blt 3f @ while (len >= 0) \n"
	"2: @ { \n"
	"ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
	"ldrgtb r7, [r4], #1 @ r7 = *min++ \n"
	"ldr r12,[r2], #4 @ r12 = *contrib++ \n"
	"ldrb r14,[r4], #1 @ r14 = *min++ \n"
	"mlagt r5, r6, r7, r5 @ g += r6 * r7 \n"
	"subs r9, r9, #2 @ r9 = len -= 2 \n"
	"mla r5, r12,r14,r5 @ g += r14 * r12 \n"
	"bge 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"strb r5,[r0, #-1]! @ *--dst=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
	"5:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"6:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = a = 128 \n"
	"add r4, r1, r4 @ r4 = min = &src[r4] \n"
	"subs r9, r9, #1 @ len-- \n"
	"blt 9f @ while (len > 0) \n"
	"7: @ { \n"
	"ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
	"ldrgtb r7, [r4], #1 @ r7 = *min++ \n"
	"ldr r12,[r2], #4 @ r12 = *contrib++ \n"
	"ldrb r14,[r4], #1 @ r14 = *min++ \n"
	"mlagt r5, r6,r7,r5 @ a += r6 * r7 \n"
	"subs r9, r9, #2 @ r9 = len -= 2 \n"
	"mla r5, r12,r14,r5 @ a += r14 * r12 \n"
	"bge 7b @ } \n"
	"9: \n"
	"mov r5, r5, LSR #8 @ a >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 6b @ \n"
	"ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r6,r9-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #1 @ dst += 2*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = g = 128 \n"
	"mov r6, #128 @ r6 = a = 128 \n"
	"add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r14,[r2], #4 @ r14 = *contrib++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r11,r5 @ g += r11 * r14 \n"
	"mla r6, r14,r12,r6 @ a += r12 * r14 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"mov r6, r6, lsr #8 @ a >>= 8 \n"
	"strb r5, [r0, #-2]! @ *--dst=a \n"
	"strb r6, [r0, #1] @ *--dst=g \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
	"4:"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5:"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r5, #128 @ r5 = g = 128 \n"
	"mov r6, #128 @ r6 = a = 128 \n"
	"add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r14,[r2], #4 @ r10 = *contrib++ \n"
	"ldrb r11,[r4], #1 @ r11 = *min++ \n"
	"ldrb r12,[r4], #1 @ r12 = *min++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"mla r5, r14,r11,r5 @ g += r11 * r14 \n"
	"mla r6, r14,r12,r6 @ a += r12 * r14 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r5, r5, lsr #8 @ g >>= 8 \n"
	"mov r6, r6, lsr #8 @ a >>= 8 \n"
	"strb r5, [r0], #1 @ *dst++=g \n"
	"strb r6, [r0], #1 @ *dst++=a \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
	asm volatile(
	ENTER_ARM
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = weights \n"
	"ldr r12,[r2],#4 @ r12= flip \n"
	"ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
	"ldr r4, [r2] @ r4 = index[0] \n"
	"ldr r5,=0x00800080 @ r5 = rounding \n"
	"ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n"
	"cmp r12,#0 @ if (flip) \n"
	"beq 4f @ { \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"add r0, r0, r3, LSL #2 @ dst += 4*count \n"
	"1: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r7, r5 @ r7 = b = rounding \n"
	"mov r8, r5 @ r8 = a = rounding \n"
	"add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 3f @ { \n"
	"2: \n"
	"ldr r11,[r4], #4 @ r11 = *min++ \n"
	"ldr r10,[r2], #4 @ r10 = *contrib++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"and r12,r6, r11 @ r12 = __22__00 \n"
	"and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
	"mla r7, r10,r12,r7 @ b += r14 * r10 \n"
	"mla r8, r10,r11,r8 @ a += r11 * r10 \n"
	"bgt 2b @ } \n"
	"3: \n"
	"and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
	"bic r8, r8, r6 @ r8 = 33__11__ \n"
	"orr r7, r7, r8 @ r7 = 33221100 \n"
	"str r7, [r0, #-4]! @ *--dst=r \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 1b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	"4: \n"
	"add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
	"5: \n"
	"ldr r4, [r2], #4 @ r4 = *contrib++ \n"
	"ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
	"mov r7, r5 @ r7 = b = rounding \n"
	"mov r8, r5 @ r8 = a = rounding \n"
	"add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
	"cmp r9, #0 @ while (len-- > 0) \n"
	"beq 7f @ { \n"
	"6: \n"
	"ldr r11,[r4], #4 @ r11 = *min++ \n"
	"ldr r10,[r2], #4 @ r10 = *contrib++ \n"
	"subs r9, r9, #1 @ r9 = len-- \n"
	"and r12,r6, r11 @ r12 = __22__00 \n"
	"and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
	"mla r7, r10,r12,r7 @ b += r14 * r10 \n"
	"mla r8, r10,r11,r8 @ a += r11 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
	"bic r8, r8, r6 @ r8 = 33__11__ \n"
	"orr r7, r7, r8 @ r7 = 33221100 \n"
	"str r7, [r0], #4 @ *dst++=r \n"
	"subs r3, r3, #1 @ i-- \n"
	"bgt 5b @ \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	ENTER_THUMB
	);
}

static void
scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row)
{
	asm volatile(
	ENTER_ARM
	"ldr r12,[r13] @ r12= row \n"
	"add r2, r2, #24 @ r2 = weights->index \n"
	"stmfd r13!,{r4-r11,r14} \n"
	"@ r0 = dst \n"
	"@ r1 = src \n"
	"@ r2 = &weights->index[0] \n"
	"@ r3 = width \n"
	"@ r12= row \n"
	"ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
	"add r2, r2, #4 @ r2 = &index[1] \n"
	"subs r6, r3, #4 @ r6 = x = width-4 \n"
	"ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
	" @ r14= len = *contrib \n"
	"blt 4f @ while (x >= 0) { \n"
#ifndef ARCH_ARM_CAN_LOAD_UNALIGNED
	"tst r3, #3 @ if ((r3 & 3) \n"
	"tsteq r1, #3 @ || (r1 & 3)) \n"
	"bne 4f @ can't do fast code \n"
#endif
	"ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n"
	"1: \n"
	"ldr r7, =0x00800080 @ r5 = val0 = round \n"
	"stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n"
	" @ r1 = min = src \n"
	" @ r2 = contrib2-4 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"mov r5, r7 @ r7 = val1 = round \n"
	"ble 3f @ while (len2-- > 0) { \n"
	"2: \n"
	"ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n"
	"ldr r10,[r2, #4]! @ r10 = *contrib2++ \n"
	"subs r8, r8, #1 @ len2-- \n"
	"and r11,r9, r12 @ r11= __22__00 \n"
	"and r12,r9, r12,LSR #8 @ r12= __33__11 \n"
	"mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n"
	"mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n"
	"bgt 2b @ } \n"
	"and r5, r9, r5, LSR #8 @ r5 = __22__00 \n"
	"and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n"
	"orr r5, r5, r7 @ r5 = 33221100 \n"
	"3: \n"
	"ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n"
	"subs r6, r6, #4 @ x-- \n"
	"add r1, r1, #4 @ src++ \n"
	"str r5, [r0], #4 @ *dst++ = val \n"
	"bge 1b @ \n"
	"4: @ } (Less than 4 to go) \n"
	"adds r6, r6, #4 @ r6 = x += 4 \n"
	"beq 8f @ if (x == 0) done \n"
	"5: \n"
	"mov r5, r1 @ r5 = min = src \n"
	"mov r7, #128 @ r7 = val = 128 \n"
	"movs r8, r14 @ r8 = len2 = len \n"
	"add r9, r2, #4 @ r9 = contrib2 \n"
	"ble 7f @ while (len2-- > 0) { \n"
	"6: \n"
	"ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
	"ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n"
	"subs r8, r8, #1 @ len2-- \n"
	"@ stall r12 \n"
	"mla r7, r10,r12,r7 @ val += r12 * r10 \n"
	"bgt 6b @ } \n"
	"7: \n"
	"mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
	"subs r6, r6, #1 @ x-- \n"
	"add r1, r1, #1 @ src++ \n"
	"strb r7, [r0], #1 @ *dst++ = val \n"
	"bgt 5b @ \n"
	"8: \n"
	"ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
	".ltorg \n"
	ENTER_THUMB
	);
}
#else

static void
scale_row_to_temp1(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
	int *contrib = &weights->index[weights->index[0]];
	int len, i;
	unsigned char *min;

	assert(weights->n == 1);
	if (weights->flip)
	{
		dst += weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int val = 128;
			min = &src[*contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				val += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(val>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int val = 128;
			min = &src[*contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				val += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(val>>8);
		}
	}
}

static void
scale_row_to_temp2(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
	int *contrib = &weights->index[weights->index[0]];
	int len, i;
	unsigned char *min;

	assert(weights->n == 2);
	if (weights->flip)
	{
		dst += 2*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			min = &src[2 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				c1 += *min++ * *contrib;
				c2 += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(c2>>8);
			*--dst = (unsigned char)(c1>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int c1 = 128;
			int c2 = 128;
			min = &src[2 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				c1 += *min++ * *contrib;
				c2 += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(c1>>8);
			*dst++ = (unsigned char)(c2>>8);
		}
	}
}

static void
scale_row_to_temp4(unsigned char *dst, unsigned char *src, fz_weights *weights)
{
	int *contrib = &weights->index[weights->index[0]];
	int len, i;
	unsigned char *min;

	assert(weights->n == 4);
	if (weights->flip)
	{
		dst += 4*weights->count;
		for (i=weights->count; i > 0; i--)
		{
			int r = 128;
			int g = 128;
			int b = 128;
			int a = 128;
			min = &src[4 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				r += *min++ * *contrib;
				g += *min++ * *contrib;
				b += *min++ * *contrib;
				a += *min++ * *contrib++;
			}
			*--dst = (unsigned char)(a>>8);
			*--dst = (unsigned char)(b>>8);
			*--dst = (unsigned char)(g>>8);
			*--dst = (unsigned char)(r>>8);
		}
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			int r = 128;
			int g = 128;
			int b = 128;
			int a = 128;
			min = &src[4 * *contrib++];
			len = *contrib++;
			while (len-- > 0)
			{
				r += *min++ * *contrib;
				g += *min++ * *contrib;
				b += *min++ * *contrib;
				a += *min++ * *contrib++;
			}
			*dst++ = (unsigned char)(r>>8);
			*dst++ = (unsigned char)(g>>8);
			*dst++ = (unsigned char)(b>>8);
			*dst++ = (unsigned char)(a>>8);
		}
	}
}

static void
scale_row_from_temp(unsigned char *dst, unsigned char *src, fz_weights *weights, int width, int row)
{
	int *contrib = &weights->index[weights->index[row]];
	int len, x;

	contrib++; /* Skip min */
	len = *contrib++;
	for (x=width; x > 0; x--)
	{
		unsigned char *min = src;
		int val = 128;
		int len2 = len;
		int *contrib2 = contrib;

		while (len2-- > 0)
		{
			val += *min * *contrib2++;
			min += width;
		}
		*dst++ = (unsigned char)(val>>8);
		src++;
	}
}
#endif

#ifdef SINGLE_PIXEL_SPECIALS
static void
duplicate_single_pixel(unsigned char *dst, unsigned char *src, int n, int w, int h)
{
	int i;

	for (i = n; i > 0; i--)
		*dst++ = *src++;
	for (i = (w*h-1)*n; i > 0; i--)
	{
		*dst = dst[-n];
		dst++;
	}
}

static void
scale_single_row(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int h)
{
	int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j, n;
	int tmp[FZ_MAX_COLORS];

	n = weights->n;
	/* Scale a single row */
	for (j = 0; j < n; j++)
		tmp[j] = 128;
	if (weights->flip)
	{
		dst += (weights->count-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * *contrib;
				contrib++;
			}
			for (j = 0; j < n; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			dst -= 2*n;
		}
		dst += n * (weights->count+1);
	}
	else
	{
		for (i=weights->count; i > 0; i--)
		{
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * *contrib;
				contrib++;
			}
			for (j = 0; j < n; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
		}
	}
	/* And then duplicate it h times */
	n *= weights->count;
	while (--h > 0)
	{
		memcpy(dst, dst-n, n);
		dst += n;
	}
}

static void
scale_single_col(unsigned char *dst, unsigned char *src, fz_weights *weights, int src_w, int n, int w, int flip_y)
{
	int *contrib = &weights->index[weights->index[0]];
	int min, len, i, j;
	int tmp[FZ_MAX_COLORS];

	for (j = 0; j < n; j++)
		tmp[j] = 128;
	if (flip_y)
	{
		src_w = (src_w-1)*n;
		w = (w-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min = src_w-min*n;
			while (len-- > 0)
			{
				for (j = 0; j < n; j++)
					tmp[j] += src[src_w-min+j] * *contrib;
				contrib++;
			}
			for (j = 0; j < n; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = w; j > 0; j--)
			{
				*dst = dst[-n];
				dst++;
			}
		}
	}
	else
	{
		w = (w-1)*n;
		for (i=weights->count; i > 0; i--)
		{
			/* Scale the next pixel in the column */
			min = *contrib++;
			len = *contrib++;
			min *= n;
			while (len-- > 0)
			{
				for (j = 0; j < n; j++)
					tmp[j] += src[min++] * *contrib;
				contrib++;
			}
			for (j = 0; j < n; j++)
			{
				*dst++ = (unsigned char)(tmp[j]>>8);
				tmp[j] = 128;
			}
			/* And then duplicate it across the row */
			for (j = w; j > 0; j--)
			{
				*dst = dst[-n];
				dst++;
			}
		}
	}
}
#endif /* SINGLE_PIXEL_SPECIALS */

fz_pixmap *
fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, fz_irect *clip)
{
	return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
}

fz_pixmap *
fz_scale_pixmap_cached(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
{
	fz_scale_filter *filter = &fz_scale_filter_simple;
	fz_weights *contrib_rows = NULL;
	fz_weights *contrib_cols = NULL;
	fz_pixmap *output = NULL;
	unsigned char *temp = NULL;
	int max_row, temp_span, temp_rows, row;
	int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
	int flip_x, flip_y;
	fz_rect patch;

	fz_var(contrib_cols);
	fz_var(contrib_rows);

	DBUG(("Scale: (%d,%d) to (%g,%g) at (%g,%g)\n",src->w,src->h,w,h,x,y));

	/* Avoid extreme scales where overflows become problematic. */
	if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
		return NULL;
	if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
		return NULL;

	/* Clamp small ranges of w and h */
	if (w <= -1)
	{
	}
	else if (w < 0)
	{
		w = -1;
	}
	else if (w < 1)
	{
		w = 1;
	}
	if (h <= -1)
	{
	}
	else if (h < 0)
	{
		h = -1;
	}
	else if (h < 1)
	{
		h = 1;
	}

	/* Find the destination bbox, width/height, and sub pixel offset,
	 * allowing for whether we're flipping or not. */
	/* The (x,y) position given describes where the top left corner
	 * of the source image should be mapped to (i.e. where (0,0) in image
	 * space ends up). Also there are differences in the way we scale
	 * horizontally and vertically. When scaling rows horizontally, we
	 * always read forwards through the source, and store either forwards
	 * or in reverse as required. When scaling vertically, we always store
	 * out forwards, but may feed source rows in, in a different order.
	 *
	 * Consider the image rectangle 'r' to which the image is mapped,
	 * and the (possibly) larger rectangle 'R', given by expanding 'r' to
	 * complete pixels.
	 *
	 * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
	 * the image is x flipped or not. Whatever happens, 0 <= x < 1.
	 * y is always R.ymax - r.ymax.
	 */
	/* dst_x_int is calculated to be the left of the scaled image, and
	 * x (the sub pixel offset) is the distance in from either the left
	 * or right pixel expanded edge. */
	flip_x = (w < 0);
	if (flip_x)
	{
		float tmp;
		w = -w;
		dst_x_int = floorf(x-w);
		tmp = ceilf(x);
		dst_w_int = (int)tmp;
		x = tmp - x;
		dst_w_int -= dst_x_int;
	}
	else
	{
		dst_x_int = floorf(x);
		x -= (float)dst_x_int;
		dst_w_int = (int)ceilf(x + w);
	}
	/* dst_y_int is calculated to be the top of the scaled image, and
	 * y (the sub pixel offset) is the distance in from either the top
	 * or bottom pixel expanded edge.
	 */
	flip_y = (h < 0);
	if (flip_y)
	{
		float tmp;
		h = -h;
		dst_y_int = floorf(y-h);
		tmp = ceilf(y);
		dst_h_int = (int)tmp;
		y = tmp - y;
		dst_h_int -= dst_y_int;
	}
	else
	{
		dst_y_int = floorf(y);
		y -= (float)dst_y_int;
		dst_h_int = (int)ceilf(y + h);
	}

	DBUG(("Result image: (%d,%d) at (%d,%d) (subpix=%g,%g)\n", dst_w_int, dst_h_int, dst_x_int, dst_y_int, x, y));

	/* Step 0: Calculate the patch */
	patch.x0 = 0;
	patch.y0 = 0;
	patch.x1 = dst_w_int;
	patch.y1 = dst_h_int;
	if (clip)
	{
		DBUG(("Clip: (%d,%d) -> (%d,%d)\n", clip->x0, clip->y0, clip->x1, clip->y1));
		if (flip_x)
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x0 = dst_x_int + dst_w_int - clip->x1;
			if (clip->x0 > dst_x_int)
			{
				patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
				dst_x_int = clip->x0;
			}
		}
		else
		{
			if (dst_x_int + dst_w_int > clip->x1)
				patch.x1 = clip->x1 - dst_x_int;
			if (clip->x0 > dst_x_int)
			{
				patch.x0 = clip->x0 - dst_x_int;
				dst_x_int += patch.x0;
			}
		}

		if (flip_y)
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int = clip->y0;
			}
		}
		else
		{
			if (dst_y_int + dst_h_int > clip->y1)
				patch.y1 = clip->y1 - dst_y_int;
			if (clip->y0 > dst_y_int)
			{
				patch.y0 = clip->y0 - dst_y_int;
				dst_y_int += patch.y0;
			}
		}
	}
	DBUG(("Patch: (%g,%g) -> (%g,%g)\n", patch.x0, patch.y0, patch.x1, patch.y1));
	if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
		return NULL;

	fz_try(ctx)
	{
		/* Step 1: Calculate the weights for columns and rows */
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->w == 1)
			contrib_cols = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_cols = make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x);
#ifdef SINGLE_PIXEL_SPECIALS
		if (src->h == 1)
			contrib_rows = NULL;
		else
#endif /* SINGLE_PIXEL_SPECIALS */
			contrib_rows = make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y);

		output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0);
	}
	fz_catch(ctx)
	{
		if (!cache_x)
			fz_free(ctx, contrib_cols);
		if (!cache_y)
			fz_free(ctx, contrib_rows);
		fz_rethrow(ctx);
	}
	output->x = dst_x_int;
	output->y = dst_y_int;

	/* Step 2: Apply the weights */
#ifdef SINGLE_PIXEL_SPECIALS
	if (!contrib_rows)
	{
		/* Only 1 source pixel high. */
		if (!contrib_cols)
		{
			/* Only 1 pixel in the entire image! */
			duplicate_single_pixel(output->samples, src->samples, src->n, patch.x1-patch.x0, patch.y1-patch.y0);
		}
		else
		{
			/* Scale the row once, then copy it. */
			scale_single_row(output->samples, src->samples, contrib_cols, src->w, patch.y1-patch.y0);
		}
	}
	else if (!contrib_cols)
	{
		/* Only 1 source pixel wide. Scale the col and duplicate. */
		scale_single_col(output->samples, src->samples, contrib_rows, src->h, src->n, patch.x1-patch.x0, flip_y);
	}
	else
#endif /* SINGLE_PIXEL_SPECIALS */
	{
		void (*row_scale)(unsigned char *dst, unsigned char *src, fz_weights *weights);

		temp_span = contrib_cols->count * src->n;
		temp_rows = contrib_rows->max_len;
		if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
			goto cleanup;
		fz_try(ctx)
		{
			temp = fz_calloc(ctx, temp_span*temp_rows, sizeof(unsigned char));
		}
		fz_catch(ctx)
		{
			fz_drop_pixmap(ctx, output);
			if (!cache_x)
				fz_free(ctx, contrib_cols);
			if (!cache_y)
				fz_free(ctx, contrib_rows);
			fz_rethrow(ctx);
		}
		switch (src->n)
		{
		default:
			row_scale = scale_row_to_temp;
			break;
		case 1: /* Image mask case */
			row_scale = scale_row_to_temp1;
			break;
		case 2: /* Greyscale with alpha case */
			row_scale = scale_row_to_temp2;
			break;
		case 4: /* RGBA */
			row_scale = scale_row_to_temp4;
			break;
		}
		max_row = contrib_rows->index[contrib_rows->index[0]];
		for (row = 0; row < contrib_rows->count; row++)
		{
			/*
			Which source rows do we need to have scaled into the
			temporary buffer in order to be able to do the final
			scale?
			*/
			int row_index = contrib_rows->index[row];
			int row_min = contrib_rows->index[row_index++];
			int row_len = contrib_rows->index[row_index];
			while (max_row < row_min+row_len)
			{
				/* Scale another row */
				assert(max_row < src->h);
				DBUG(("scaling row %d to temp\n", max_row));
				(*row_scale)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->w*src->n], contrib_cols);
				max_row++;
			}

			DBUG(("scaling row %d from temp\n", row));
			scale_row_from_temp(&output->samples[row*output->w*output->n], temp, contrib_rows, temp_span, row);
		}
		fz_free(ctx, temp);
	}

cleanup:
	if (!cache_y)
		fz_free(ctx, contrib_rows);
	if (!cache_x)
		fz_free(ctx, contrib_cols);
	return output;
}

void
fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
{
	if (!sc)
		return;
	fz_free(ctx, sc->weights);
	fz_free(ctx, sc);
}

fz_scale_cache *
fz_new_scale_cache(fz_context *ctx)
{
	return fz_malloc_struct(ctx, fz_scale_cache);
}
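
/* Illustrative usage sketch (not part of the original file) of the cached
 * interface: reusing fz_scale_cache objects across repeated scales with the
 * same geometry lets fz_scale_pixmap_cached() skip rebuilding the weight
 * tables. The sizes and iteration count here are arbitrary assumptions. */
#if 0
static void
example_scale_many(fz_context *ctx, fz_pixmap *src)
{
	fz_scale_cache *cache_x = fz_new_scale_cache(ctx);
	fz_scale_cache *cache_y = fz_new_scale_cache(ctx);
	int i;

	for (i = 0; i < 10; i++)
	{
		fz_pixmap *out = fz_scale_pixmap_cached(ctx, src, 0.0f, 0.0f, 100.0f, 80.0f, NULL, cache_x, cache_y);
		if (out)
			fz_drop_pixmap(ctx, out);
	}
	fz_drop_scale_cache(ctx, cache_x);
	fz_drop_scale_cache(ctx, cache_y);
}
#endif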