Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
241818 views
1
#ifndef _ASM_INCLUDE_
2
#define _ASM_INCLUDE_
3
4
/*
5
Copyright 2007 Andrew V. Sutherland
6
7
This file is part of smalljac.
8
9
smalljac is free software: you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation, either version 2 of the License, or
12
(at your option) any later version.
13
14
smalljac is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
GNU General Public License for more details.
18
19
You should have received a copy of the GNU General Public License
20
along with smalljac. If not, see <http://www.gnu.org/licenses/>.
21
*/
22
23
24
// This code could all be optimized - it is just a first stab
// Returns the bit index (0..63) of the highest set bit of x, via x86-64 "bsrq".
// NOTE(review): bsr leaves the destination undefined when the source is 0, so
// the result is unspecified for x == 0 — callers must pass x != 0 (TODO confirm).
static inline unsigned long _asm_highbit (unsigned long x) { asm ("bsrq %0, %0" : "=r" (x) : "0" (x)); return x; }
26
27
#define _asm_div_q_q(q,r,x,y) asm ("divq %4" :"=a"(q) ,"=d"(r) : "0"(x), "1"(r), "rm"(y))
28
#define _asm_mult_1_1(z1,z0,x0,y0) asm ("mulq %3" :"=a"(z0) ,"=d"(z1) : "a"(x0), "rm"(y0))
29
#define _asm_mult_2_2_1(z1,z0,x1,x0,y0) asm ("mulq %3" :"=a"(z0) ,"=d"(z1) : "a"(x0), "rm"(y0));(z1)+=(y0)*(x1)
30
#define _asm_addto_2_2(z1,z0,x1,x0) asm ("addq %3,%0;adcq %5,%1":"=r"(z0),"=r"(z1): "0"(z0), "rim"(x0), "1"(z1), "rim"(x1):"cc")
31
#define _asm_addto_2_1(z1,z0,x0) asm ("addq %3,%0;adcq $0,%1":"=r"(z0),"=r"(z1): "0"(z0), "rim"(x0), "1"(z1):"cc")
32
#define _asm_addto_3_3(z2,z1,z0,x2,x1,x0) asm ("addq %4,%0;adcq %6,%1;adcq %8,%2":"=r"(z0),"=r"(z1),"=r"(z2): "0"(z0), "rim"(x0), "1"(z1), "rim"(x1), "2"(z2), "rim"(x2) :"cc")
33
#define _asm_addto_3_2(z2,z1,z0,x1,x0) asm ("addq %4,%0;adcq %6,%1;adcq 0,%2":"=r"(z0),"=r"(z1),"=r"(z2): "0"(z0), "rim"(x0), "1"(z1), "rim"(x1), "2"(z2) :"cc")
34
#define _asm_subfrom_2_2(z1,z0,x1,x0) asm ("subq %3,%0;sbbq %5,%1":"=r"(z0),"=r"(z1): "0"(z0), "rim"(x0), "1"(z1), "rim"(x1):"cc")
35
// increment needs to propagate the carry - performance not critical here anyway
// (z1,z0) += 1, with the carry from the low limb rippling into z1.
#define _asm_inc_2(z1,z0) asm ("addq $1,%0;adcq $0,%1":"=r"(z0),"=r"(z1): "0"(z0), "1"(z1):"cc")
// NOTE(review): the two disabled shift macros below look wrong as written —
// AT&T syntax puts the shift count first ("shlq $1,%0", or count-less "shlq %0"),
// not "shlq %0,1". Verify the operand order before re-enabling either of them.
//#define _asm_shiftl_2(z1,z0) asm ("shlq %0,1;rclq %1,1":"=r"(z0),"=r"(z1): "0"(z0), "1"(z1):"cc")
//#define _asm_shiftr_2(z1,z0) asm ("shrq %1,1;rcrq %0,1":"=r"(z0),"=r"(z1): "0"(z0), "1"(z1):"cc")
39
40
41
// (z2,z1,z0) = (x1,x0) * y0: two single-limb multiplies, then the high limb of
// the low product is carried into the upper pair.
// Wrapped in do { } while (0) so the macro acts as a single statement and is
// safe inside an unbraced if/else (the original bare { } block followed by the
// caller's ";" broke that).  The temporary is renamed from "__u": identifiers
// containing a double underscore are reserved for the implementation.
#define _asm_mult_3_2_1(z2,z1,z0,x1,x0,y0) do { unsigned long _mu; \
	_asm_mult_1_1 (_mu,z0,x0,y0); \
	_asm_mult_1_1 (z2,z1,x1,y0); \
	_asm_addto_2_1 (z2,z1,_mu); } while (0)
45
46
// Computes the 3-limb product z = x * y, where x and y are 2-limb little-endian
// values (value = w[1]*2^64 + w[0]).
// Precondition (from the original comment): x[1] and y[1] < 2^31, so the plain
// 64-bit multiply x[1]*y[1] plus the accumulated carry R1 fits in the top limb.
// Returns z[2], the high limb.  (The original declared a non-void return type
// but fell off the end — undefined behavior if the caller used the result.)
static inline unsigned long _asm_mult_3_2_2 (unsigned long z[3], unsigned long x[2], unsigned long y[2])
{
	unsigned long U, V, R0, R1;	// the original's R2 was never used

	R1 = 0;
	_asm_mult_1_1(R0,z[0],x[0],y[0]);	// (R0, z[0]) = x0*y0
	_asm_mult_1_1(U,V,x[0],y[1]);		// (U, V) = x0*y1
	_asm_addto_2_2(R1,R0,U,V);		// fold in first cross product
	_asm_mult_1_1(U,V,x[1],y[0]);		// (U, V) = x1*y0
	_asm_addto_2_2(R1,R0,U,V);		// fold in second cross product
	z[1] = R0;
	z[2] = x[1]*y[1]+R1;			// top limb; no overflow by precondition
	return z[2];
}
60
61
// This function assumes that x[1] and y[1] < 2^31
// NOTE(review): z2, z1 and z0 are received BY VALUE, so every store below —
// including the macro's write into z0 — is discarded when the function returns,
// and the function also falls off the end despite its unsigned long return type
// (undefined behavior if the caller uses the result).  As written this routine
// has no observable effect; it presumably needs pointer/reference-style outputs
// like _asm_mult_3_2_2's z[3] array.  TODO confirm intent against callers.
static inline unsigned long _asm_mult_3_2_2r (unsigned long z2, unsigned long z1, unsigned long z0, unsigned long x[2], unsigned long y[2])
{
register unsigned long U, V, R0,R1, R2;	// NOTE(review): R2 is never used

R1 = 0;
_asm_mult_1_1(R0,z0,x[0],y[0]);
_asm_mult_1_1(U,V,x[0],y[1]);
_asm_addto_2_2(R1,R0,U,V);
_asm_mult_1_1(U,V,x[1],y[0]);
_asm_addto_2_2(R1,R0,U,V);
z1 = R0;
z2 = x[1]*y[1]+R1;
}
75
76
77
// This function assumes that x[1] < 2^31. For no obvious reason, this is slower than multiplying?!
// Computes the 3-limb square z = x^2 for a 2-limb little-endian x
// (value = x[1]*2^64 + x[0]).  The cross product x0*x1 is computed once and
// added in twice; the x[1] < 2^31 precondition keeps the top limb from
// overflowing.
// Returns z[2], the high limb.  (The original declared a non-void return type
// but fell off the end — undefined behavior if the caller used the result.)
static inline unsigned long _asm_square_3_2 (unsigned long z[3], unsigned long x[2])
{
	unsigned long U, V, R0, R1;	// the original's R2 was never used

	R1 = 0;
	_asm_mult_1_1(R0,z[0],x[0],x[0]);	// (R0, z[0]) = x0^2
	_asm_mult_1_1(U,V,x[0],x[1]);		// (U, V) = x0*x1
	_asm_addto_2_2(R1,R0,U,V);		// add cross product once...
	_asm_addto_2_2(R1,R0,U,V);		// ...and again (doubles it)
	z[1] = R0;
	z[2] = x[1]*x[1]+R1;			// top limb; no overflow by precondition
	return z[2];
}
90
#endif
91
92