Path: blob/master/src/sage/schemes/hyperelliptic_curves/hypellfrob/hypellfrob.cpp
8823 views
/* ============================================================================12hypellfrob.cpp: main matrix() routine34This file is part of hypellfrob (version 2.1.1).56Copyright (C) 2007, 2008, David Harvey78This program is free software; you can redistribute it and/or modify9it under the terms of the GNU General Public License as published by10the Free Software Foundation; either version 2 of the License, or11(at your option) any later version.1213This program is distributed in the hope that it will be useful,14but WITHOUT ANY WARRANTY; without even the implied warranty of15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the16GNU General Public License for more details.1718You should have received a copy of the GNU General Public License along19with this program; if not, write to the Free Software Foundation, Inc.,2051 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.2122============================================================================ */23242526#include "hypellfrob.h"27#include "recurrences_ntl.h"28#include "recurrences_zn_poly.h"293031NTL_CLIENT323334namespace hypellfrob {353637/*38For some reason NTL doesn't have conversions to mat_ZZ....39*/40void conv(mat_ZZ& x, const mat_ZZ_p& a)41{42x.SetDims(a.NumRows(), a.NumCols());43for (int i = 0; i < a.NumRows(); i++)44for (int j = 0; j < a.NumCols(); j++)45x[i][j] = rep(a[i][j]);46}4748void conv(mat_ZZ& x, const mat_zz_p& a)49{50x.SetDims(a.NumRows(), a.NumCols());51for (int i = 0; i < a.NumRows(); i++)52for (int j = 0; j < a.NumCols(); j++)53x[i][j] = rep(a[i][j]);54}5556inline mat_ZZ to_mat_ZZ(const mat_ZZ_p& a)57{ mat_ZZ x; conv(x, a); return x; }5859inline mat_ZZ to_mat_ZZ(const mat_zz_p& a)60{ mat_ZZ x; conv(x, a); return x; }61626364/*65Let M(x) be the matrix M0 + x*M1; this is a matrix of linear polys in x.66Let M(a, b) = M(a + 1) M(a + 2) ... M(b). This function evaluates the products67M(a[i], b[i]) for some sequence of intervals68a[0] < b[0] <= a[1] < b[1] <= ... <= a[n-1] < b[n-1].6970The intervals are supplied in "target", simply as the list71a[0], b[0], a[1], b[1], ...7273There are three possible underlying implementations:74* ntl_interval_products (ZZ_p version),75* ntl_interval_products (zz_p version),76* zn_poly_interval_products.77This function is a wrapper which takes ZZ_p input, calls one of the three78above implementations depending on the size of the current ZZ_p modulus, and79produces ouptut in ZZ_p format.8081If the force_ntl flag is set, it will never use the zn_poly version.8283Note that the zn_poly version occasionally fails; this happens more frequently84for smaller p, but is extremely rare for larger p. This wrapper detects this85and falls back on the zz_p/ZZ_p versions, which should never fail.8687PRECONDITIONS:88Let d = b[n-1] - a[0]. Then 2, 3, ... 1 + floor(sqrt(d)) must all be89invertible.9091*/92void interval_products_wrapper(vector<mat_ZZ_p>& output,93const mat_ZZ_p& M0, const mat_ZZ_p& M1,94const vector<ZZ>& target,95int force_ntl = 0)96{97const ZZ& modulus = ZZ_p::modulus();9899if (!force_ntl && modulus <= (1UL << (ULONG_BITS - 1)) - 1)100{101// Small modulus; let's try using zn_poly if we're allowed.102// (we don't allow the full ULONG_BITS bits, because I'm worried I103// sometimes use NTL code which requires longs rather than ulongs.)104zn_mod_t mod;105zn_mod_init(mod, to_ulong(modulus));106107int r = M0.NumRows();108109// convert to zn_mod format110vector<vector<ulong> > M0_temp(r, vector<ulong>(r));111vector<vector<ulong> > M1_temp(r, vector<ulong>(r));112vector<vector<vector<ulong> > > output_temp(target.size()/2, M0_temp);113114for (int x = 0; x < r; x++)115for (int y = 0; y < r; y++)116{117M0_temp[y][x] = to_ulong(rep(M0[y][x]));118M1_temp[y][x] = to_ulong(rep(M1[y][x]));119}120121int success = zn_poly_interval_products(output_temp, M0_temp, M1_temp,122target, mod);123zn_mod_clear(mod);124125// note: if we failed, we fall back on the ZZ_p or zz_p versions below126if (success)127{128// convert back to ZZ_p format129output.clear();130mat_ZZ_p temp;131temp.SetDims(r, r);132for (int i = 0; i < target.size()/2; i++)133{134for (int x = 0; x < r; x++)135for (int y = 0; y < r; y++)136temp[y][x] = output_temp[i][y][x];137output.push_back(temp);138}139140return;141}142143// failed144}145146if (!modulus.SinglePrecision())147{148// Large modulus.... go straight to the ZZ_p version149ntl_interval_products150<ZZ_p, ZZ_pX, ZZ_pXModulus, vec_ZZ_p, mat_ZZ_p, FFTRep>151(output, M0, M1, target);152}153else154{155// Modulus is small enough to work at single precision.156157// Save old single-precision modulus and set new modulus158zz_pContext context;159context.save();160zz_p::init(to_long(modulus));161162// Convert input data to single-precision format163int dim = M0.NumRows();164mat_zz_p M0_sp = to_mat_zz_p(to_mat_ZZ(M0));165mat_zz_p M1_sp = to_mat_zz_p(to_mat_ZZ(M1));166167// Run ntl_interval_products at single precision168vector<mat_zz_p> output_sp;169ntl_interval_products170<zz_p, zz_pX, zz_pXModulus, vec_zz_p, mat_zz_p, fftRep>171(output_sp, M0_sp, M1_sp, target);172173// convert output back to ZZ_p format174output.resize(output_sp.size());175for (int i = 0; i < output_sp.size(); i++)176output[i] = to_mat_ZZ_p(to_mat_ZZ(output_sp[i]));177178// restore old single-precision modulus179context.restore();180}181}182183184185/* ============================================================================186187Main routine for computing Frobenius matrix188189============================================================================ */190191192/*193Given polynomials f and g defined over Z/p^N, this routine computes polynomials194a and b such that a*f + b*g = 1 mod p^N, with deg(a) < deg(g) and195deg(b) < deg(f).196197PRECONDITIONS:198f and g must be relatively prime mod p.199The leading coefficients of f and g must be invertible mod p.200The current NTL modulus should be p^N, and should be the one used to create201the polynomials f and g.202203Returns 1 on success, 0 if f and g are not relatively prime mod p.204205NOTE:206NTL can almost do this, but not quite; its xgcd routine is only guaranteed207to work if the coefficient ring is a field. If you try it over Z/p^N, it208usually works, but sometimes you get unlucky, and it crashes and burns.209210*/211int padic_xgcd(ZZ_pX& a, ZZ_pX& b, const ZZ_pX& f, const ZZ_pX& g,212const ZZ& p, int N)213{214ZZ_pContext modulus;215modulus.save();216217// =================================================218// First do it mod p using NTL's xgcd routine219220// reduce down to Z/p221ZZ_p::init(p);222ZZ_pX f_red = to_ZZ_pX(to_ZZX(f));223ZZ_pX g_red = to_ZZ_pX(to_ZZX(g));224225// do xgcd mod p226ZZ_pX a_red, b_red, d_red;227XGCD(d_red, a_red, b_red, f_red, g_red);228229// lift results to Z/p^N230modulus.restore();231a = to_ZZ_pX(to_ZZX(a_red));232b = to_ZZ_pX(to_ZZX(b_red));233ZZ_pX d = to_ZZ_pX(to_ZZX(d_red));234235if (deg(d) != 0)236return 0;237238a /= d;239b /= d;240241// =================================================242// Now improve the approximation until we have enough precision243244for (int prec = 1; prec < N; prec *= 2)245{246ZZ_pX h = a*f + b*g - 1;247ZZ_pX a_adj = -(h * a) % g;248ZZ_pX b_adj = -(h * b) % f;249a += a_adj;250b += b_adj;251}252253return 1;254}255256257258/*259Given a matrix A over Z/p^N that is invertible mod p, this routine computes260the inverse B = A^(-1) over Z/p^N.261262PRECONDITIONS:263The current NTL modulus should be p^N, and should be the one used to create264the matrix A.265266NOTE:267It's not clear to me (from the code or the documentation) whether NTL's268matrix inversion is guaranteed to work over a non-field. So I implemented269this one just in case.270271*/272void padic_invert_matrix(mat_ZZ_p& B, const mat_ZZ_p& A, const ZZ& p, int N)273{274ZZ_pContext modulus;275modulus.save();276277int n = A.NumRows();278279// =================================================280// First do it mod p using NTL matrix inverse281282// reduce to Z/p283ZZ_p::init(p);284mat_ZZ_p A_red = to_mat_ZZ_p(to_mat_ZZ(A));285286// invert matrix mod p287mat_ZZ_p B_red;288inv(B_red, A_red);289290// lift back to Z/p^N291modulus.restore();292B = to_mat_ZZ_p(to_mat_ZZ(B_red));293294// =================================================295// Now improve the approximation until we have enough precision296297mat_ZZ_p two;298ident(two, n);299two *= 2;300301for (int prec = 1; prec < N; prec *= 2)302// if BA = I + error, then303// ((2I - BA)B)A = (I - err)(I + err) = I - err^2304B = (two - B*A) * B;305}306307308309/*310The main function exported from this module. See hypellfrob.h for information.311*/312int matrix(mat_ZZ& output, const ZZ& p, int N, const ZZX& Q, int force_ntl)313{314// check input conditions315if (N < 1 || p < 3)316return 0;317318if ((deg(Q) < 3) || (deg(Q) % 2 == 0) || (!IsOne(LeadCoeff(Q))))319return 0;320321int g = (deg(Q) - 1) / 2;322323if (p <= (2*N - 1) * deg(Q))324return 0;325326// Back up the caller's modulus327ZZ_pContext modulus_caller;328modulus_caller.save();329330// Create moduli for working mod p^N and mod p^(N+1)331332ZZ_pContext modulus0, modulus1;333334ZZ_p::init(power(p, N));335modulus0.save();336ZZ_p::init(power(p, N+1));337modulus1.save();338339// =========================================================================340// Compute vertical reduction matrix M_V(t) and denominator D_V(t).341342// If N > 1 we need to do this to precision p^(N+1).343// If N = 1 we can get away with precision N, since the last reduction344// of length (p-1)/2 doesn't involve any divisions by p.345ZZ_pContext modulusV = (N == 1) ? modulus0 : modulus1;346modulusV.restore();347348// To compute the vertical reduction matrices, for each 0 <= i < 2g, we need349// to find R_i(x) and S_i(x) satisfying x^i = R_i(x) Q(x) + S_i(x) Q'(x).350vector<ZZ_pX> R, S;351ZZ_pX Qred, Qred_diff, S0, R0;352353conv(Qred, Q);354Qred_diff = diff(Qred);355int result = padic_xgcd(R0, S0, Qred, Qred_diff, p, N+1);356if (!result)357{358// failure if Q(x) and Q'(x) were not relatively prime mod p359modulus_caller.restore();360return 0;361}362363S.push_back(S0);364for (int i = 1; i < 2*g; i++)365S.push_back(LeftShift(S.back(), 1) % Qred);366367R.push_back(R0);368for (int i = 1; i < 2*g; i++)369R.push_back(LeftShift(R.back(), 1) % Qred_diff);370371// Then the (j, i) entry of M_V(t) is given by the x^j coefficient of372// (2t-1) R_i(x) + 2 S_i'(x).373// We store the constant term of M_V(t) in MV0, and the linear term in MV1.374mat_ZZ_p MV0, MV1;375MV0.SetDims(2*g, 2*g);376MV1.SetDims(2*g, 2*g);377for (int i = 0; i < 2*g; i++)378for (int j = 0; j < 2*g; j++)379{380MV0[j][i] = -coeff(R[i], j) + 2*(j+1)*coeff(S[i], j+1);381MV1[j][i] = 2*coeff(R[i], j);382}383384// For convenience we also store the vertical reduction denominator385// D_V(t) = 2t - 1 as a pair of 1x1 matrices.386mat_ZZ_p DV0, DV1;387DV0.SetDims(1, 1);388DV1.SetDims(1, 1);389DV0[0][0] = -1;390DV1[0][0] = 2;391392// =========================================================================393// Compute horizontal reduction matrices M_H^t(s) and denominators D_H^t(s),394// where t = (p(2j+1) - 1)/2, for j in the range 0 <= j < N.395// This is done to precision p^N.396modulus0.restore();397398// We have D_H^t(s) = (2g+1)(2t-1) - 2s.399// The matrix M_H^t(s) has D_H^t(s) on the sub-diagonal entries, and the400// coefficients of the polynomial 2sQ(x) - (2t-1)xQ'(x) in the last column.401402vector<mat_ZZ_p> MH0(N), MH1(N), DH0(N), DH1(N);403404for (int j = 0; j < N; j++)405{406ZZ t = (p*(2*j+1) - 1)/2;407408MH0[j].SetDims(2*g+1, 2*g+1);409MH1[j].SetDims(2*g+1, 2*g+1);410DH0[j].SetDims(1, 1);411DH1[j].SetDims(1, 1);412413DH0[j][0][0] = to_ZZ_p((2*g+1)*(2*t-1));414DH1[j][0][0] = -2;415// subdiagonal entries:416for (int i = 0; i < 2*g; i++)417{418MH0[j][i+1][i] = DH0[j][0][0];419MH1[j][i+1][i] = DH1[j][0][0];420}421// last column:422for (int i = 0; i < 2*g+1; i++)423{424MH0[j][i][2*g] = -to_ZZ_p(i*(2*t-1))*coeff(Qred, i);425MH1[j][i][2*g] = 2*coeff(Qred, i);426}427}428429// Compute inverse of the vandermonde matrix that will be used in the430// horizontal reduction phase431mat_ZZ_p vand_in, vand;432vand_in.SetDims(N, N);433for (int y = 1; y <= N; y++)434{435vand_in[y-1][0] = 1;436for (int x = 1; x < N; x++)437vand_in[y-1][x] = vand_in[y-1][x-1] * y;438}439padic_invert_matrix(vand, vand_in, p, N);440441// =========================================================================442// Compute B_{j, r} coefficients.443// These are done to precision p^(N+1).444445modulus1.restore();446447// The relevant binomial coefficients.448vector<vector<ZZ_p> > binomial(2*N);449binomial[0].push_back(to_ZZ_p(1));450for (int j = 1; j < 2*N; j++)451{452binomial[j].push_back(to_ZZ_p(1));453for (int i = 1; i < j; i++)454binomial[j].push_back(binomial[j-1][i-1] + binomial[j-1][i]);455binomial[j].push_back(to_ZZ_p(1));456}457458// For 0 <= j < N, compute Btemp[j] =459// (-1)^j \sum_{k=j}^{N-1} 4^{-k} {2k \choose k} {k \choose j}460ZZ_p fourth = to_ZZ_p(1) / 4;461ZZ_p fourth_pow = to_ZZ_p(1);462vector<ZZ_p> Btemp(N);463for (int k = 0; k < N; k++, fourth_pow = fourth_pow * fourth)464{465// even j466for (int j = 0; j <= k; j += 2)467Btemp[j] += binomial[2*k][k] * binomial[k][j] * fourth_pow;468// odd j469for (int j = 1; j <= k; j += 2)470Btemp[j] -= binomial[2*k][k] * binomial[k][j] * fourth_pow;471}472473// Compute the coefficients B_{j, r} = p C_{j, r} (-1)^j474// \sum_{k=j}^{N-1} 4^{-k} {2k \choose k} {k \choose j},475// where C_{j, r} is the coefficient of x^r in Q(x)^j.476vector<vector<ZZ_p> > B(N);477ZZ_pX Qpow = to_ZZ_pX(p);478for (int j = 0; j < N; j++, Qpow *= Qred)479for (int r = 0; r <= (2*g+1)*j; r++)480B[j].push_back(Btemp[j] * coeff(Qpow, r));481482// =========================================================================483// Horizontal reductions.484485// reduced[j][i] will hold the reduction to x^0 of the i-th differential486// along row j (each entry is a vector of length 2*g)487vector<vector<vec_ZZ_p> > reduced(N, vector<vec_ZZ_p>(2*g));488489for (int j = 0; j < N; j++)490{491// Compute the transition matrices, mod p^N, between the indices492// kp-2g-2 and kp, for each k. We compute at most N matrices (the others493// will be deduced more efficiently using the vandermonde matrix below)494modulus0.restore();495vector<ZZ> s;496int L = (2*g+1)*(j+1) - 1;497int L_effective = min(L, N);498for (int k = 0; k < L_effective; k++)499{500s.push_back(k*p);501s.push_back((k+1)*p - 2*g - 2);502}503vector<mat_ZZ_p> MH, DH;504MH.reserve(L);505DH.reserve(L);506interval_products_wrapper(MH, MH0[j], MH1[j], s, force_ntl);507interval_products_wrapper(DH, DH0[j], DH1[j], s, force_ntl);508509// Use the vandermonde matrix to extend all the way up to L if necessary.510if (L > N)511{512// First compute X[r] = F^{(r)}(0) p^r / r! for r = 0, ..., N-1,513// where F is the matrix corresponding to M as described in the paper;514// and do the same for Y corresponding to the denominator D.515vector<mat_ZZ_p> X(N);516vector<ZZ_p> Y(N);517for (int r = 0; r < N; r++)518{519X[r].SetDims(2*g+1, 2*g+1);520for (int h = 0; h < N; h++)521{522ZZ_p& v = vand[r][h];523524for (int y = 0; y < 2*g+1; y++)525for (int x = 0; x < 2*g+1; x++)526X[r][y][x] += v * MH[h][y][x];527528Y[r] += v * DH[h][0][0];529}530}531532// Now use those taylor coefficients to get the remaining MH's533// and DH's.534MH.resize(L);535DH.resize(L);536for (int k = N; k < L; k++)537{538MH[k].SetDims(2*g+1, 2*g+1);539DH[k].SetDims(1, 1);540541// this is actually a power of k+1, since the indices into542// MH and DH are one-off from what's written in the paper543ZZ_p k_pow;544k_pow = 1;545546for (int h = 0; h < N; h++, k_pow *= (k+1))547{548for (int y = 0; y < 2*g+1; y++)549for (int x = 0; x < 2*g+1; x++)550MH[k][y][x] += k_pow * X[h][y][x];551552DH[k][0][0] += k_pow * Y[h];553}554}555}556557// Divide out each MH[k] by DH[k].558for (int k = 0; k < MH.size(); k++)559{560ZZ_p Dinv = 1 / DH[k][0][0];561562for (int x = 0; x < 2*g+1; x++)563for (int y = 0; y < 2*g+1; y++)564MH[k][y][x] *= Dinv;565}566567// Reduce all differentials on this row into the x^0 position.568for (int i = 0; i < 2*g; i++)569{570modulus1.restore();571vec_ZZ_p sum;572sum.SetLength(2*g+1);573574// loop over blocks in this row575for (int k = (2*g+1)*(j+1) - 1; k >= 1; k--)576{577// add in new term if necessary578if (k >= i+1 && k <= i+1 + (2*g+1)*j)579sum[0] += B[j][k - (i+1)];580581// We do the following reductions "by hand" for a little more582// efficiency, since the matrices are all sparse.583584// tt is 2t - 1 in the notation of the paper585ZZ tt = (2*j+1)*p - 2;586587// Reduce from kp - 1 down to kp - 2g - 1.588for (int u = 1; u <= 2*g; u++)589{590// c is the last component591ZZ_p c = sum[2*g];592593// shuffle over all components by one step594for (int m = 2*g; m >= 1; m--)595sum[m] = sum[m-1];596sum[0] = 0;597598// add in contribution from c (these follow from the explicit599// formulae for the horizontal reductions)600ZZ s = k*p - u;601c /= to_ZZ_p((2*g+1)*tt - 2*s);602for (int m = 0; m <= 2*g; m++)603sum[m] += c * to_ZZ_p(2*s - tt*m) * coeff(Qred, m);604}605606// Reduce from kp - 2g - 1 to kp - 2g - 2.607// This is a little different to the previous block owing to the608// denominator being divisible by p.609{610// c is the last component611ZZ_p c = sum[2*g];612613assert(rep(c) % p == 0);614c = to_ZZ_p(rep(c) / p);615616// shuffle over all components by one step617for (int m = 2*g; m >= 1; m--)618sum[m] = sum[m-1];619sum[0] = 0;620621// add in contribution from c622ZZ s = k*p - (2*g + 1);623ZZ denom = (2*g+1)*tt - 2*s;624assert(denom % p == 0);625c /= to_ZZ_p(denom / p);626for (int m = 0; m <= 2*g; m++)627sum[m] += c * to_ZZ_p(2*s - tt*m) * coeff(Qred, m);628}629630// Use big fat matrices to reduce from kp - 2g - 2 to (k-1)p.631{632// drop precision to p^N and apply the matrix633modulus0.restore();634vec_ZZ_p temp = MH[k-1] * to_vec_ZZ_p(to_vec_ZZ(sum));635636// lift precision back to p^(N+1)637modulus1.restore();638sum = to_vec_ZZ_p(to_vec_ZZ(temp));639}640641// Reduce from (k-1)p down to (k-1)p - 1.642{643// c is the last component644ZZ_p c = sum[2*g];645646// shuffle over all components by one step647for (int m = 2*g; m >= 1; m--)648sum[m] = sum[m-1];649sum[0] = 0;650651// add in contribution from c652ZZ s = (k-1)*p;653c /= to_ZZ_p((2*g+1)*tt - 2*s);654for (int m = 0; m <= 2*g; m++)655sum[m] += c * to_ZZ_p(2*s - tt*m) * coeff(Qred, m);656}657}658659// Store the reduced vector. Note that currently it has length 2g+1,660// but the lowest term is zero, and the vertical reductions will661// operate on length 2g vectors, so we kill the lowest term.662modulus1.restore();663reduced[j][i].SetLength(2*g);664for (int f = 0; f < 2*g; f++)665reduced[j][i][f] = sum[f+1];666}667}668669if (N == 1)670{671// If N == 1, then the vertical matrices are stored at precision 1672// (instead of at N+1), so we reduce "reduced" to precision 1 here.673modulus0.restore();674for (int j = 0; j < N; j++)675for (int i = 0; i < 2*g; i++)676reduced[j][i] = to_vec_ZZ_p(to_vec_ZZ(reduced[j][i]));677}678679// =========================================================================680// Vertical reductions.681682// Compute the vertical reduction maps between indices683// 0, (p-1)/2, (3p-1)/2, (5p-1)/2, ..., ((2N-1)p-1)/2.684vector<ZZ> s;685s.push_back(to_ZZ(0));686s.push_back((p-1)/2);687for (int u = 1; u < N; u++)688{689s.push_back(s.back());690s.push_back(s.back() + p);691}692modulusV.restore();693vector<mat_ZZ_p> MV, DV;694interval_products_wrapper(MV, MV0, MV1, s, force_ntl);695interval_products_wrapper(DV, DV0, DV1, s, force_ntl);696697// Divide out each MV[j] by DV[j]. Note that for 1 <= j < N, both of them698// should be divisible by p too, so we take that into account here.699for (int j = 0; j < N; j++)700{701ZZ u = rep(DV[j][0][0]);702if (j != 0)703{704assert(u % p == 0);705u /= p;706}707InvMod(u, u, ZZ_p::modulus());708709for (int x = 0; x < 2*g; x++)710for (int y = 0; y < 2*g; y++)711{712ZZ c = rep(MV[j][y][x]);713if (j != 0)714{715assert(c % p == 0);716c /= p;717}718MV[j][y][x] = to_ZZ_p(c * u);719}720}721722output.SetDims(2*g, 2*g);723ZZ p_to_N = power(p, N);724725// For each i, use the above reductions matrices to vertically reduce the726// terms corresponding to x^i dx/y.727for (int i = 0; i < 2*g; i++)728{729vec_ZZ_p sum;730sum.SetLength(2*g);731732for (int j = N-1; j >= 0; j--)733{734// pick up a new term,735sum += reduced[j][i];736// and reduce:737sum = MV[j] * sum;738}739740// store the result741for (int f = 0; f < 2*g; f++)742output[f][i] = rep(sum[f]) % p_to_N;743}744745// Restore the caller's modulus746modulus_caller.restore();747748return 1;749}750751752}; // namespace hypellfrob753754// ----------------------- end of file755756757