CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/ext/sfmt19937/SFMT.c
Views: 1401
/**1* @file SFMT.c2* @brief SIMD oriented Fast Mersenne Twister(SFMT)3*4* @author Mutsuo Saito (Hiroshima University)5* @author Makoto Matsumoto (Hiroshima University)6*7* Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima8* University.9* Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima10* University and The University of Tokyo.11* Copyright (C) 2013 Mutsuo Saito, Makoto Matsumoto and Hiroshima12* University.13* All rights reserved.14*15* The 3-clause BSD License is applied to this software, see16* LICENSE.txt17*/1819#if defined(__cplusplus)20extern "C" {21#endif2223#include <string.h>24#include <assert.h>25#include "SFMT.h"26#include "SFMT-params.h"27#include "SFMT-common.h"2829#if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64)30#define BIG_ENDIAN64 131#endif32#if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64)33#define BIG_ENDIAN64 134#endif35#if defined(ONLY64) && !defined(BIG_ENDIAN64)36#if defined(__GNUC__)37#error "-DONLY64 must be specified with -DBIG_ENDIAN64"38#endif39#undef ONLY6440#endif4142/**43* parameters used by sse2.44*/45static const w128_t sse2_param_mask = {{SFMT_MSK1, SFMT_MSK2,46SFMT_MSK3, SFMT_MSK4}};47/*----------------48STATIC FUNCTIONS49----------------*/50inline static int idxof(int i);51inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size);52inline static uint32_t func1(uint32_t x);53inline static uint32_t func2(uint32_t x);54static void period_certification(sfmt_t * sfmt);55#if defined(BIG_ENDIAN64) && !defined(ONLY64)56inline static void swap(w128_t *array, int size);57#endif5859#if defined(HAVE_ALTIVEC)60#include "SFMT-alti.h"61#elif defined(HAVE_SSE2)62#if defined(_MSC_VER)63#include "SFMT-sse2-msc.h"64#else65#include "SFMT-sse2.h"66#endif67#endif6869/**70* This function simulate a 64-bit index of LITTLE ENDIAN71* in BIG ENDIAN machine.72*/73#ifdef ONLY6474inline static int idxof(int i) {75return i ^ 1;76}77#else78inline static int idxof(int i) {79return i;80}81#endif8283#if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2))84/**85* This function fills the user-specified array with pseudorandom86* integers.87*88* @param sfmt SFMT internal state89* @param array an 128-bit array to be filled by pseudorandom numbers.90* @param size number of 128-bit pseudorandom numbers to be generated.91*/92inline static void gen_rand_array(sfmt_t * sfmt, w128_t *array, int size) {93int i, j;94w128_t *r1, *r2;9596r1 = &sfmt->state[SFMT_N - 2];97r2 = &sfmt->state[SFMT_N - 1];98for (i = 0; i < SFMT_N - SFMT_POS1; i++) {99do_recursion(&array[i], &sfmt->state[i], &sfmt->state[i + SFMT_POS1], r1, r2);100r1 = r2;101r2 = &array[i];102}103for (; i < SFMT_N; i++) {104do_recursion(&array[i], &sfmt->state[i],105&array[i + SFMT_POS1 - SFMT_N], r1, r2);106r1 = r2;107r2 = &array[i];108}109for (; i < size - SFMT_N; i++) {110do_recursion(&array[i], &array[i - SFMT_N],111&array[i + SFMT_POS1 - SFMT_N], r1, r2);112r1 = r2;113r2 = &array[i];114}115for (j = 0; j < 2 * SFMT_N - size; j++) {116sfmt->state[j] = array[j + size - SFMT_N];117}118for (; i < size; i++, j++) {119do_recursion(&array[i], &array[i - SFMT_N],120&array[i + SFMT_POS1 - SFMT_N], r1, r2);121r1 = r2;122r2 = &array[i];123sfmt->state[j] = array[i];124}125}126#endif127128#if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC)129inline static void swap(w128_t *array, int size) {130int i;131uint32_t x, y;132133for (i = 0; i < size; i++) {134x = array[i].u[0];135y = array[i].u[2];136array[i].u[0] = array[i].u[1];137array[i].u[2] = array[i].u[3];138array[i].u[1] = x;139array[i].u[3] = y;140}141}142#endif143/**144* This function represents a function used in the initialization145* by init_by_array146* @param x 32-bit integer147* @return 32-bit integer148*/149static uint32_t func1(uint32_t x) {150return (x ^ (x >> 27)) * (uint32_t)1664525UL;151}152153/**154* This function represents a function used in the initialization155* by init_by_array156* @param x 32-bit integer157* @return 32-bit integer158*/159static uint32_t func2(uint32_t x) {160return (x ^ (x >> 27)) * (uint32_t)1566083941UL;161}162163/**164* This function certificate the period of 2^{MEXP}165* @param sfmt SFMT internal state166*/167static void period_certification(sfmt_t * sfmt) {168int inner = 0;169int i, j;170uint32_t work;171uint32_t *psfmt32 = &sfmt->state[0].u[0];172const uint32_t parity[4] = {SFMT_PARITY1, SFMT_PARITY2,173SFMT_PARITY3, SFMT_PARITY4};174175for (i = 0; i < 4; i++)176inner ^= psfmt32[idxof(i)] & parity[i];177for (i = 16; i > 0; i >>= 1)178inner ^= inner >> i;179inner &= 1;180/* check OK */181if (inner == 1) {182return;183}184/* check NG, and modification */185for (i = 0; i < 4; i++) {186work = 1;187for (j = 0; j < 32; j++) {188if ((work & parity[i]) != 0) {189psfmt32[idxof(i)] ^= work;190return;191}192work = work << 1;193}194}195}196197/*----------------198PUBLIC FUNCTIONS199----------------*/200#define UNUSED_VARIABLE(x) (void)(x)201/**202* This function returns the identification string.203* The string shows the word size, the Mersenne exponent,204* and all parameters of this generator.205* @param sfmt SFMT internal state206*/207const char *sfmt_get_idstring(sfmt_t * sfmt) {208UNUSED_VARIABLE(sfmt);209return SFMT_IDSTR;210}211212/**213* This function returns the minimum size of array used for \b214* fill_array32() function.215* @param sfmt SFMT internal state216* @return minimum size of array used for fill_array32() function.217*/218int sfmt_get_min_array_size32(sfmt_t * sfmt) {219UNUSED_VARIABLE(sfmt);220return SFMT_N32;221}222223/**224* This function returns the minimum size of array used for \b225* fill_array64() function.226* @param sfmt SFMT internal state227* @return minimum size of array used for fill_array64() function.228*/229int sfmt_get_min_array_size64(sfmt_t * sfmt) {230UNUSED_VARIABLE(sfmt);231return SFMT_N64;232}233234#if !defined(HAVE_SSE2) && !defined(HAVE_ALTIVEC)235/**236* This function fills the internal state array with pseudorandom237* integers.238* @param sfmt SFMT internal state239*/240void sfmt_gen_rand_all(sfmt_t * sfmt) {241int i;242w128_t *r1, *r2;243244r1 = &sfmt->state[SFMT_N - 2];245r2 = &sfmt->state[SFMT_N - 1];246for (i = 0; i < SFMT_N - SFMT_POS1; i++) {247do_recursion(&sfmt->state[i], &sfmt->state[i],248&sfmt->state[i + SFMT_POS1], r1, r2);249r1 = r2;250r2 = &sfmt->state[i];251}252for (; i < SFMT_N; i++) {253do_recursion(&sfmt->state[i], &sfmt->state[i],254&sfmt->state[i + SFMT_POS1 - SFMT_N], r1, r2);255r1 = r2;256r2 = &sfmt->state[i];257}258}259#endif260261#ifndef ONLY64262/**263* This function generates pseudorandom 32-bit integers in the264* specified array[] by one call. The number of pseudorandom integers265* is specified by the argument size, which must be at least 624 and a266* multiple of four. The generation by this function is much faster267* than the following gen_rand function.268*269* For initialization, init_gen_rand or init_by_array must be called270* before the first call of this function. This function can not be271* used after calling gen_rand function, without initialization.272*273* @param sfmt SFMT internal state274* @param array an array where pseudorandom 32-bit integers are filled275* by this function. The pointer to the array must be \b "aligned"276* (namely, must be a multiple of 16) in the SIMD version, since it277* refers to the address of a 128-bit integer. In the standard C278* version, the pointer is arbitrary.279*280* @param size the number of 32-bit pseudorandom integers to be281* generated. size must be a multiple of 4, and greater than or equal282* to (MEXP / 128 + 1) * 4.283*284* @note \b memalign or \b posix_memalign is available to get aligned285* memory. Mac OSX doesn't have these functions, but \b malloc of OSX286* returns the pointer to the aligned memory block.287*/288void sfmt_fill_array32(sfmt_t * sfmt, uint32_t *array, int size) {289assert(sfmt->idx == SFMT_N32);290assert(size % 4 == 0);291assert(size >= SFMT_N32);292293gen_rand_array(sfmt, (w128_t *)array, size / 4);294sfmt->idx = SFMT_N32;295}296#endif297298/**299* This function generates pseudorandom 64-bit integers in the300* specified array[] by one call. The number of pseudorandom integers301* is specified by the argument size, which must be at least 312 and a302* multiple of two. The generation by this function is much faster303* than the following gen_rand function.304*305* @param sfmt SFMT internal state306* For initialization, init_gen_rand or init_by_array must be called307* before the first call of this function. This function can not be308* used after calling gen_rand function, without initialization.309*310* @param array an array where pseudorandom 64-bit integers are filled311* by this function. The pointer to the array must be "aligned"312* (namely, must be a multiple of 16) in the SIMD version, since it313* refers to the address of a 128-bit integer. In the standard C314* version, the pointer is arbitrary.315*316* @param size the number of 64-bit pseudorandom integers to be317* generated. size must be a multiple of 2, and greater than or equal318* to (MEXP / 128 + 1) * 2319*320* @note \b memalign or \b posix_memalign is available to get aligned321* memory. Mac OSX doesn't have these functions, but \b malloc of OSX322* returns the pointer to the aligned memory block.323*/324void sfmt_fill_array64(sfmt_t * sfmt, uint64_t *array, int size) {325assert(sfmt->idx == SFMT_N32);326assert(size % 2 == 0);327assert(size >= SFMT_N64);328329gen_rand_array(sfmt, (w128_t *)array, size / 2);330sfmt->idx = SFMT_N32;331332#if defined(BIG_ENDIAN64) && !defined(ONLY64)333swap((w128_t *)array, size /2);334#endif335}336337/**338* This function initializes the internal state array with a 32-bit339* integer seed.340*341* @param sfmt SFMT internal state342* @param seed a 32-bit integer used as the seed.343*/344void sfmt_init_gen_rand(sfmt_t * sfmt, uint32_t seed) {345int i;346347uint32_t *psfmt32 = &sfmt->state[0].u[0];348349psfmt32[idxof(0)] = seed;350for (i = 1; i < SFMT_N32; i++) {351psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)]352^ (psfmt32[idxof(i - 1)] >> 30))353+ i;354}355sfmt->idx = SFMT_N32;356period_certification(sfmt);357}358359/**360* This function initializes the internal state array,361* with an array of 32-bit integers used as the seeds362* @param sfmt SFMT internal state363* @param init_key the array of 32-bit integers, used as a seed.364* @param key_length the length of init_key.365*/366void sfmt_init_by_array(sfmt_t * sfmt, uint32_t *init_key, int key_length) {367int i, j, count;368uint32_t r;369int lag;370int mid;371int size = SFMT_N * 4;372uint32_t *psfmt32 = &sfmt->state[0].u[0];373374if (size >= 623) {375lag = 11;376} else if (size >= 68) {377lag = 7;378} else if (size >= 39) {379lag = 5;380} else {381lag = 3;382}383mid = (size - lag) / 2;384385memset(sfmt, 0x8b, sizeof(sfmt_t));386if (key_length + 1 > SFMT_N32) {387count = key_length + 1;388} else {389count = SFMT_N32;390}391r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)]392^ psfmt32[idxof(SFMT_N32 - 1)]);393psfmt32[idxof(mid)] += r;394r += key_length;395psfmt32[idxof(mid + lag)] += r;396psfmt32[idxof(0)] = r;397398count--;399for (i = 1, j = 0; (j < count) && (j < key_length); j++) {400r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)]401^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]);402psfmt32[idxof((i + mid) % SFMT_N32)] += r;403r += init_key[j] + i;404psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r;405psfmt32[idxof(i)] = r;406i = (i + 1) % SFMT_N32;407}408for (; j < count; j++) {409r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % SFMT_N32)]410^ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]);411psfmt32[idxof((i + mid) % SFMT_N32)] += r;412r += i;413psfmt32[idxof((i + mid + lag) % SFMT_N32)] += r;414psfmt32[idxof(i)] = r;415i = (i + 1) % SFMT_N32;416}417for (j = 0; j < SFMT_N32; j++) {418r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % SFMT_N32)]419+ psfmt32[idxof((i + SFMT_N32 - 1) % SFMT_N32)]);420psfmt32[idxof((i + mid) % SFMT_N32)] ^= r;421r -= i;422psfmt32[idxof((i + mid + lag) % SFMT_N32)] ^= r;423psfmt32[idxof(i)] = r;424i = (i + 1) % SFMT_N32;425}426427sfmt->idx = SFMT_N32;428period_certification(sfmt);429}430#if defined(__cplusplus)431}432#endif433434435