/* Written in 2018 by Sebastiano Vigna (vigna@acm.org) Performs a correlation test on a PCG generator with "ext" in the name. The output of this program should look random for some large enough value of the command-line parameter, but this never happens. To the extent possible under law, the author has dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. See . */ #include #include #include #include #include #include #include // Included code for PCG: for simplicity, and because we need to make the data_ array public to perform the correlation test. // The actual test is at the end of the source. /* * PCG Random Number Generation for C++ * * Copyright 2014 Melissa O'Neill * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For additional information about the PCG random number generation scheme, * including its license and other licensing options, visit * * http://www.pcg-random.org */ /* * This code provides the reference implementation of the PCG family of * random number generators. The code is complex because it implements * * - several members of the PCG family, specifically members corresponding * to the output functions: * - XSH RR (good for 64-bit state, 32-bit output) * - XSH RS (good for 64-bit state, 32-bit output) * - XSL RR (good for 128-bit state, 64-bit output) * - RXS M XS (statistically most powerful generator) * - XSL RR RR (good for 128-bit state, 128-bit output) * - and RXS, RXS M, XSH, XSL (mostly for testing) * - at potentially *arbitrary* bit sizes * - with four different techniques for random streams (MCG, one-stream * LCG, settable-stream LCG, unique-stream LCG) * - and the extended generation schemes allowing arbitrary periods * - with all features of C++11 random number generation (and more), * some of which are somewhat painful, including * - initializing with a SeedSequence which writes 32-bit values * to memory, even though the state of the generator may not * use 32-bit values (it might use smaller or larger integers) * - I/O for RNGs and a prescribed format, which needs to handle * the issue that 8-bit and 128-bit integers don't have working * I/O routines (e.g., normally 8-bit = char, not integer) * - equality and inequality for RNGs * - and a number of convenience typedefs to mask all the complexity * * The code employes a fairly heavy level of abstraction, and has to deal * with various C++ minutia. If you're looking to learn about how the PCG * scheme works, you're probably best of starting with one of the other * codebases (see www.pcg-random.org). But if you're curious about the * constants for the various output functions used in those other, simpler, * codebases, this code shows how they are calculated. * * On the positive side, at least there are convenience typedefs so that you * can say * * pcg32 myRNG; * * rather than: * * pcg_detail::engine< * uint32_t, // Output Type * uint64_t, // State Type * pcg_detail::xsh_rr_mixin, true, // Output Func * pcg_detail::specific_stream, // Stream Kind * pcg_detail::default_multiplier // LCG Mult * > myRNG; * */ #ifndef PCG_RAND_HPP_INCLUDED #define PCG_RAND_HPP_INCLUDED 1 #include #include #include #include #include #include #include #include #include #include #include #include /* * The pcg_extras namespace contains some support code that is likley to * be useful for a variety of RNGs, including: * - 128-bit int support for platforms where it isn't available natively * - bit twiddling operations * - I/O of 128-bit and 8-bit integers * - Handling the evilness of SeedSeq * - Support for efficiently producing random numbers less than a given * bound */ /* * PCG Random Number Generation for C++ * * Copyright 2014 Melissa O'Neill * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * For additional information about the PCG random number generation scheme, * including its license and other licensing options, visit * * http://www.pcg-random.org */ /* * This file provides support code that is useful for random-number generation * but not specific to the PCG generation scheme, including: * - 128-bit int support for platforms where it isn't available natively * - bit twiddling operations * - I/O of 128-bit and 8-bit integers * - Handling the evilness of SeedSeq * - Support for efficiently producing random numbers less than a given * bound */ #ifndef PCG_EXTRAS_HPP_INCLUDED #define PCG_EXTRAS_HPP_INCLUDED 1 #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __GNUC__ #include #endif /* * Abstractions for compiler-specific directives */ #ifdef __GNUC__ #define PCG_NOINLINE __attribute__((noinline)) #else #define PCG_NOINLINE #endif /* * Some members of the PCG library use 128-bit math. When compiling on 64-bit * platforms, both GCC and Clang provide 128-bit integer types that are ideal * for the job. * * On 32-bit platforms (or with other compilers), we fall back to a C++ * class that provides 128-bit unsigned integers instead. It may seem * like we're reinventing the wheel here, because libraries already exist * that support large integers, but most existing libraries provide a very * generic multiprecision code, but here we're operating at a fixed size. * Also, most other libraries are fairly heavyweight. So we use a direct * implementation. Sadly, it's much slower than hand-coded assembly or * direct CPU support. * */ #if __SIZEOF_INT128__ namespace pcg_extras { typedef __uint128_t pcg128_t; } #define PCG_128BIT_CONSTANT(high,low) \ ((pcg128_t(high) << 64) + low) #else #include "pcg_uint128.hpp" namespace pcg_extras { typedef pcg_extras::uint_x4 pcg128_t; } #define PCG_128BIT_CONSTANT(high,low) \ pcg128_t(high,low) #define PCG_EMULATED_128BIT_MATH 1 #endif namespace pcg_extras { /* * We often need to represent a "number of bits". When used normally, these * numbers are never greater than 128, so an unsigned char is plenty. * If you're using a nonstandard generator of a larger size, you can set * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers * might produce faster code if you set it to an unsigned int.) */ #ifndef PCG_BITCOUNT_T typedef uint8_t bitcount_t; #else typedef PCG_BITCOUNT_T bitcount_t; #endif /* * C++ requires us to be able to serialize RNG state by printing or reading * it from a stream. Because we use 128-bit ints, we also need to be able * ot print them, so here is code to do so. * * This code provides enough functionality to print 128-bit ints in decimal * and zero-padded in hex. It's not a full-featured implementation. */ template std::basic_ostream& operator<<(std::basic_ostream& out, pcg128_t value) { auto desired_base = out.flags() & out.basefield; bool want_hex = desired_base == out.hex; if (want_hex) { uint64_t highpart = uint64_t(value >> 64); uint64_t lowpart = uint64_t(value); auto desired_width = out.width(); if (desired_width > 16) { out.width(desired_width - 16); } if (highpart != 0 || desired_width > 16) out << highpart; CharT oldfill; if (highpart != 0) { out.width(16); oldfill = out.fill('0'); } auto oldflags = out.setf(decltype(desired_base){}, out.showbase); out << lowpart; out.setf(oldflags); if (highpart != 0) { out.fill(oldfill); } return out; } constexpr size_t MAX_CHARS_128BIT = 40; char buffer[MAX_CHARS_128BIT]; char* pos = buffer+sizeof(buffer); *(--pos) = '\0'; constexpr auto BASE = pcg128_t(10ULL); do { auto div = value / BASE; auto mod = uint32_t(value - (div * BASE)); *(--pos) = '0' + mod; value = div; } while(value != pcg128_t(0ULL)); return out << pos; } template std::basic_istream& operator>>(std::basic_istream& in, pcg128_t& value) { typename std::basic_istream::sentry s(in); if (!s) return in; constexpr auto BASE = pcg128_t(10ULL); pcg128_t current(0ULL); bool did_nothing = true; bool overflow = false; for(;;) { CharT wide_ch = in.get(); if (!in.good()) break; auto ch = in.narrow(wide_ch, '\0'); if (ch < '0' || ch > '9') { in.unget(); break; } did_nothing = false; pcg128_t digit(uint32_t(ch - '0')); pcg128_t timesbase = current*BASE; overflow = overflow || timesbase < current; current = timesbase + digit; overflow = overflow || current < digit; } if (did_nothing || overflow) { in.setstate(std::ios::failbit); if (overflow) current = ~pcg128_t(0ULL); } value = current; return in; } /* * Likewise, if people use tiny rngs, we'll be serializing uint8_t. * If we just used the provided IO operators, they'd read/write chars, * not ints, so we need to define our own. We *can* redefine this operator * here because we're in our own namespace. */ template std::basic_ostream& operator<<(std::basic_ostream&out, uint8_t value) { return out << uint32_t(value); } template std::basic_istream& operator>>(std::basic_istream& in, uint8_t target) { uint32_t value = 0xdecea5edU; in >> value; if (!in && value == 0xdecea5edU) return in; if (value > uint8_t(~0)) { in.setstate(std::ios::failbit); value = ~0U; } target = uint8_t(value); return in; } /* Unfortunately, the above functions don't get found in preference to the * built in ones, so we create some more specific overloads that will. * Ugh. */ inline std::ostream& operator<<(std::ostream& out, uint8_t value) { return pcg_extras::operator<< (out, value); } inline std::istream& operator>>(std::istream& in, uint8_t& value) { return pcg_extras::operator>> (in, value); } /* * Useful bitwise operations. */ /* * XorShifts are invertable, but they are someting of a pain to invert. * This function backs them out. It's used by the whacky "inside out" * generator defined later. */ template inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) { if (2*shift >= bits) { return x ^ (x >> shift); } itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; itype highmask1 = ~lowmask1; itype top1 = x; itype bottom1 = x & lowmask1; top1 ^= top1 >> shift; top1 &= highmask1; x = top1 | bottom1; itype lowmask2 = (itype(1U) << (bits - shift)) - 1; itype bottom2 = x & lowmask2; bottom2 = unxorshift(bottom2, bits - shift, shift); bottom2 &= lowmask1; return top1 | bottom2; } /* * Rotate left and right. * * In ideal world, compilers would spot idiomatic rotate code and convert it * to a rotate instruction. Of course, opinions vary on what the correct * idiom is and how to spot it. For clang, sometimes it generates better * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM. */ template inline itype rotl(itype value, bitcount_t rot) { constexpr bitcount_t bits = sizeof(itype) * 8; constexpr bitcount_t mask = bits - 1; #if PCG_USE_ZEROCHECK_ROTATE_IDIOM return rot ? (value << rot) | (value >> (bits - rot)) : value; #else return (value << rot) | (value >> ((- rot) & mask)); #endif } template inline itype rotr(itype value, bitcount_t rot) { constexpr bitcount_t bits = sizeof(itype) * 8; constexpr bitcount_t mask = bits - 1; #if PCG_USE_ZEROCHECK_ROTATE_IDIOM return rot ? (value >> rot) | (value << (bits - rot)) : value; #else return (value >> rot) | (value << ((- rot) & mask)); #endif } /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes * to properly recognizing idiomatic rotate code, so for we also provide * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. * (I hope that these compilers get better so that this code can die.) * * These overloads will be preferred over the general template code above. */ #if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__) inline uint8_t rotr(uint8_t value, bitcount_t rot) { asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } inline uint16_t rotr(uint16_t value, bitcount_t rot) { asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } inline uint32_t rotr(uint32_t value, bitcount_t rot) { asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } #if __x86_64__ inline uint64_t rotr(uint64_t value, bitcount_t rot) { asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); return value; } #endif // __x86_64__ #endif // PCG_USE_INLINE_ASM /* * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of * 32-bit integers with seed data, but sometimes we want to produce * larger or smaller integers. * * The following code handles this annoyance. * * uneven_copy will copy an array of 32-bit ints to an array of larger or * smaller ints (actually, the code is general it only needing forward * iterators). The copy is identical to the one that would be performed if * we just did memcpy on a standard little-endian machine, but works * regardless of the endian of the machine (or the weirdness of the ints * involved). * * generate_to initializes an array of integers using a SeedSeq * object. It is given the size as a static constant at compile time and * tries to avoid memory allocation. If we're filling in 32-bit constants * we just do it directly. If we need a separate buffer and it's small, * we allocate it on the stack. Otherwise, we fall back to heap allocation. * Ugh. * * generate_one produces a single value of some integral type using a * SeedSeq object. */ /* uneven_copy helper, case where destination ints are less than 32 bit. */ template SrcIter uneven_copy_impl( SrcIter src_first, DestIter dest_first, DestIter dest_last, std::true_type) { typedef typename std::iterator_traits::value_type src_t; typedef typename std::iterator_traits::value_type dest_t; constexpr bitcount_t SRC_SIZE = sizeof(src_t); constexpr bitcount_t DEST_SIZE = sizeof(dest_t); constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; size_t count = 0; src_t value; while (dest_first != dest_last) { if ((count++ % SCALE) == 0) value = *src_first++; // Get more bits else value >>= DEST_BITS; // Move down bits *dest_first++ = dest_t(value); // Truncates, ignores high bits. } return src_first; } /* uneven_copy helper, case where destination ints are more than 32 bit. */ template SrcIter uneven_copy_impl( SrcIter src_first, DestIter dest_first, DestIter dest_last, std::false_type) { typedef typename std::iterator_traits::value_type src_t; typedef typename std::iterator_traits::value_type dest_t; constexpr auto SRC_SIZE = sizeof(src_t); constexpr auto SRC_BITS = SRC_SIZE * 8; constexpr auto DEST_SIZE = sizeof(dest_t); constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE; while (dest_first != dest_last) { dest_t value(0UL); unsigned int shift = 0; for (size_t i = 0; i < SCALE; ++i) { value |= dest_t(*src_first++) << shift; shift += SRC_BITS; } *dest_first++ = value; } return src_first; } /* uneven_copy, call the right code for larger vs. smaller */ template inline SrcIter uneven_copy(SrcIter src_first, DestIter dest_first, DestIter dest_last) { typedef typename std::iterator_traits::value_type src_t; typedef typename std::iterator_traits::value_type dest_t; constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t); return uneven_copy_impl(src_first, dest_first, dest_last, std::integral_constant{}); } /* generate_to, fill in a fixed-size array of integral type using a SeedSeq * (actually works for any random-access iterator) */ template inline void generate_to_impl(SeedSeq&& generator, DestIter dest, std::true_type) { generator.generate(dest, dest+size); } template void generate_to_impl(SeedSeq&& generator, DestIter dest, std::false_type) { typedef typename std::iterator_traits::value_type dest_t; constexpr auto DEST_SIZE = sizeof(dest_t); constexpr auto GEN_SIZE = sizeof(uint32_t); constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE; constexpr size_t FROM_ELEMS = GEN_IS_SMALLER ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE) : (size + (GEN_SIZE / DEST_SIZE) - 1) / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER); // this odd code ^^^^^^^^^^^^^^^^^ is work-around for // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287 if (FROM_ELEMS <= 1024) { uint32_t buffer[FROM_ELEMS]; generator.generate(buffer, buffer+FROM_ELEMS); uneven_copy(buffer, dest, dest+size); } else { uint32_t* buffer = (uint32_t*) malloc(GEN_SIZE * FROM_ELEMS); generator.generate(buffer, buffer+FROM_ELEMS); uneven_copy(buffer, dest, dest+size); free(buffer); } } template inline void generate_to(SeedSeq&& generator, DestIter dest) { typedef typename std::iterator_traits::value_type dest_t; constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); generate_to_impl(std::forward(generator), dest, std::integral_constant{}); } /* generate_one, produce a value of integral type using a SeedSeq * (optionally, we can have it produce more than one and pick which one * we want) */ template inline UInt generate_one(SeedSeq&& generator) { UInt result[N]; generate_to(std::forward(generator), result); return result[i]; } template auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) -> typename RngType::result_type { typedef typename RngType::result_type rtype; rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound) % upper_bound; for (;;) { rtype r = rng() - RngType::min(); if (r >= threshold) return r % upper_bound; } } template void shuffle(Iter from, Iter to, RandType&& rng) { typedef typename std::iterator_traits::difference_type delta_t; auto count = to - from; while (count > 1) { delta_t chosen(bounded_rand(rng, count)); --count; --to; using std::swap; swap(*(from+chosen), *to); } } /* * Although std::seed_seq is useful, it isn't everything. Often we want to * initialize a random-number generator some other way, such as from a random * device. * * Technically, it does not meet the requirements of a SeedSequence because * it lacks some of the rarely-used member functions (some of which would * be impossible to provide). However the C++ standard is quite specific * that actual engines only called the generate method, so it ought not to be * a problem in practice. */ template class seed_seq_from { private: RngType rng_; typedef uint_least32_t result_type; public: template seed_seq_from(Args&&... args) : rng_(std::forward(args)...) { // Nothing (else) to do... } template void generate(Iter start, Iter finish) { for (auto i = start; i != finish; ++i) *i = result_type(rng_()); } constexpr size_t size() const { return (sizeof(typename RngType::result_type) > sizeof(result_type) && RngType::max() > ~size_t(0UL)) ? ~size_t(0UL) : size_t(RngType::max()); } }; /* * Sometimes you might want a distinct seed based on when the program * was compiled. That way, a particular instance of the program will * behave the same way, but when recompiled it'll produce a different * value. */ template struct static_arbitrary_seed { private: static constexpr IntType fnv(IntType hash, const char* pos) { return *pos == '\0' ? hash : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1)); } public: static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), __DATE__ __TIME__ __FILE__); }; // Sometimes, when debugging or testing, it's handy to be able print the name // of a (in human-readable form). This code allows the idiom: // // cout << printable_typename() // // to print out my_foo_type_t (or its concrete type if it is a synonym) template struct printable_typename {}; template std::ostream& operator<<(std::ostream& out, printable_typename) { const char *implementation_typename = typeid(T).name(); #ifdef __GNUC__ int status; const char* pretty_name = abi::__cxa_demangle(implementation_typename, NULL, NULL, &status); if (status == 0) out << pretty_name; free((void*) pretty_name); if (status == 0) return out; #endif out << implementation_typename; return out; } } // namespace pcg_extras #endif // PCG_EXTRAS_HPP_INCLUDED namespace pcg_detail { using namespace pcg_extras; /* * The LCG generators need some constants to function. This code lets you * look up the constant by *type*. For example * * default_multiplier::multiplier() * * gives you the default multipler for 32-bit integers. We use the name * of the constant and not a generic word like value to allow these classes * to be used as mixins. */ template struct default_multiplier { // Not defined for an arbitrary type }; template struct default_increment { // Not defined for an arbitrary type }; #define PCG_DEFINE_CONSTANT(type, what, kind, constant) \ template <> \ struct what ## _ ## kind { \ static constexpr type kind() { \ return constant; \ } \ }; PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, 141U) PCG_DEFINE_CONSTANT(uint8_t, default, increment, 77U) PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, 12829U) PCG_DEFINE_CONSTANT(uint16_t, default, increment, 47989U) PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, 747796405U) PCG_DEFINE_CONSTANT(uint32_t, default, increment, 2891336453U) PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, 6364136223846793005ULL) PCG_DEFINE_CONSTANT(uint64_t, default, increment, 1442695040888963407ULL) PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier, PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL)) PCG_DEFINE_CONSTANT(pcg128_t, default, increment, PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL)) /* * Each PCG generator is available in four variants, based on how it applies * the additive constant for its underlying LCG; the variations are: * * single stream - all instances use the same fixed constant, thus * the RNG always somewhere in same sequence * mcg - adds zero, resulting in a single stream and reduced * period * specific stream - the constant can be changed at any time, selecting * a different random sequence * unique stream - the constant is based on the memory addresss of the * object, thus every RNG has its own unique sequence * * This variation is provided though mixin classes which define a function * value called increment() that returns the nesessary additive constant. */ /* * unique stream */ template class unique_stream { protected: static constexpr bool is_mcg = false; // Is never called, but is provided for symmetry with specific_stream void set_stream(...) { abort(); } public: typedef itype state_type; constexpr itype increment() const { return itype(reinterpret_cast(this) | 1); } constexpr itype stream() const { return increment() >> 1; } static constexpr bool can_specify_stream = false; static constexpr size_t streams_pow2() { return (sizeof(itype) < sizeof(size_t) ? sizeof(itype) : sizeof(size_t))*8 - 1u; } protected: constexpr unique_stream() = default; }; /* * no stream (mcg) */ template class no_stream { protected: static constexpr bool is_mcg = true; // Is never called, but is provided for symmetry with specific_stream void set_stream(...) { abort(); } public: typedef itype state_type; static constexpr itype increment() { return 0; } static constexpr bool can_specify_stream = false; static constexpr size_t streams_pow2() { return 0u; } protected: constexpr no_stream() = default; }; /* * single stream/sequence (oneseq) */ template class oneseq_stream : public default_increment { protected: static constexpr bool is_mcg = false; // Is never called, but is provided for symmetry with specific_stream void set_stream(...) { abort(); } public: typedef itype state_type; static constexpr itype stream() { return default_increment::increment() >> 1; } static constexpr bool can_specify_stream = false; static constexpr size_t streams_pow2() { return 0u; } protected: constexpr oneseq_stream() = default; }; /* * specific stream */ template class specific_stream { protected: static constexpr bool is_mcg = false; itype inc_ = default_increment::increment(); public: typedef itype state_type; typedef itype stream_state; constexpr itype increment() const { return inc_; } itype stream() { return inc_ >> 1; } void set_stream(itype specific_seq) { inc_ = (specific_seq << 1) | 1; } static constexpr bool can_specify_stream = true; static constexpr size_t streams_pow2() { return (sizeof(itype)*8) - 1u; } protected: specific_stream() = default; specific_stream(itype specific_seq) : inc_((specific_seq << 1) | itype(1U)) { // Nothing (else) to do. } }; /* * This is where it all comes together. This function joins together three * mixin classes which define * - the LCG additive constant (the stream) * - the LCG multiplier * - the output function * in addition, we specify the type of the LCG state, and the result type, * and whether to use the pre-advance version of the state for the output * (increasing instruction-level parallelism) or the post-advance version * (reducing register pressure). * * Given the high level of parameterization, the code has to use some * template-metaprogramming tricks to handle some of the suble variations * involved. */ template , typename multiplier_mixin = default_multiplier > class engine : protected output_mixin, public stream_mixin, protected multiplier_mixin { protected: itype state_; struct can_specify_stream_tag {}; struct no_specifiable_stream_tag {}; using stream_mixin::increment; using multiplier_mixin::multiplier; public: typedef xtype result_type; typedef itype state_type; static constexpr size_t period_pow2() { return sizeof(state_type)*8 - 2*stream_mixin::is_mcg; } // It would be nice to use std::numeric_limits for these, but // we can't be sure that it'd be defined for the 128-bit types. static constexpr result_type min() { return result_type(0UL); } static constexpr result_type max() { return ~result_type(0UL); } protected: itype bump(itype state) { return state * multiplier() + increment(); } itype base_generate() { return state_ = bump(state_); } itype base_generate0() { itype old_state = state_; state_ = bump(state_); return old_state; } public: result_type operator()() { if (output_previous) return this->output(base_generate0()); else return this->output(base_generate()); } result_type operator()(result_type upper_bound) { return bounded_rand(*this, upper_bound); } protected: static itype advance(itype state, itype delta, itype cur_mult, itype cur_plus); static itype distance(itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask = ~itype(0U)); itype distance(itype newstate, itype mask = ~itype(0U)) const { return distance(state_, newstate, multiplier(), increment(), mask); } public: void advance(itype delta) { state_ = advance(state_, delta, this->multiplier(), this->increment()); } void backstep(itype delta) { advance(-delta); } void discard(itype delta) { advance(delta); } bool wrapped() { if (stream_mixin::is_mcg) { // For MCGs, the low order two bits never change. In this // implementation, we keep them fixed at 3 to make this test // easier. return state_ == 3; } else { return state_ == 0; } } engine(itype state = itype(0xcafef00dd15ea5e5ULL)) : state_(this->is_mcg ? state|state_type(3U) : bump(state + this->increment())) { // Nothing else to do. } // This function may or may not exist. It thus has to be a template // to use SFINAE; users don't have to worry about its template-ness. template engine(itype state, typename sm::stream_state stream_seed) : stream_mixin(stream_seed), state_(this->is_mcg ? state|state_type(3U) : bump(state + this->increment())) { // Nothing else to do. } template engine(SeedSeq&& seedSeq, typename std::enable_if< !stream_mixin::can_specify_stream && !std::is_convertible::value && !std::is_convertible::value, no_specifiable_stream_tag>::type = {}) : engine(generate_one(std::forward(seedSeq))) { // Nothing else to do. } template engine(SeedSeq&& seedSeq, typename std::enable_if< stream_mixin::can_specify_stream && !std::is_convertible::value && !std::is_convertible::value, can_specify_stream_tag>::type = {}) : engine(generate_one(seedSeq), generate_one(seedSeq)) { // Nothing else to do. } template void seed(Args&&... args) { new (this) engine(std::forward(args)...); } template friend bool operator==(const engine&, const engine&); template friend itype1 operator-(const engine&, const engine&); template friend std::basic_ostream& operator<<(std::basic_ostream& out, const engine&); template friend std::basic_istream& operator>>(std::basic_istream& in, engine& rng); }; template std::basic_ostream& operator<<(std::basic_ostream& out, const engine& rng) { auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); auto space = out.widen(' '); auto orig_fill = out.fill(); out << rng.multiplier() << space << rng.increment() << space << rng.state_; out.flags(orig_flags); out.fill(orig_fill); return out; } template std::basic_istream& operator>>(std::basic_istream& in, engine& rng) { auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); itype multiplier, increment, state; in >> multiplier >> increment >> state; if (!in.fail()) { bool good = true; if (multiplier != rng.multiplier()) { good = false; } else if (rng.can_specify_stream) { rng.set_stream(increment >> 1); } else if (increment != rng.increment()) { good = false; } if (good) { rng.state_ = state; } else { in.clear(std::ios::failbit); } } in.flags(orig_flags); return in; } template itype engine::advance( itype state, itype delta, itype cur_mult, itype cur_plus) { // The method used here is based on Brown, "Random Number Generation // with Arbitrary Stride,", Transactions of the American Nuclear // Society (Nov. 1994). The algorithm is very similar to fast // exponentiation. // // Even though delta is an unsigned integer, we can pass a // signed integer to go backwards, it just goes "the long way round". constexpr itype ZERO = 0u; // itype may be a non-trivial types, so constexpr itype ONE = 1u; // we define some ugly constants. itype acc_mult = 1; itype acc_plus = 0; while (delta > ZERO) { if (delta & ONE) { acc_mult *= cur_mult; acc_plus = acc_plus*cur_mult + cur_plus; } cur_plus = (cur_mult+ONE)*cur_plus; cur_mult *= cur_mult; delta >>= 1; } return acc_mult * state + acc_plus; } template itype engine::distance( itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask) { constexpr itype ONE = 1u; // itype could be weird, so use constant itype the_bit = stream_mixin::is_mcg ? itype(4u) : itype(1u); itype distance = 0u; while ((cur_state & mask) != (newstate & mask)) { if ((cur_state & the_bit) != (newstate & the_bit)) { cur_state = cur_state * cur_mult + cur_plus; distance |= the_bit; } assert((cur_state & the_bit) == (newstate & the_bit)); the_bit <<= 1; cur_plus = (cur_mult+ONE)*cur_plus; cur_mult *= cur_mult; } return stream_mixin::is_mcg ? distance >> 2 : distance; } template itype operator-(const engine& lhs, const engine& rhs) { if (lhs.multiplier() != rhs.multiplier() || lhs.increment() != rhs.increment()) throw std::logic_error("incomparable generators"); return rhs.distance(lhs.state_); } template bool operator==(const engine& lhs, const engine& rhs) { return (lhs.multiplier() == rhs.multiplier()) && (lhs.increment() == rhs.increment()) && (lhs.state_ == rhs.state_); } template inline bool operator!=(const engine& lhs, const engine& rhs) { return !operator==(lhs,rhs); } template class output_mixin, bool output_previous = (sizeof(itype) <= 8)> using oneseq_base = engine, output_previous, oneseq_stream >; template class output_mixin, bool output_previous = (sizeof(itype) <= 8)> using unique_base = engine, output_previous, unique_stream >; template class output_mixin, bool output_previous = (sizeof(itype) <= 8)> using setseq_base = engine, output_previous, specific_stream >; template class output_mixin, bool output_previous = (sizeof(itype) <= 8)> using mcg_base = engine, output_previous, no_stream >; /* * OUTPUT FUNCTIONS. * * These are the core of the PCG generation scheme. They specify how to * turn the base LCG's internal state into the output value of the final * generator. * * They're implemented as mixin classes. * * All of the classes have code that is written to allow it to be applied * at *arbitrary* bit sizes, although in practice they'll only be used at * standard sizes supported by C++. */ /* * XSH RS -- high xorshift, followed by a random shift * * Fast. A good performer. */ template struct xsh_rs_mixin { static xtype output(itype internal) { constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); constexpr bitcount_t sparebits = bits - xtypebits; constexpr bitcount_t opbits = sparebits-5 >= 64 ? 5 : sparebits-4 >= 32 ? 4 : sparebits-3 >= 16 ? 3 : sparebits-2 >= 4 ? 2 : sparebits-1 >= 1 ? 1 : 0; constexpr bitcount_t mask = (1 << opbits) - 1; constexpr bitcount_t maxrandshift = mask; constexpr bitcount_t topspare = opbits; constexpr bitcount_t bottomspare = sparebits - topspare; constexpr bitcount_t xshift = topspare + (xtypebits+maxrandshift)/2; bitcount_t rshift = opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; internal ^= internal >> xshift; xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift)); return result; } }; /* * XSH RR -- high xorshift, followed by a random rotate * * Fast. A good performer. Slightly better statistically than XSH RS. */ template struct xsh_rr_mixin { static xtype output(itype internal) { constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); constexpr bitcount_t sparebits = bits - xtypebits; constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 : xtypebits >= 64 ? 6 : xtypebits >= 32 ? 5 : xtypebits >= 16 ? 4 : 3; constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits : sparebits; constexpr bitcount_t amplifier = wantedopbits - opbits; constexpr bitcount_t mask = (1 << opbits) - 1; constexpr bitcount_t topspare = opbits; constexpr bitcount_t bottomspare = sparebits - topspare; constexpr bitcount_t xshift = (topspare + xtypebits)/2; bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; bitcount_t amprot = (rot << amplifier) & mask; internal ^= internal >> xshift; xtype result = xtype(internal >> bottomspare); result = rotr(result, amprot); return result; } }; /* * RXS -- random xorshift */ template struct rxs_mixin { static xtype output_rxs(itype internal) { constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*8); constexpr bitcount_t shift = bits - xtypebits; constexpr bitcount_t extrashift = (xtypebits - shift)/2; bitcount_t rshift = shift > 64+8 ? (internal >> (bits - 6)) & 63 : shift > 32+4 ? (internal >> (bits - 5)) & 31 : shift > 16+2 ? (internal >> (bits - 4)) & 15 : shift > 8+1 ? (internal >> (bits - 3)) & 7 : shift > 4+1 ? (internal >> (bits - 2)) & 3 : shift > 2+1 ? (internal >> (bits - 1)) & 1 : 0; internal ^= internal >> (shift + extrashift - rshift); xtype result = internal >> rshift; return result; } }; /* * RXS M XS -- random xorshift, mcg multiply, fixed xorshift * * The most statistically powerful generator, but all those steps * make it slower than some of the others. We give it the rottenest jobs. * * Because it's usually used in contexts where the state type and the * result type are the same, it is a permutation and is thus invertable. * We thus provide a function to invert it. This function is used to * for the "inside out" generator used by the extended generator. */ /* Defined type-based concepts for the multiplication step. They're actually * all derived by truncating the 128-bit, which was computed to be a good * "universal" constant. */ template struct mcg_multiplier { // Not defined for an arbitrary type }; template struct mcg_unmultiplier { // Not defined for an arbitrary type }; PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, 217U) PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, 105U) PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, 62169U) PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, 28009U) PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, 277803737U) PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, 2897767785U) PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, 12605985483714917081ULL) PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, 15009553638781119849ULL) PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier, PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)) PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier, PCG_128BIT_CONSTANT(14422606686972528997ULL, 15009553638781119849ULL)) template struct rxs_m_xs_mixin { static xtype output(itype internal) { constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t opbits = xtypebits >= 128 ? 6 : xtypebits >= 64 ? 5 : xtypebits >= 32 ? 4 : xtypebits >= 16 ? 3 : 2; constexpr bitcount_t shift = bits - xtypebits; constexpr bitcount_t mask = (1 << opbits) - 1; bitcount_t rshift = opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; internal ^= internal >> (opbits + rshift); internal *= mcg_multiplier::multiplier(); xtype result = internal >> shift; result ^= result >> ((2U*xtypebits+2U)/3U); return result; } static itype unoutput(itype internal) { constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t opbits = bits >= 128 ? 6 : bits >= 64 ? 5 : bits >= 32 ? 4 : bits >= 16 ? 3 : 2; constexpr bitcount_t mask = (1 << opbits) - 1; internal = unxorshift(internal, bits, (2U*bits+2U)/3U); internal *= mcg_unmultiplier::unmultiplier(); bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; internal = unxorshift(internal, bits, opbits + rshift); return internal; } }; /* * RXS M -- random xorshift, mcg multiply */ template struct rxs_m_mixin { static xtype output(itype internal) { constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t opbits = xtypebits >= 128 ? 6 : xtypebits >= 64 ? 5 : xtypebits >= 32 ? 4 : xtypebits >= 16 ? 3 : 2; constexpr bitcount_t shift = bits - xtypebits; constexpr bitcount_t mask = (1 << opbits) - 1; bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : 0; internal ^= internal >> (opbits + rshift); internal *= mcg_multiplier::multiplier(); xtype result = internal >> shift; return result; } }; /* * XSL RR -- fixed xorshift (to low bits), random rotate * * Useful for 128-bit types that are split across two CPU registers. */ template struct xsl_rr_mixin { static xtype output(itype internal) { constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t sparebits = bits - xtypebits; constexpr bitcount_t wantedopbits = xtypebits >= 128 ? 7 : xtypebits >= 64 ? 6 : xtypebits >= 32 ? 5 : xtypebits >= 16 ? 4 : 3; constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits : sparebits; constexpr bitcount_t amplifier = wantedopbits - opbits; constexpr bitcount_t mask = (1 << opbits) - 1; constexpr bitcount_t topspare = sparebits; constexpr bitcount_t bottomspare = sparebits - topspare; constexpr bitcount_t xshift = (topspare + xtypebits) / 2; bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; bitcount_t amprot = (rot << amplifier) & mask; internal ^= internal >> xshift; xtype result = xtype(internal >> bottomspare); result = rotr(result, amprot); return result; } }; /* * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts) * * Useful for 128-bit types that are split across two CPU registers. * If you really want an invertable 128-bit RNG, I guess this is the one. */ template struct halfsize_trait {}; template <> struct halfsize_trait { typedef uint64_t type; }; template <> struct halfsize_trait { typedef uint32_t type; }; template <> struct halfsize_trait { typedef uint16_t type; }; template <> struct halfsize_trait { typedef uint8_t type; }; template struct xsl_rr_rr_mixin { typedef typename halfsize_trait::type htype; static itype output(itype internal) { constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * 8); constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t sparebits = bits - htypebits; constexpr bitcount_t wantedopbits = htypebits >= 128 ? 7 : htypebits >= 64 ? 6 : htypebits >= 32 ? 5 : htypebits >= 16 ? 4 : 3; constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits : sparebits; constexpr bitcount_t amplifier = wantedopbits - opbits; constexpr bitcount_t mask = (1 << opbits) - 1; constexpr bitcount_t topspare = sparebits; constexpr bitcount_t xshift = (topspare + htypebits) / 2; bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask : 0; bitcount_t amprot = (rot << amplifier) & mask; internal ^= internal >> xshift; htype lowbits = htype(internal); lowbits = rotr(lowbits, amprot); htype highbits = htype(internal >> topspare); bitcount_t rot2 = lowbits & mask; bitcount_t amprot2 = (rot2 << amplifier) & mask; highbits = rotr(highbits, amprot2); return (itype(highbits) << topspare) ^ itype(lowbits); } }; /* * XSH -- fixed xorshift (to high bits) * * You shouldn't use this at 64-bits or less. */ template struct xsh_mixin { static xtype output(itype internal) { constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t sparebits = bits - xtypebits; constexpr bitcount_t topspare = 0; constexpr bitcount_t bottomspare = sparebits - topspare; constexpr bitcount_t xshift = (topspare + xtypebits) / 2; internal ^= internal >> xshift; xtype result = internal >> bottomspare; return result; } }; /* * XSL -- fixed xorshift (to low bits) * * You shouldn't use this at 64-bits or less. */ template struct xsl_mixin { inline xtype output(itype internal) { constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * 8); constexpr bitcount_t bits = bitcount_t(sizeof(itype) * 8); constexpr bitcount_t sparebits = bits - xtypebits; constexpr bitcount_t topspare = sparebits; constexpr bitcount_t bottomspare = sparebits - topspare; constexpr bitcount_t xshift = (topspare + xtypebits) / 2; internal ^= internal >> xshift; xtype result = internal >> bottomspare; return result; } }; /* ---- End of Output Functions ---- */ template struct inside_out : private baseclass { inside_out() = delete; typedef typename baseclass::result_type result_type; typedef typename baseclass::state_type state_type; static_assert(sizeof(result_type) == sizeof(state_type), "Require a RNG whose output function is a permutation"); static bool external_step(result_type& randval, size_t i) { state_type state = baseclass::unoutput(randval); state = state * baseclass::multiplier() + baseclass::increment() + state_type(i*2); result_type result = baseclass::output(state); randval = result; state_type zero = baseclass::is_mcg ? state & state_type(3U) : state_type(0U); return result == zero; } static bool external_advance(result_type& randval, size_t i, result_type delta, bool forwards = true) { state_type state = baseclass::unoutput(randval); state_type mult = baseclass::multiplier(); state_type inc = baseclass::increment() + state_type(i*2); state_type zero = baseclass::is_mcg ? state & state_type(3U) : state_type(0U); state_type dist_to_zero = baseclass::distance(state, zero, mult, inc); bool crosses_zero = forwards ? dist_to_zero <= delta : (-dist_to_zero) <= delta; if (!forwards) delta = -delta; state = baseclass::advance(state, delta, mult, inc); randval = baseclass::output(state); return crosses_zero; } }; template class extended : public baseclass { public: typedef typename baseclass::state_type state_type; typedef typename baseclass::result_type result_type; typedef inside_out insideout; static constexpr size_t table_size = 1UL << table_pow2; result_type data_[table_size]; private: static constexpr bitcount_t rtypebits = sizeof(result_type)*8; static constexpr bitcount_t stypebits = sizeof(state_type)*8; static constexpr bitcount_t tick_limit_pow2 = 64U; static constexpr size_t table_shift = stypebits - table_pow2; static constexpr state_type table_mask = (state_type(1U) << table_pow2) - state_type(1U); static constexpr bool may_tick = (advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2); static constexpr size_t tick_shift = stypebits - advance_pow2; static constexpr state_type tick_mask = may_tick ? state_type( (uint64_t(1) << (advance_pow2*may_tick)) - 1) // ^-- stupidity to appease GCC warnings : ~state_type(0U); static constexpr bool may_tock = stypebits < tick_limit_pow2; PCG_NOINLINE void advance_table(); PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true); result_type& get_extended_value() { state_type state = this->state_; if (kdd && baseclass::is_mcg) { // The low order bits of an MCG are constant, so drop them. state >>= 2; } size_t index = kdd ? state & table_mask : state >> table_shift; if (may_tick) { bool tick = kdd ? (state & tick_mask) == state_type(0u) : (state >> tick_shift) == state_type(0u); if (tick) advance_table(); } if (may_tock) { bool tock = state == state_type(0u); if (tock) advance_table(); } return data_[index]; } public: static constexpr size_t period_pow2() { return baseclass::period_pow2() + table_size*extvalclass::period_pow2(); } __attribute__((always_inline)) result_type operator()() { result_type rhs = get_extended_value(); result_type lhs = this->baseclass::operator()(); return lhs ^ rhs; } result_type operator()(result_type upper_bound) { return bounded_rand(*this, upper_bound); } void set(result_type wanted) { result_type& rhs = get_extended_value(); result_type lhs = this->baseclass::operator()(); rhs = lhs ^ wanted; } void advance(state_type distance, bool forwards = true); void backstep(state_type distance) { advance(distance, false); } extended(const result_type* data) : baseclass() { datainit(data); } extended(const result_type* data, state_type seed) : baseclass(seed) { datainit(data); } // This function may or may not exist. It thus has to be a template // to use SFINAE; users don't have to worry about its template-ness. template extended(const result_type* data, state_type seed, typename bc::stream_state stream_seed) : baseclass(seed, stream_seed) { datainit(data); } extended() : baseclass() { selfinit(); } extended(state_type seed) : baseclass(seed) { selfinit(); } // This function may or may not exist. It thus has to be a template // to use SFINAE; users don't have to worry about its template-ness. template extended(state_type seed, typename bc::stream_state stream_seed) : baseclass(seed, stream_seed) { selfinit(); } private: void selfinit(); void datainit(const result_type* data); public: template::value && !std::is_convertible::value>::type> extended(SeedSeq&& seedSeq) : baseclass(seedSeq) { generate_to(seedSeq, data_); } template void seed(Args&&... args) { new (this) extended(std::forward(args)...); } template friend bool operator==(const extended&, const extended&); template friend std::basic_ostream& operator<<(std::basic_ostream& out, const extended&); template friend std::basic_istream& operator>>(std::basic_istream& in, extended&); }; template void extended::datainit( const result_type* data) { for (size_t i = 0; i < table_size; ++i) data_[i] = data[i]; } template void extended::selfinit() { // We need to fill the extended table with something, and we have // very little provided data, so we use the base generator to // produce values. Although not ideal (use a seed sequence, folks!), // unexpected correlations are mitigated by // - using XOR differences rather than the number directly // - the way the table is accessed, its values *won't* be accessed // in the same order the were written. // - any strange correlations would only be apparent if we // were to backstep the generator so that the base generator // was generating the same values again result_type xdiff = baseclass::operator()() - baseclass::operator()(); for (size_t i = 0; i < table_size; ++i) { data_[i] = baseclass::operator()() ^ xdiff; } } template bool operator==(const extended& lhs, const extended& rhs) { auto& base_lhs = static_cast(lhs); auto& base_rhs = static_cast(rhs); return base_lhs == base_rhs && !memcmp((void*) lhs.data_, (void*) rhs.data_, sizeof(lhs.data_)); } template inline bool operator!=(const extended& lhs, const extended& rhs) { return lhs != rhs; } template std::basic_ostream& operator<<(std::basic_ostream& out, const extended& rng) { auto orig_flags = out.flags(std::ios_base::dec | std::ios_base::left); auto space = out.widen(' '); auto orig_fill = out.fill(); out << rng.multiplier() << space << rng.increment() << space << rng.state_; for (const auto& datum : rng.data_) out << space << datum; out.flags(orig_flags); out.fill(orig_fill); return out; } template std::basic_istream& operator>>(std::basic_istream& in, extended& rng) { extended new_rng; auto& base_rng = static_cast(new_rng); in >> base_rng; if (in.fail()) return in; auto orig_flags = in.flags(std::ios_base::dec | std::ios_base::skipws); for (auto& datum : new_rng.data_) { in >> datum; if (in.fail()) goto bail; } rng = new_rng; bail: in.flags(orig_flags); return in; } template void extended::advance_table() { bool carry = false; for (size_t i = 0; i < table_size; ++i) { if (carry) { carry = insideout::external_step(data_[i],i+1); } bool carry2 = insideout::external_step(data_[i],i+1); carry = carry || carry2; } } template void extended::advance_table( state_type delta, bool isForwards) { typedef typename baseclass::state_type base_state_t; typedef typename extvalclass::state_type ext_state_t; constexpr bitcount_t basebits = sizeof(base_state_t)*8; constexpr bitcount_t extbits = sizeof(ext_state_t)*8; static_assert(basebits <= extbits || advance_pow2 > 0, "Current implementation might overflow its carry"); base_state_t carry = 0; for (size_t i = 0; i < table_size; ++i) { base_state_t total_delta = carry + delta; ext_state_t trunc_delta = ext_state_t(total_delta); if (basebits > extbits) { carry = total_delta >> extbits; } else { carry = 0; } carry += insideout::external_advance(data_[i],i+1, trunc_delta, isForwards); } } template void extended::advance( state_type distance, bool forwards) { static_assert(kdd, "Efficient advance is too hard for non-kdd extension. " "For a weak advance, cast to base class"); state_type zero = baseclass::is_mcg ? this->state_ & state_type(3U) : state_type(0U); if (may_tick) { state_type ticks = distance >> (advance_pow2*may_tick); // ^-- stupidity to appease GCC // warnings state_type adv_mask = baseclass::is_mcg ? tick_mask << 2 : tick_mask; state_type next_advance_distance = this->distance(zero, adv_mask); if (!forwards) next_advance_distance = (-next_advance_distance) & tick_mask; if (next_advance_distance < (distance & tick_mask)) { ++ticks; } if (ticks) advance_table(ticks, forwards); } if (forwards) { if (may_tock && this->distance(zero) <= distance) advance_table(); baseclass::advance(distance); } else { if (may_tock && -(this->distance(zero)) <= distance) advance_table(state_type(1U), false); baseclass::advance(-distance); } } } // namespace pcg_detail namespace pcg_engines { using namespace pcg_detail; /* Predefined types for XSH RS */ typedef oneseq_base oneseq_xsh_rs_16_8; typedef oneseq_base oneseq_xsh_rs_32_16; typedef oneseq_base oneseq_xsh_rs_64_32; typedef oneseq_base oneseq_xsh_rs_128_64; typedef unique_base unique_xsh_rs_16_8; typedef unique_base unique_xsh_rs_32_16; typedef unique_base unique_xsh_rs_64_32; typedef unique_base unique_xsh_rs_128_64; typedef setseq_base setseq_xsh_rs_16_8; typedef setseq_base setseq_xsh_rs_32_16; typedef setseq_base setseq_xsh_rs_64_32; typedef setseq_base setseq_xsh_rs_128_64; typedef mcg_base mcg_xsh_rs_16_8; typedef mcg_base mcg_xsh_rs_32_16; typedef mcg_base mcg_xsh_rs_64_32; typedef mcg_base mcg_xsh_rs_128_64; /* Predefined types for XSH RR */ typedef oneseq_base oneseq_xsh_rr_16_8; typedef oneseq_base oneseq_xsh_rr_32_16; typedef oneseq_base oneseq_xsh_rr_64_32; typedef oneseq_base oneseq_xsh_rr_128_64; typedef unique_base unique_xsh_rr_16_8; typedef unique_base unique_xsh_rr_32_16; typedef unique_base unique_xsh_rr_64_32; typedef unique_base unique_xsh_rr_128_64; typedef setseq_base setseq_xsh_rr_16_8; typedef setseq_base setseq_xsh_rr_32_16; typedef setseq_base setseq_xsh_rr_64_32; typedef setseq_base setseq_xsh_rr_128_64; typedef mcg_base mcg_xsh_rr_16_8; typedef mcg_base mcg_xsh_rr_32_16; typedef mcg_base mcg_xsh_rr_64_32; typedef mcg_base mcg_xsh_rr_128_64; /* Predefined types for RXS M XS */ typedef oneseq_base oneseq_rxs_m_xs_8_8; typedef oneseq_base oneseq_rxs_m_xs_16_16; typedef oneseq_base oneseq_rxs_m_xs_32_32; typedef oneseq_base oneseq_rxs_m_xs_64_64; typedef oneseq_base oneseq_rxs_m_xs_128_128; typedef unique_base unique_rxs_m_xs_8_8; typedef unique_base unique_rxs_m_xs_16_16; typedef unique_base unique_rxs_m_xs_32_32; typedef unique_base unique_rxs_m_xs_64_64; typedef unique_base unique_rxs_m_xs_128_128; typedef setseq_base setseq_rxs_m_xs_8_8; typedef setseq_base setseq_rxs_m_xs_16_16; typedef setseq_base setseq_rxs_m_xs_32_32; typedef setseq_base setseq_rxs_m_xs_64_64; typedef setseq_base setseq_rxs_m_xs_128_128; // MCG versions don't make sense here, so aren't defined. /* Predefined types for XSL RR (only defined for "large" types) */ typedef oneseq_base oneseq_xsl_rr_64_32; typedef oneseq_base oneseq_xsl_rr_128_64; typedef unique_base unique_xsl_rr_64_32; typedef unique_base unique_xsl_rr_128_64; typedef setseq_base setseq_xsl_rr_64_32; typedef setseq_base setseq_xsl_rr_128_64; typedef mcg_base mcg_xsl_rr_64_32; typedef mcg_base mcg_xsl_rr_128_64; /* Predefined types for XSL RR RR (only defined for "large" types) */ typedef oneseq_base oneseq_xsl_rr_rr_64_64; typedef oneseq_base oneseq_xsl_rr_rr_128_128; typedef unique_base unique_xsl_rr_rr_64_64; typedef unique_base unique_xsl_rr_rr_128_128; typedef setseq_base setseq_xsl_rr_rr_64_64; typedef setseq_base setseq_xsl_rr_rr_128_128; // MCG versions don't make sense here, so aren't defined. /* Extended generators */ template using ext_std8 = extended; template using ext_std16 = extended; template using ext_std32 = extended; template using ext_std64 = extended; template using ext_oneseq_rxs_m_xs_32_32 = ext_std32; template using ext_mcg_xsh_rs_64_32 = ext_std32; template using ext_oneseq_xsh_rs_64_32 = ext_std32; template using ext_setseq_xsh_rr_64_32 = ext_std32; template using ext_mcg_xsl_rr_128_64 = ext_std64; template using ext_oneseq_xsl_rr_128_64 = ext_std64; template using ext_setseq_xsl_rr_128_64 = ext_std64; } // namespace pcg_engines typedef pcg_engines::setseq_xsh_rr_64_32 pcg32; typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq; typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique; typedef pcg_engines::mcg_xsh_rs_64_32 pcg32_fast; typedef pcg_engines::setseq_xsl_rr_128_64 pcg64; typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq; typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique; typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast; typedef pcg_engines::setseq_rxs_m_xs_8_8 pcg8_once_insecure; typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure; typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure; typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure; typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure; typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure; typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure; typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure; typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure; typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure; // These two extended RNGs provide two-dimensionally equidistributed // 32-bit generators. pcg32_k2_fast occupies the same space as pcg64, // and can be called twice to generate 64 bits, but does not required // 128-bit math; on 32-bit systems, it's faster than pcg64 as well. typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true> pcg32_k2; typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true> pcg32_k2_fast; // These eight extended RNGs have about as much state as arc4random // // - the k variants are k-dimensionally equidistributed // - the c variants offer better crypographic security // // (just how good the cryptographic security is is an open question) typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,true> pcg32_k64; typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,true> pcg32_k64_oneseq; typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,true> pcg32_k64_fast; typedef pcg_engines::ext_setseq_xsh_rr_64_32<6,16,false> pcg32_c64; typedef pcg_engines::ext_oneseq_xsh_rs_64_32<6,32,false> pcg32_c64_oneseq; typedef pcg_engines::ext_mcg_xsh_rs_64_32<6,32,false> pcg32_c64_fast; typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,true> pcg64_k32; typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,true> pcg64_k32_oneseq; typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,true> pcg64_k32_fast; typedef pcg_engines::ext_setseq_xsl_rr_128_64<5,16,false> pcg64_c32; typedef pcg_engines::ext_oneseq_xsl_rr_128_64<5,128,false> pcg64_c32_oneseq; typedef pcg_engines::ext_mcg_xsl_rr_128_64<5,128,false> pcg64_c32_fast; // These eight extended RNGs have more state than the Mersenne twister // // - the k variants are k-dimensionally equidistributed // - the c variants offer better crypographic security // // (just how good the cryptographic security is is an open question) typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,true> pcg32_k1024; typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,true> pcg32_k1024_fast; typedef pcg_engines::ext_setseq_xsh_rr_64_32<10,16,false> pcg32_c1024; typedef pcg_engines::ext_oneseq_xsh_rs_64_32<10,32,false> pcg32_c1024_fast; typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,true> pcg64_k1024; typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,true> pcg64_k1024_fast; typedef pcg_engines::ext_setseq_xsl_rr_128_64<10,16,false> pcg64_c1024; typedef pcg_engines::ext_oneseq_xsl_rr_128_64<10,128,false> pcg64_c1024_fast; // These generators have an insanely huge period (2^524352), and is suitable // for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary // point in the future. [Actually, over the full period of the generator, it // will produce every 64 KB ZIP file 2^64 times!] typedef pcg_engines::ext_setseq_xsh_rr_64_32<14,16,true> pcg32_k16384; typedef pcg_engines::ext_oneseq_xsh_rs_64_32<14,32,true> pcg32_k16384_fast; #endif // PCG_RAND_HPP_INCLUDED /* Fixed-parameter 64-bit SplitMix generator; used for seeding. */ static uint64_t seed = 4242; static uint64_t next_seed() { uint64_t z = (seed += UINT64_C(0x9E3779B97F4A7C15)); z = (z ^ (z >> 30)) * UINT64_C(0xBF58476D1CE4E5B9); z = (z ^ (z >> 27)) * UINT64_C(0x94D049BB133111EB); return z ^ (z >> 31); } using namespace std; int main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "USAGE: %s N\n", argv[0]); exit(1); } const uint64_t n = strtoull(argv[1], NULL, 0); if (errno) { fprintf(stderr, "%s\n", strerror(errno)); exit(1); } // Two generators in the same state pcg64_k32 rng0, rng1; rng0.seed(1); rng1.seed(1); rng1.data_[0] ^= 1; // Flip one bit of state for(int i = 0; i < n; i++) { // Advance n times to uncorrelate rng0(); rng1(); } for(;;) { const uint64_t x = rng0(); fwrite(&x, sizeof x, 1, stdout); //printf("%016" PRIx64 "\n", x); const uint64_t y = rng1(); fwrite(&y, sizeof y, 1, stdout); //printf("%016" PRIx64 "\n", y); } }