/* Written in 2019 by Sebastiano Vigna (vigna@acm.org) Easily computes the state of a 128-bit PCG generator from its output, making it trivial to predict all its future output. To the extent possible under law, the author has dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. Compile with: g++ -std=c++17 -O3 predpcg128.cpp -o predpcg128 -l ntl [-l pthread] See . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; using namespace NTL; using namespace std::chrono; typedef __uint128_t uint128_t; // ------- // PCG code Copyright by Melissa O'Neill typedef __uint128_t pcg128_t; #define PCG_128BIT_CONSTANT(high,low) ((((pcg128_t)high) << 64) + low) struct pcg_state_128 { pcg128_t state; }; #define PCG_DEFAULT_MULTIPLIER_128 \ PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL) #define PCG_DEFAULT_INCREMENT_128 \ PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL) static inline void pcg_oneseq_128_step_r(struct pcg_state_128* rng) { rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + PCG_DEFAULT_INCREMENT_128; } static inline uint64_t pcg_output_xsh_rs_128_64(pcg128_t state) { return (uint64_t)(((state >> 43u) ^ state) >> ((state >> 124u) + 45u)); } static inline uint64_t pcg_oneseq_128_xsh_rs_64_random_r(struct pcg_state_128* rng) { pcg_oneseq_128_step_r(rng); return pcg_output_xsh_rs_128_64(rng->state); } struct pcg_state_128 rng; // ------- // ------- // I/O commodity functions // Conversion for large integers in hexadecimal form, too. static ZZ strtoZZ(const char * const s) { if (s[0] == '0' && tolower(s[1]) == 'x') { ZZ x = conv(0); for(size_t i = 2; i < strlen(s); i++) { x *= 16; if (!isdigit(s[i]) && (tolower(s[i]) < 'a' || tolower(s[i]) > 'f')) { cerr << "Invalid hexadecimal constant " << s << endl; abort(); } x += isdigit(s[i]) ? s[i] - '0' : tolower(s[i]) - 'a' + 10; } return x; } for(size_t i = 0; i < strlen(s); i++) if (!isdigit(s[i])) { cerr << "Invalid decimal constant " << s << endl; abort(); } return conv(s); } template <> ZZ NTL::conv<>(const uint128_t& x) { return conv(uint64_t(x >> 64)) << 64 | conv((uint64_t&)x); } template <> uint128_t NTL::conv<>(const ZZ& x) { return uint128_t(conv(x >> 64 & conv(0xFFFFFFFFFFFFFFFF))) << 64 | conv(x & conv(0xFFFFFFFFFFFFFFFF)); } template string hex(T); template <> string hex<>(ZZ a) { // Hexadecimal representation of a string s = ""; while(a != 0) { s += "0123456789abcdef"[a % 16]; a /= 16; } reverse(s.begin(), s.end()); return s; } template <> string hex<>(uint128_t a) { return hex(conv(a)); } template <> string hex<>(uint64_t a) { return hex(conv(a)); } // ------- // Precomputed constants const ZZ a = conv(PCG_DEFAULT_MULTIPLIER_128); // Multiplier const ZZ c = conv(PCG_DEFAULT_INCREMENT_128); // Constant const ZZ s = power(conv(10), 100); // A googol (a large constant) const ZZ _1 = - conv(1); const ZZ m = conv(1) << (128); // Modulo /* Given an output, a guess g for the highest 4 bits and a guess h for the following 15 - g bits, uncovers the upper 83 - g bits. */ static uint128_t inline uncover(const uint64_t output, uint64_t g, uint64_t h) { // Given bits [109 + g..128) in h, we recover also bits [66 + g..109 + g) and place these bits into x const uint128_t x = (((uint128_t)(output >> 21) ^ h) << (21 + (45 + g)) | (uint128_t)h << (109 + g)); // We now use the highest bits of the uncovered bits to uncover bits [45 + g...66 + g) return x ^ ((x >> (45 + g + 43) ^ output) & ((1ULL << 21) - 1)) << (45 + g); } /** Given two outputs and two correct guesses for the high bits, recovers the lower bits. */ static pair recover(mat_ZZ& mat, const uint64_t o0, const uint64_t o1, uint64_t g0, uint64_t g1, uint64_t h0, uint64_t h1) { const uint128_t c0 = uncover(o0, g0, h0); const uint128_t c1 = uncover(o1, g1, h1); for(int i = 0; i < 4; i++) for(int j = 0; j < 4; j++) mat[i][j] = ZZ::zero(); // Standard setup to solve the modular equation a * (c0 + x0) + c = c1 + x1 (mod m) for(int i = 0; i < 2; i++) mat[i][i + 1] = _1; // super-diagonal of -1's mat[0][0] = s * a; mat[1][0] = -s; mat[2][0] = s * ((-c - a * conv(c0) + conv(c1)) % m); mat[2][3] = s; mat[3][0] = s * m; ZZ det2; // LLL reduction with delta = 0.999999999 LLL(det2, mat, 99999999, 100000000); return pair(c0 | conv(mat[3][1]), c1 | conv(mat[3][2])); } int main(int argc, char* argv[]) { if (argc != 3 && argc != 4) { cerr << "USAGE: " << argv[0] << " STATE NTHREADS [START]" << endl; cerr << "Given an initial 128-bit state (in decimal or hexadecimal)," << endl; cerr << "generates outputs using a 128-bit PCG generator and recovers" << endl; cerr << "the initial state from the output. START (default: 9) tunes" << endl; cerr << "the recovery strategy: for example, 0 recovers the initial" << endl; cerr << "state using just three outputs, but it requires some time;" << endl; cerr << "15 recovers the initial state after a few hundred outputs, but" << endl; cerr << "it ends in less than a second. NTHREADS is the number of " << endl; cerr << "parallel threads (e.g., your number of CPU cores)." << endl; return 1; } /* Lower threshold for state recovery. You can make this lower to reduce the number of outputs used for recovery, but the search will be slower. In that case, better use several parallel threads. The internal loop will be executed 4^(16 - start) - 2^(17 - start) + 1 times, and in particular 4294836225 times when start = 0 (e.g., about a day of single-thread computation time if want to be able to recover the state at the first attempt). If you set start = 15, you will need to scan a few hundred outputs, but you will recover the initial states in tenths of a second. */ const int nthreads = atoi(argv[2]); const int start = argc == 4 ? strtoll(argv[3], NULL, 0) : 9; if (start < 0 || start >= 16) { cerr << "START must be in the interval [0..16)" << endl; return 1; } SetNumThreads(nthreads); rng.state = conv(strtoZZ(argv[1])); // Outputs uint64_t o0, o1, o2; o1 = pcg_oneseq_128_xsh_rs_64_random_r(&rng); printf("0x%016" PRIx64 "\n", o1); o2 = pcg_oneseq_128_xsh_rs_64_random_r(&rng); printf("0x%016" PRIx64 "\n", o2); auto begin = high_resolution_clock::now(); bool found = false; for(int count = 1;; count++) { o0 = o1; o1 = o2; o2 = pcg_oneseq_128_xsh_rs_64_random_r(&rng); printf("0x%016" PRIx64 "\n", o2); // We try to find only states whose upper four bits are at least start. for(uint64_t g0 = 16; g0-- != start;) { // Guess upper 4 bits of the first state const uint64_t m0 = g0 << (15 - g0); const uint64_t r0 = (1U << (15 - g0)); for(uint64_t g1 = 16; g1-- != start;) { // Guess upper 4 bits of the second state const uint64_t m1 = g1 << (15 - g1); const uint64_t r1 = (1U << (15 - g1)); NTL_EXEC_RANGE(r0 * r1, first, last) for(uint64_t h = first; h < last; h++) { // Guess following 15 - g0 bits of the first state and following 15 - g1 bits of the second state const uint64_t h0 = h / r1; const uint64_t h1 = h % r1; mat_ZZ mat; mat.SetDims(4, 4); auto&& [s0, s1] = recover(mat, o0, o1, g0, g1, h0 | m0, h1 | m1); // Recover the lower bits of both states // Check that the recovered states are right if (pcg_output_xsh_rs_128_64(s0) == o0 && pcg_output_xsh_rs_128_64(s1) == o1 && s0 * PCG_DEFAULT_MULTIPLIER_128 + PCG_DEFAULT_INCREMENT_128 == s1 && pcg_output_xsh_rs_128_64(PCG_DEFAULT_MULTIPLIER_128 * s1 + PCG_DEFAULT_INCREMENT_128) == o2) { // Roll back state const uint128_t a_ = conv(InvMod(a, m)); while(count-- != 0) s0 = (s0 - PCG_DEFAULT_INCREMENT_128) * a_; auto end = high_resolution_clock::now(); printf("Elapsed: %.3fs\n", duration_cast(end - begin).count() / 1E9); cout << endl << "Recovered initial state: " << conv(s0) << " " << "(0x" << hex(s0) << ")" << endl; found = true; goto the_end; } } the_end: ; NTL_EXEC_RANGE_END if (found) exit(1); } } } return 0; }