/* ** samsim.c - sampling simulator */ #include #include #include #include typedef int bool; #define MAX_RANDOM_VALUE (0x7FFFFFFF) static void initrandom() { struct timeval tv; gettimeofday(&tv, NULL); srandom(tv.tv_sec ^ tv.tv_usec); }/*initrandom*/ /* Select a random value R uniformly distributed in 0 < R < 1 */ static double random_fract(void) { long z; /* random() can produce endpoint values, try again if so */ do { z = random(); } while (z <= 0 || z >= MAX_RANDOM_VALUE); return (double) z / (double) MAX_RANDOM_VALUE; } /* ** data structure for (modified) Algorithm S from Knuth 3.4.2 */ typedef struct { long N; /* number of tuples, known in advance */ int n; /* sample size */ long t; /* current tuple number */ int m; /* tuples selected so far */ } SamplerData; typedef SamplerData *Sampler; static void Sampler_Init(Sampler bs, long N, int samplesize); static bool Sampler_HasMore(Sampler bs); static long Sampler_Next(Sampler bs); /* ** Sampler_Init -- prepare for random sampling */ static void Sampler_Init(Sampler bs, long N, int samplesize) { bs->N = N; /* table size */ bs->n = samplesize; bs->t = 0; /* tuples scanned so far */ bs->m = 0; /* tuples selected so far */ } static bool Sampler_HasMore(Sampler bs) { return (bs->t < bs->N) && (bs->m < bs->n); } static long Sampler_Next(Sampler bs) { long K = bs->N - bs->t; /* remaining tuples */ int k = bs->n - bs->m; /* tuples still to sample */ double p; /* probability to skip the next tuple */ double V; /* random */ /* Assert(Sampler_HasMore(bs)); */ if (k >= K) { /* need all the rest */ bs->t += 1; bs->m += 1; return bs->t - 1; } p = 1.0 - (double) k / (double) K; V = random_fract(); while (V < p) { bs->t += 1; K -= 1; /* ** Assert(K > 0) ** because we startet with K > k > 0, ** and when K == k, the loop terminates */ p *= 1.0 - (double) k / (double) K; } /* select */ bs->t += 1; bs->m += 1; return bs->t - 1; } static void usage() { fprintf(stderr, "usage: samsim c B n\n" "where c = tuples/page\n" " B = # of pages\n" " n = sample size\n"); }/*usage*/ static void dumpstats(long *stat, long cnt) { long i; for (i = 1; i <= cnt; ++i) { printf("%ld%c", stat[i], (i < cnt) ? '\t' : '\n'); }/*for*/ }/*dumpstats*/ static int samsim(long c, long B, long n) { SamplerData s; long oldblock = -1; long blockhits = 0; long maxhits = -1; long *stat = calloc(c + 1, sizeof(long)); if (stat == NULL) { fprintf(stderr, "cannot allocate %ld numbers\n", c); return 2; }/*if*/ Sampler_Init(&s, c * B, n); if (!Sampler_HasMore(&s)) { fprintf(stderr, "empty sample\n"); return 3; }/*if*/ while (Sampler_HasMore(&s)) { long t = Sampler_Next(&s); long blocknr = t / c; if (blocknr == oldblock) { ++blockhits; } else { if (oldblock >= 0) { stat[blockhits] += 1; if (blockhits > maxhits) maxhits = blockhits; }/*if*/ oldblock = blocknr; blockhits = 1; }/*else*/ }/*while*/ stat[blockhits] += 1; dumpstats(stat, maxhits); free(stat); return 0; /* success */ }/*samsim*/ int main(int argc, char* argv[]) { long c, B, n; if (argc != 4) { usage(); exit(1); }/*if*/ c = atol(argv[1]); B = atol(argv[2]); n = atol(argv[3]); if ((c <= 0) || (B <= 0) || (n <= 0)) { usage(); exit(1); }/*if*/ initrandom(); return samsim(c, B, n); }/*main*/