...

Text file src/golang.org/x/exp/shootout/fasta.c

Documentation: golang.org/x/exp/shootout

     1// +build ignore
     2
     3/*
     4Redistribution and use in source and binary forms, with or without
     5modification, are permitted provided that the following conditions are met:
     6
     7    * Redistributions of source code must retain the above copyright
     8    notice, this list of conditions and the following disclaimer.
     9
    10    * Redistributions in binary form must reproduce the above copyright
    11    notice, this list of conditions and the following disclaimer in the
    12    documentation and/or other materials provided with the distribution.
    13
    14    * Neither the name of "The Computer Language Benchmarks Game" nor the
    15    name of "The Computer Language Shootout Benchmarks" nor the names of
    16    its contributors may be used to endorse or promote products derived
    17    from this software without specific prior written permission.
    18
    19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    20AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    21IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    22ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    23LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    24CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    25SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    26INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    27CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    28ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    29POSSIBILITY OF SUCH DAMAGE.
    30*/
    31
    32/*
    33 * http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3
    34 */
    35
    36/*  The Computer Language Benchmarks Game
    37 *  http://shootout.alioth.debian.org/
    38 *
    39 *  contributed by Petr Prokhorenkov
    40 */
    41
    42#include <stdio.h>
    43#include <stdlib.h>
    44#include <string.h>
    45
    46#ifndef fwrite_unlocked
    47// not available on OS X 
    48#define fwrite_unlocked fwrite
    49#define fputc_unlocked fputc
    50#define fputs_unlocked fputs
    51#endif
    52
    53#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
    54#define unlikely(x) __builtin_expect((x), 0)
    55
    56#define IM 139968
    57#define IA 3877
    58#define IC 29573
    59
    60#define LINE_LEN 60
    61#define LOOKUP_SIZE 4096
    62#define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1))
    63
    64typedef unsigned random_t;
    65
    66void
    67random_init(random_t *random) {
    68    *random = 42;
    69}
    70
    71// Special version with result rescaled to LOOKUP_SCALE.
    72static inline
    73float
    74random_next_lookup(random_t *random) {
    75    *random = (*random*IA + IC)%IM;
    76
    77    return (*random)*(LOOKUP_SCALE/IM);
    78}
    79
    80struct amino_acid {
    81   char sym;
    82   float prob;
    83   float cprob_lookup;
    84};
    85
    86void
    87repeat(const char *alu, const char *title, int n) {
    88    int len = strlen(alu);
    89    char buffer[len + LINE_LEN];
    90    int pos = 0;
    91
    92    memcpy(buffer, alu, len);
    93    memcpy(buffer + len, alu, LINE_LEN);
    94
    95    fputs_unlocked(title, stdout);
    96    while (n > 0) {
    97        int bytes = n > LINE_LEN ? LINE_LEN : n;
    98
    99        fwrite_unlocked(buffer + pos, bytes, 1, stdout);
   100        pos += bytes;
   101        if (pos > len) {
   102            pos -= len;
   103        }
   104        fputc_unlocked('\n', stdout);
   105        n -= bytes;
   106    }
   107}
   108
   109/*
   110 * Lookup table contains mapping from real values to cumulative
   111 * probabilities. Careful selection of table size allows lookup
   112 * virtually in constant time.
   113 *
   114 * All cumulative probabilities are rescaled to LOOKUP_SCALE,
   115 * this allows to save one multiplication operation on each iteration
   116 * in randomize().
   117 */
   118
   119void *
   120fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) {
   121    float p = 0;
   122    int i, j;
   123
   124    for (i = 0; i < amino_acid_size; i++) {
   125        p += amino_acid[i].prob;
   126        amino_acid[i].cprob_lookup = p*LOOKUP_SCALE;
   127    }
   128
   129    // Prevent rounding error.
   130    amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1;
   131
   132    for (i = 0, j = 0; i < LOOKUP_SIZE; i++) {
   133        while (amino_acid[j].cprob_lookup < i) {
   134            j++;
   135        }
   136        lookup[i] = &amino_acid[j];
   137    }
   138
   139    return 0;
   140}
   141
   142void
   143randomize(struct amino_acid *amino_acid, int amino_acid_size,
   144        const char *title, int n, random_t *rand) {
   145    struct amino_acid *lookup[LOOKUP_SIZE];
   146    char line_buffer[LINE_LEN + 1];
   147    int i, j;
   148
   149    line_buffer[LINE_LEN] = '\n';
   150
   151    fill_lookup(lookup, amino_acid, amino_acid_size);
   152
   153    fputs_unlocked(title, stdout);
   154
   155    for (i = 0, j = 0; i < n; i++, j++) {
   156        if (j == LINE_LEN) {
   157            fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout);
   158            j = 0;
   159        }
   160
   161        float r = random_next_lookup(rand);
   162        struct amino_acid *u = lookup[(short)r];
   163        while (unlikely(u->cprob_lookup < r)) {
   164            ++u;
   165        }
   166        line_buffer[j] = u->sym;
   167    }
   168    line_buffer[j] = '\n';
   169    fwrite_unlocked(line_buffer, j + 1, 1, stdout);
   170}
   171
   172struct amino_acid amino_acid[] = {
   173   { 'a', 0.27 },
   174   { 'c', 0.12 },
   175   { 'g', 0.12 },
   176   { 't', 0.27 },
   177
   178   { 'B', 0.02 },
   179   { 'D', 0.02 },
   180   { 'H', 0.02 },
   181   { 'K', 0.02 },
   182   { 'M', 0.02 },
   183   { 'N', 0.02 },
   184   { 'R', 0.02 },
   185   { 'S', 0.02 },
   186   { 'V', 0.02 },
   187   { 'W', 0.02 },
   188   { 'Y', 0.02 },
   189};
   190
   191struct amino_acid homo_sapiens[] = {
   192   { 'a', 0.3029549426680 },
   193   { 'c', 0.1979883004921 },
   194   { 'g', 0.1975473066391 },
   195   { 't', 0.3015094502008 },
   196};
   197
   198static const char alu[] =
   199   "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG"
   200   "GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA"
   201   "GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA"
   202   "AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT"
   203   "CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC"
   204   "CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG"
   205   "CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
   206
   207int
   208main(int argc, const char **argv) {
   209    int n = argc > 1 ? atoi( argv[1] ) : 512;
   210    random_t rand;
   211
   212    random_init(&rand);
   213
   214    repeat(alu, ">ONE Homo sapiens alu\n", n*2);
   215    randomize(amino_acid, ARRAY_SIZE(amino_acid),
   216            ">TWO IUB ambiguity codes\n", n*3, &rand);
   217    randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens),
   218            ">THREE Homo sapiens frequency\n", n*5, &rand);
   219
   220    return 0;
   221}

View as plain text