1// +build ignore
2
3/*
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright
8 notice, this list of conditions and the following disclaimer.
9
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13
14 * Neither the name of "The Computer Language Benchmarks Game" nor the
15 name of "The Computer Language Shootout Benchmarks" nor the names of
16 its contributors may be used to endorse or promote products derived
17 from this software without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29POSSIBILITY OF SUCH DAMAGE.
30*/
31
32#include <stdio.h>
33#include <string.h>
34#include <ctype.h>
35#include <stdlib.h>
36#include <glib.h>
37
38typedef struct stat_s stat_t;
39struct stat_s
40{
41 const gchar *key;
42 long stat;
43};
44
45#define MAX_ELM (8192 / sizeof (stat_t))
46
47static int
48generate_frequencies (int fl, char *buffer, long buflen,
49 GHashTable *ht, GTrashStack **ts, GPtrArray *roots, GStringChunk *sc)
50{
51 gchar *key;
52 long i;
53
54 if (fl > buflen) return 0;
55 if (fl == 0) return 0;
56
57 for (i = 0; i < buflen - fl + 1; ++i)
58 {
59 char nulled;
60 stat_t *stat;
61
62 nulled = buffer[i + fl];
63 buffer[i + fl] = '\0';
64
65 key = g_string_chunk_insert_const(sc, buffer + i);
66
67 stat = g_hash_table_lookup(ht, key);
68 if (!stat)
69 {
70 stat = g_trash_stack_pop(ts);
71 if (!stat)
72 {
73 int j;
74
75 stat = malloc(sizeof (stat_t) * MAX_ELM);
76 g_ptr_array_add(roots, stat);
77
78 for (j = 1; j < MAX_ELM; ++j)
79 g_trash_stack_push(ts, stat + j);
80 }
81 stat->stat = 1;
82 stat->key = key;
83
84 g_hash_table_insert(ht, key, stat);
85 }
86 else
87 stat->stat++;
88
89 buffer[i + fl] = nulled;
90 }
91
92 return buflen - fl + 1;
93}
94
95static int
96cmp_func(gconstpointer a, gconstpointer b)
97{
98 const stat_t *left = a;
99 const stat_t *right = b;
100
101 return right->stat - left->stat;
102}
103
104static void
105sorted_list(gpointer key, gpointer value, gpointer user_data)
106{
107 stat_t *data = value;
108 GList **lst = user_data;
109
110 *lst = g_list_insert_sorted(*lst, data, cmp_func);
111}
112
113static void
114display_stat(gpointer data, gpointer user_data)
115{
116 long *total = user_data;
117 stat_t *st = data;
118
119 printf("%s %.3f\n", st->key, 100 * (float) st->stat / *total);
120}
121
122void
123write_frequencies (int fl, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
124{
125 GStringChunk *sc;
126 GHashTable *ht;
127 GList *lst;
128 long total;
129
130 ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
131 sc = g_string_chunk_new(buflen);
132 lst = NULL;
133
134 total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
135
136 if (!total) goto on_error;
137
138 g_hash_table_foreach(ht, sorted_list, &lst);
139 g_list_foreach(lst, display_stat, &total);
140 g_list_free(lst);
141
142 on_error:
143 g_hash_table_destroy(ht);
144 g_string_chunk_free(sc);
145}
146
147void
148write_count (char *searchFor, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
149{
150 GStringChunk *sc;
151 GHashTable *ht;
152 stat_t *result;
153 GList *lst;
154 long total;
155 long fl;
156
157 fl = strlen(searchFor);
158
159 ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
160 sc = g_string_chunk_new(buflen);
161 lst = NULL;
162 result = NULL;
163
164 total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
165
166 if (!total) goto on_error;
167
168 result = g_hash_table_lookup(ht, searchFor);
169
170 on_error:
171 printf("%ld\t%s\n", result ? result->stat : 0, searchFor);
172
173 g_hash_table_destroy(ht);
174 g_string_chunk_free(sc);
175}
176
177int
178main ()
179{
180 char buffer[4096];
181 GTrashStack *ts;
182 GPtrArray *roots;
183 GString *stuff;
184 gchar *s;
185 int len;
186
187 roots = g_ptr_array_new();
188 ts = NULL;
189
190 while (fgets(buffer, sizeof (buffer), stdin))
191 if (strncmp(buffer, ">THREE", 6) == 0)
192 break;
193
194 stuff = g_string_new(NULL);
195
196 while (fgets(buffer, sizeof (buffer), stdin))
197 {
198 size_t sz;
199
200 if (buffer[0] == '>')
201 break;
202
203 sz = strlen(buffer);
204 if (buffer[sz - 1] == '\n')
205 --sz;
206
207 stuff = g_string_append_len(stuff, buffer, sz);
208 }
209
210 stuff = g_string_ascii_up(stuff);
211 len = stuff->len;
212 s = g_string_free(stuff, FALSE);
213
214 write_frequencies(1, s, len, &ts, roots);
215 printf("\n");
216 write_frequencies(2, s, len, &ts, roots);
217 printf("\n");
218 write_count("GGT", s, len, &ts, roots);
219 write_count("GGTA", s, len, &ts, roots);
220 write_count("GGTATT", s, len, &ts, roots);
221 write_count("GGTATTTTAATT", s, len, &ts, roots);
222 write_count("GGTATTTTAATTTATAGT", s, len, &ts, roots);
223
224 free(s);
225
226 g_ptr_array_foreach(roots, (GFunc)free, NULL);
227 g_ptr_array_free(roots, TRUE);
228
229 return 0;
230}
View as plain text