...
1All tests on r45 or r70
2
3Aug 3 2009
4
5First version of fasta. Translation of fasta.c, fetched from
6 http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4
7
8fasta -n 25000000
9 gcc -O2 fasta.c 5.98u 0.00s 6.01r
10 gccgo -O2 fasta.go 8.82u 0.02s 8.85r
11 6g fasta.go 13.50u 0.02s 13.53r
12 6g -B fasta.go 12.99u 0.02s 13.02r
13
14Aug 4 2009
15[added timing.sh]
16
17# myrandom:
18# hand-written optimization of integer division
19# use int32->float conversion
20fasta -n 25000000
21 # probably I/O library inefficiencies
22 gcc -O2 fasta.c 5.99u 0.00s 6.00r
23 gccgo -O2 fasta.go 8.82u 0.02s 8.85r
24 gc fasta 10.70u 0.00s 10.77r
25 gc_B fasta 10.09u 0.03s 10.12r
26
27reverse-complement < output-of-fasta-25000000
28 # we don't know - memory cache behavior?
29 gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r
30 gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r
31 gc reverse-complement 6.55u 0.70s 7.26r
32 gc_B reverse-complement 6.32u 0.70s 7.10r
33
34nbody 50000000
35 # math.Sqrt needs to be in assembly; inlining is probably the other 50%
36 gcc -O2 nbody.c 21.61u 0.01s 24.80r
37 gccgo -O2 nbody.go 118.55u 0.02s 120.32r
38 gc nbody 100.84u 0.00s 100.85r
39 gc_B nbody 103.33u 0.00s 103.39r
40[
41hacked Sqrt in assembler
42 gc nbody 31.97u 0.00s 32.01r
43]
44
45binary-tree 15 # too slow to use 20
46 # memory allocation and garbage collection
47 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
48 gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r
49 gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r
50 gc binary-tree 9.60u 0.01s 9.62r
51 gc binary-tree-freelist 0.48u 0.01s 0.50r
52
53August 5, 2009
54
55fannkuch 12
56 # bounds checking is half the difference
57 # rest might be registerization
58 gcc -O2 fannkuch.c 60.09u 0.01s 60.32r
59 gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r
60 gc fannkuch 124.59u 0.00s 124.67r
61 gc_B fannkuch 91.14u 0.00s 91.16r
62
63regex-dna 100000
64 # regexp code is slow on trivial regexp
65 gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r
66 gc regex-dna 26.94u 0.18s 28.75r
67 gc_B regex-dna 26.51u 0.09s 26.75r
68
69spectral-norm 5500
70 gcc -O2 spectral-norm.c -lm 11.54u 0.00s 11.55r
71 gccgo -O2 spectral-norm.go 12.20u 0.00s 12.23r
72 gc spectral-norm 50.23u 0.00s 50.36r
73 gc_B spectral-norm 49.69u 0.01s 49.83r
74 gc spectral-norm-parallel 24.47u 0.03s 11.05r # has shift >>1 not div /2
75 [using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]
76
77August 6, 2009
78
79k-nucleotide 5000000
80 # string maps are slower than glib string maps
81 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.72u 0.01s 10.74r
82 gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r
83 gc k-nucleotide 16.08u 0.06s 16.50r
84 gc_B k-nucleotide 17.32u 0.02s 17.37r
85
86mandelbrot 5500
87 # floating point code generator should use more registers
88 gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r
89 gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r
90 gc mandelbrot 74.32u 0.00s 74.35r
91 gc_B mandelbrot 74.28u 0.01s 74.31r
92
93meteor 2100
94 # we don't know
95 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
96 gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r
97 gc meteor-contest 0.24u 0.00s 0.26r
98 gc_B meteor-contest 0.23u 0.00s 0.24r
99
100pidigits 10000
101 # bignum is slower than gmp
102 gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r
103 gc pidigits 77.69u 0.14s 78.18r
104 gc_B pidigits 74.26u 0.18s 75.41r
105 gc_B pidigits 68.48u 0.20s 69.31r # special case: no bounds checking in bignum
106
107August 7 2009
108
109# New gc does better division by powers of 2. Significant improvements:
110
111spectral-norm 5500
112 # floating point code generator should use more registers; possibly inline evalA
113 gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r
114 gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r
115 gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster
116 gc_B spectral-norm 23.71u 0.01s 23.72r # ditto
117 gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. note: 4x faster (on r70, idle)
118
119k-nucleotide 1000000
120 # string maps are slower than glib string maps
121 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r
122 gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r
123 gc k-nucleotide 15.97u 0.03s 16.04r
124 gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effects in previous version
125
126pidigits 10000
127 # bignum is slower than gmp
128 gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r
129 gc pidigits 71.24u 0.04s 71.28r # 8.5% faster
130 gc_B pidigits 71.25u 0.03s 71.29r # 4% faster
131
132threadring 50000000
133 gcc -O2 threadring.c -lpthread 35.51u 160.21s 199.50r
134 gccgo -O2 threadring.go 90.33u 459.95s 448.03r
135 gc threadring 33.11u 0.00s 33.14r
136 GOMAXPROCS=4 gc threadring 114.48u 226.65s 371.59r
137 # change wait code to do <-make(chan int) instead of time.Sleep
138 gc threadring 28.41u 0.01s 29.35r
139 GOMAXPROCS=4 gc threadring 112.59u 232.83s 384.72r
140
141chameneos 6000000
142 gcc -O2 chameneosredux.c -lpthread 18.14u 276.52s 76.93r
143 gc chameneosredux 20.19u 0.01s 20.23r
144
145Aug 10 2009
146
147# new 6g with better fp registers, fast div and mod of integers
148# complete set of timings listed. significant changes marked ***
149
150fasta -n 25000000
151 # probably I/O library inefficiencies
152 gcc -O2 fasta.c 5.96u 0.00s 5.97r
153 gc fasta 10.59u 0.01s 10.61r
154 gc_B fasta 9.92u 0.02s 9.95r
155
156reverse-complement < output-of-fasta-25000000
157 # we don't know - memory cache behavior?
158 gcc -O2 reverse-complement.c 1.96u 1.56s 16.23r
159 gccgo -O2 reverse-complement.go 6.41u 0.62s 7.05r
160 gc reverse-complement 6.46u 0.70s 7.17r
161 gc_B reverse-complement 6.22u 0.72s 6.95r
162
163nbody 50000000
164 # math.Sqrt needs to be in assembly; inlining is probably the other 50%
165 gcc -O2 nbody.c 21.26u 0.01s 21.28r
166 gccgo -O2 nbody.go 116.68u 0.07s 116.80r
167 gc nbody 86.64u 0.01s 86.68r # -14%
168 gc_B nbody 85.72u 0.02s 85.77r # *** -17%
169
170binary-tree 15 # too slow to use 20
171 # memory allocation and garbage collection
172 gcc -O2 binary-tree.c -lm 0.87u 0.00s 0.87r
173 gccgo -O2 binary-tree.go 1.61u 0.47s 2.09r
174 gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.01r
175 gc binary-tree 9.11u 0.01s 9.13r # *** -5%
176 gc binary-tree-freelist 0.47u 0.01s 0.48r
177
178fannkuch 12
179 # bounds checking is half the difference
180 # rest might be registerization
181 gcc -O2 fannkuch.c 59.92u 0.00s 59.94r
182 gccgo -O2 fannkuch.go 65.54u 0.00s 65.58r
183 gc fannkuch 123.98u 0.01s 124.04r
184 gc_B fannkuch 90.75u 0.00s 90.78r
185
186regex-dna 100000
187 # regexp code is slow on trivial regexp
188 gcc -O2 regex-dna.c -lpcre 0.91u 0.00s 0.92r
189 gc regex-dna 27.25u 0.02s 27.28r
190 gc_B regex-dna 29.51u 0.03s 29.55r
191
192spectral-norm 5500
193 # possibly inline evalA
194 gcc -O2 spectral-norm.c -lm 11.57u 0.00s 11.57r
195 gccgo -O2 spectral-norm.go 12.07u 0.01s 12.08r
196 gc spectral-norm 23.99u 0.00s 24.00r
197 gc_B spectral-norm 23.73u 0.00s 23.75r
198
199k-nucleotide 1000000
200 # string maps are slower than glib string maps
201 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.63u 0.02s 10.69r
202 gccgo -O2 k-nucleotide.go 23.19u 0.91s 24.12r
203 gc k-nucleotide 16.73u 0.04s 16.78r # *** +5% (but this one seems to vary by more than that)
204 gc_B k-nucleotide 16.46u 0.04s 16.51r # *** +5%
205
206mandelbrot 16000
207 gcc -O2 mandelbrot.c 56.16u 0.00s 56.16r
208 gccgo -O2 mandelbrot.go 57.41u 0.01s 57.42r
209 gc mandelbrot 64.05u 0.02s 64.08r # *** -14%
210 gc_B mandelbrot 64.10u 0.02s 64.14r # *** -14%
211
212meteor 2100
213 # we don't know
214 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
215 gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r
216 gc meteor-contest 0.18u 0.00s 0.20r # *** -25%
217 gc_B meteor-contest 0.17u 0.00s 0.18r # *** -24%
218
219pidigits 10000
220 # bignum is slower than gmp
221 gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.57r
222 gc pidigits 71.82u 0.04s 71.89r
223 gc_B pidigits 71.84u 0.08s 71.98r
224
225threadring 50000000
226 gcc -O2 threadring.c -lpthread 30.91u 164.33s 204.57r
227 gccgo -O2 threadring.go 87.12u 460.04s 447.61r
228 gc threadring 38.55u 0.00s 38.56r # *** +16%
229
230chameneos 6000000
231 gcc -O2 chameneosredux.c -lpthread 17.93u 323.65s 88.47r
232 gc chameneosredux 21.72u 0.00s 21.73r
233
234August 10 2009
235
236# In-place versions for some bignum operations.
237pidigits 10000
238 gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r
239 gc pidigits 55.22u 0.04s 55.29r # *** -23%
240 gc_B pidigits 55.49u 0.02s 55.60r # *** -23%
241
242September 3 2009
243
244# New 6g inlines slices, has a few other tweaks.
245# Complete rerun. Significant changes marked.
246
247fasta -n 25000000
248 # probably I/O library inefficiencies
249 gcc -O2 fasta.c 5.96u 0.00s 5.96r
250 gc fasta 10.63u 0.02s 10.66r
251 gc_B fasta 9.92u 0.01s 9.94r
252
253reverse-complement < output-of-fasta-25000000
254 # we don't know - memory cache behavior?
255 gcc -O2 reverse-complement.c 1.92u 0.33s 2.93r
256 gccgo -O2 reverse-complement.go 6.76u 0.72s 7.58r # +5%
257 gc reverse-complement 6.59u 0.70s 7.29r # +2%
258 gc_B reverse-complement 5.57u 0.80s 6.37r # -10%
259
260nbody 50000000
261 # math.Sqrt needs to be in assembly; inlining is probably the other 50%
262 # also loop alignment appears to be critical
263 gcc -O2 nbody.c 21.28u 0.00s 21.28r
264 gccgo -O2 nbody.go 119.21u 0.00s 119.22r # +2%
265 gc nbody 109.72u 0.00s 109.78r # + 28% *****
266 gc_B nbody 85.90u 0.00s 85.91r
267
268binary-tree 15 # too slow to use 20
269 # memory allocation and garbage collection
270 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
271 gccgo -O2 binary-tree.go 1.88u 0.54s 2.42r # +17%
272 gccgo -O2 binary-tree-freelist.go 0.01u 0.01s 0.02r
273 gc binary-tree 8.94u 0.01s 8.96r # -2%
274 gc binary-tree-freelist 0.47u 0.01s 0.48r
275
276fannkuch 12
277 # bounds checking is half the difference
278 # rest might be registerization
279 gcc -O2 fannkuch.c 60.12u 0.00s 60.12r
280 gccgo -O2 fannkuch.go 92.62u 0.00s 92.66r # +41% ***
281 gc fannkuch 123.90u 0.00s 123.92r
282 gc_B fannkuch 89.71u 0.00s 89.74r # -1%
283
284regex-dna 100000
285 # regexp code is slow on trivial regexp
286 gcc -O2 regex-dna.c -lpcre 0.88u 0.00s 0.88r
287 gc regex-dna 25.77u 0.01s 25.79r # -5%
288 gc_B regex-dna 26.05u 0.02s 26.09r # -12% ***
289
290spectral-norm 5500
291 # possibly inline evalA
292 gcc -O2 spectral-norm.c -lm 11.51u 0.00s 11.51r
293 gccgo -O2 spectral-norm.go 11.95u 0.00s 11.96r
294 gc spectral-norm 24.23u 0.00s 24.23r
295 gc_B spectral-norm 23.83u 0.00s 23.84r
296
297k-nucleotide 1000000
298 # string maps are slower than glib string maps
299 gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.68u 0.04s 10.72r
300 gccgo -O2 k-nucleotide.go 23.03u 0.88s 23.92r
301 gc k-nucleotide 15.79u 0.05s 15.85r # -5% (but this one seems to vary by more than that)
302 gc_B k-nucleotide 17.88u 0.05s 17.95r # +8% (ditto)
303
304mandelbrot 16000
305 gcc -O2 mandelbrot.c 56.17u 0.02s 56.20r
306 gccgo -O2 mandelbrot.go 56.74u 0.02s 56.79r # -1%
307 gc mandelbrot 63.31u 0.01s 63.35r # -1%
308 gc_B mandelbrot 63.29u 0.00s 63.31r # -1%
309
310meteor 2100
311 # we don't know
312 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
313 gccgo -O2 meteor-contest.go 0.11u 0.00s 0.12r
314 gc meteor-contest 0.18u 0.00s 0.19r
315 gc_B meteor-contest 0.17u 0.00s 0.18r
316
317pidigits 10000
318 # bignum is slower than gmp
319 gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r
320 gc pidigits 55.87u 0.03s 55.91r
321 gc_B pidigits 55.93u 0.03s 55.99r
322
323# these tests are compared using real time, since they run on multiple processors
324# accuracy probably low
325threadring 50000000
326 gcc -O2 threadring.c -lpthread 26.31u 164.69s 199.92r # -2%
327 gccgo -O2 threadring.go 87.90u 487.26s 472.81r # +6%
328 gc threadring 28.89u 0.00s 28.90r # -25% ***
329
330chameneos 6000000
331 gcc -O2 chameneosredux.c -lpthread 16.41u 296.91s 81.17r # -8%
332 gc chameneosredux 19.97u 0.00s 19.97r # -8%
333
334Sep 22, 2009
335
336# 6g inlines sliceslice in most cases.
337
338fasta -n 25000000
339 # probably I/O library inefficiencies
340 gc fasta 10.24u 0.00s 10.25r # -4%
341 gc_B fasta 9.68u 0.01s 9.69r # -3%
342
343reverse-complement < output-of-fasta-25000000
344 # we don't know - memory cache behavior?
345 gc reverse-complement 6.67u 0.69s 7.37r # +1%
346 gc_B reverse-complement 6.00u 0.64s 6.65r # +7%
347
348nbody -n 50000000
349 # math.Sqrt needs to be in assembly; inlining is probably the other 50%
350 # also loop alignment appears to be critical
351 gc nbody 86.27u 0.00s 86.29r # -21%
352 gc_B nbody 104.52u 0.00s 104.54r # +22%
353
354fannkuch 12
355 # bounds checking is half the difference
356 # rest might be registerization
357 gc fannkuch 128.36u 0.00s 128.37r # +4%
358 gc_B fannkuch 89.32u 0.00s 89.34r
359
360regex-dna 100000
361 # regexp code is slow on trivial regexp
362 gc regex-dna 24.82u 0.01s 24.86r # -4%
363 gc_B regex-dna 24.55u 0.01s 24.57r # -6%
364
365spectral-norm 5500
366 # possibly inline evalA
367 gc spectral-norm 24.05u 0.00s 24.07r # -1%
368 gc_B spectral-norm 23.60u 0.00s 23.65r # -1%
369
370k-nucleotide 1000000
371 # string maps are slower than glib string maps
372 gc k-nucleotide 17.84u 0.04s 17.89r # +13% but mysterious variation continues
373 gc_B k-nucleotide 15.56u 0.08s 15.65r # -13% (ditto)
374
375mandelbrot 16000
376 gc mandelbrot 64.08u 0.01s 64.11r # +1%
377 gc_B mandelbrot 64.04u 0.00s 64.05r # +1%
378
379pidigits 10000
380 # bignum is slower than gmp
381 gc pidigits 58.68u 0.02s 58.72r # +5%
382 gc_B pidigits 58.86u 0.05s 58.99r # +5%
383
384# these tests are compared using real time, since they run on multiple processors
385# accuracy probably low
386threadring 50000000
387 gc threadring 32.70u 0.02s 32.77r # +13%
388
389chameneos 6000000
390 gc chameneosredux 26.62u 0.00s 26.63r # +13%
391
392Sep 24, 2009
393
394# Sqrt now in assembler for 6g.
395nbody -n 50000000
396 # remember, at least for 6g, alignment of loops may be important
397 gcc -O2 nbody.c 21.24u 0.00s 21.25r
398 gccgo -O2 nbody.go 121.03u 0.00s 121.04r
399 gc nbody 30.26u 0.00s 30.27r # -65% ***
400 gc_B nbody 30.20u 0.02s 30.22r # -72% ***
401
402Nov 13 2009
403
404# fix bug in regexp; take performance hit. good regexps will come in time.
405regex-dna 100000
406 gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.94r
407 gc regex-dna 29.78u 0.03s 29.83r
408 gc_B regex-dna 32.63u 0.03s 32.74r
409
410Nov 24 2009
411
412# Roger Peppe's rewrite of the benchmark
413chameneos 6000000
414 gcc -O2 chameneosredux.c -lpthread 18.00u 303.29s 83.64r
415 gc chameneosredux 12.10u 0.00s 12.10r # 2.22X faster
416
417Jan 6, 2010
418
419# Long-overdue update. All numbers included in this complete run.
420# Some programs (e.g. reverse-complement) rewritten for speed.
421# Regular expressions much faster in common cases (although still far behind PCRE)
422# Bignum stuff improved
423# Better (but sometimes slower) locking in channels.
424
425fasta -n 25000000
426 gcc -O2 fasta.c 5.99u 0.01s 6.00r
427 gc fasta 9.11u 0.00s 9.12r # -11%
428 gc_B fasta 8.60u 0.00s 8.62r # +12% ??
429
430reverse-complement < output-of-fasta-25000000
431 gcc -O2 reverse-complement.c 2.00u 0.80s 9.54r
432# gccgo -O2 reverse-complement.go 4.57u 0.35s 4.94r # 33% faster
433 gc reverse-complement 2.01u 0.38s 2.40r # 3.3X faster
434 gc_B reverse-complement 1.88u 0.36s 2.24r # 3.2X faster
435GOGC=off
436 gc reverse-complement 2.01u 0.35s 2.37r
437 gc_B reverse-complement 1.86u 0.32s 2.19r
438
439nbody -n 50000000
440 gcc -O2 nbody.c 21.28u 0.00s 21.31r
441 gccgo -O2 nbody.go 80.02u 0.00s 80.05r # 33% faster
442 gc nbody 30.13u 0.00s 30.13r
443 gc_B nbody 29.89u 0.01s 29.91r
444
445binary-tree 15 # too slow to use 20
446 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
447 gccgo -O2 binary-tree.go 4.82u 0.41s 5.24r # 2.5X slower
448 gc binary-tree 7.23u 0.01s 7.25r # -19%
449 gc binary-tree-freelist 0.43u 0.00s 0.44r # -9%
450
451fannkuch 12
452 gcc -O2 fannkuch.c 60.17u 0.00s 60.17r
453 gccgo -O2 fannkuch.go 78.47u 0.01s 78.49r
454 gc fannkuch 128.86u 0.00s 128.96r
455 gc_B fannkuch 90.17u 0.00s 90.21r
456
457regex-dna 100000
458 gcc -O2 regex-dna.c -lpcre 0.90u 0.00s 0.92r
459 gc regex-dna 9.48u 0.01s 9.50r # 3.1X faster
460 gc_B regex-dna 9.08u 0.00s 9.10r # 3.6X faster
461
462spectral-norm 5500
463 gcc -O2 spectral-norm.c -lm 11.48u 0.00s 11.48r
464 gccgo -O2 spectral-norm.go 11.68u 0.00s 11.70r
465 gc spectral-norm 23.98u 0.00s 23.99r
466 gc_B spectral-norm 23.68u 0.00s 23.69r
467
468k-nucleotide 1000000
469 gcc -O2 k-nucleotide.c 10.85u 0.04s 10.90r
470 gccgo -O2 k-nucleotide.go 25.26u 0.87s 26.14r
471 gc k-nucleotide 15.28u 0.06s 15.37r # restored; mysterious variation continues
472 gc_B k-nucleotide 15.97u 0.03s 16.00r
473
474mandelbrot 16000
475 gcc -O2 mandelbrot.c 56.12u 0.01s 56.15r
476 gccgo -O2 mandelbrot.go 56.86u 0.01s 56.89r
477 gc mandelbrot 66.05u 0.00s 66.07r # -3%
478 gc_B mandelbrot 66.06u 0.00s 66.07r # -3%
479
480meteor 2100
481 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
482 gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r
483 gc meteor-contest 0.17u 0.00s 0.17r
484 gc_B meteor-contest 0.15u 0.00s 0.16r
485
486pidigits 10000
487 gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.59r
488 gc pidigits 38.27u 0.02s 38.30r # 1.5X faster
489 gc_B pidigits 38.27u 0.02s 38.31r # 1.5X faster
490
491threadring 50000000
492 gcc -O2 threadring.c 37.11u 170.59s 212.75r
493 gccgo -O2 threadring.go 89.67u 447.56s 442.55r # -6.5%
494 gc threadring 36.08u 0.04s 36.15r # +10%
495
496chameneos 6000000
497 gcc -O2 chameneosredux.c -lpthread 19.02u 331.08s 90.79r
498 gc chameneosredux 12.54u 0.00s 12.55r
499
500Oct 19, 2010
501
502# Another long-overdue update. Some of the code is new; parallel versions
503# of some are added. A few significant improvements.
504
505fasta -n 25000000
506 gcc -O2 fasta.c 4.92u 0.00s 4.93r
507 gccgo -O2 fasta.go 3.31u 0.00s 3.34r # new code
508 gc fasta 3.68u 0.00s 3.69r # 2.5X faster with new code
509 gc_B fasta 3.68u 0.00s 3.69r # 2.3X faster with new code
510
511reverse-complement < output-of-fasta-25000000
512 gcc -O2 reverse-complement.c 1.93u 0.81s 11.24r
513 gccgo -O2 reverse-complement.go 1.58u 0.43s 2.04r # first run with new code?
514 gc reverse-complement 1.84u 0.34s 2.20r # 10% faster
515 gc_B reverse-complement 1.85u 0.32s 2.18r
516
517nbody -n 50000000
518 gcc -O2 nbody.c 21.35u 0.00s 21.36r
519 gccgo -O2 nbody.go 21.62u 0.00s 21.66r # 3.7X faster - why??
520 gc nbody 29.78u 0.00s 29.79r
521 gc_B nbody 29.72u 0.00s 29.72r
522
523binary-tree 15 # too slow to use 20
524 gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.88r
525 gccgo -O2 binary-tree.go 4.05u 0.02s 4.08r # 28% faster
526 gccgo -O2 binary-tree-freelist.go 0.34u 0.08s 0.34r
527 gc binary-tree 5.94u 0.00s 5.95r # 20% faster
528 gc binary-tree-freelist 0.50u 0.01s 0.54r
529
530fannkuch 12
531 gcc -O2 fannkuch.c 60.45u 0.00s 60.45r
532 gccgo -O2 fannkuch.go 64.64u 0.00s 64.64r
533 gccgo -O2 fannkuch-parallel.go 115.63u 0.00s 31.58r
534 gc fannkuch 126.52u 0.04s 126.68r
535 gc fannkuch-parallel 238.82u 0.10s 65.93r # GOMAXPROCS=4
536 gc_B fannkuch 88.99u 0.00s 89.02r
537
538regex-dna 100000
539 gcc -O2 regex-dna.c -lpcre 0.89u 0.00s 0.89r
540 gc regex-dna 8.99u 0.02s 9.03r
541 gc regex-dna-parallel 8.94u 0.02s 3.68r # GOMAXPROCS=4
542 gc_B regex-dna 9.12u 0.00s 9.14r
543
544spectral-norm 5500
545 gcc -O2 spectral-norm.c -lm 11.55u 0.00s 11.57r
546 gccgo -O2 spectral-norm.go 11.73u 0.00s 11.75r
547 gc spectral-norm 23.74u 0.00s 23.79r
548 gc_B spectral-norm 24.49u 0.02s 24.54r
549
550k-nucleotide 1000000
551 gcc -O2 k-nucleotide.c 11.44u 0.06s 11.50r
552 gccgo -O2 k-nucleotide.go 8.65u 0.04s 8.71r
553 gccgo -O2 k-nucleotide-parallel.go 8.75u 0.03s 2.97r # set GOMAXPROCS=4
554 gc k-nucleotide 14.92u 0.05s 15.01r
555 gc k-nucleotide-parallel 16.96u 0.06s 6.53r # set GOMAXPROCS=4
556 gc_B k-nucleotide 15.97u 0.03s 16.08r
557
558mandelbrot 16000
559 gcc -O2 mandelbrot.c 56.32u 0.00s 56.35r
560 gccgo -O2 mandelbrot.go 55.62u 0.02s 55.77r
561 gc mandelbrot 64.85u 0.01s 64.94r
562 gc_B mandelbrot 65.02u 0.01s 65.14r
563
564meteor 2100
565 gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
566 gccgo -O2 meteor-contest.go 0.10u 0.00s 0.11r
567 gc meteor-contest 0.17u 0.00s 0.18r
568 gc_B meteor-contest 0.16u 0.00s 0.16r
569
570pidigits 10000
571 gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.59r
572 gccgo -O2 pidigits.go 14.06u 0.01s 14.09r # first run?
573 gc pidigits 8.47u 0.05s 8.55r # 4.5X faster due to package big
574 gc_B pidigits 8.33u 0.01s 8.36r # 4.5X faster due to package big
575
576threadring 50000000
577 gcc -O2 threadring.c 28.18u 153.19s 186.47r
578 gccgo -O2 threadring.go 110.10u 516.48s 515.25r
579 gc threadring 40.39u 0.00s 40.40r
580
581chameneos 6000000
582 gcc -O2 chameneosredux.c -lpthread 18.20u 301.55s 83.10r
583 gccgo -O2 chameneosredux.go 52.22u 324.54s 201.21r
584 gc chameneosredux 13.52u 0.00s 13.54r
585
586Dec 14, 2010
587
588# Improved regex code (same algorithm) gets ~30%.
589
590regex-dna 100000
591 gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78r
592 gc regex-dna 6.80u 0.00s 6.81r
593 gc regex-dna-parallel 6.82u 0.01s 2.75r
594 gc_B regex-dna 6.69u 0.02s 6.70r
595
596Feb 15, 2011
597
598# Improved GC, still single-threaded but more efficient
599
600fasta -n 25000000
601 gcc -O2 fasta.c 3.40u 0.00s 3.40r
602 gccgo -O2 fasta.go 3.51u 0.00s 3.50r
603 gc fasta 3.66u 0.01s 3.66r
604 gc_B fasta 3.66u 0.00s 3.66r
605
606reverse-complement < output-of-fasta-25000000
607 gcc -O2 reverse-complement.c 1.86u 1.29s 4.93r
608 gccgo -O2 reverse-complement.go 2.18u 0.41s 2.60r
609 gc reverse-complement 1.67u 0.48s 2.15r
610 gc_B reverse-complement 1.71u 0.45s 2.15r
611
612nbody -n 50000000
613 gcc -O2 -lm nbody.c 21.64u 0.00s 21.64r
614 gccgo -O2 nbody.go 21.46u 0.00s 21.45r
615 gc nbody 29.07u 0.00s 29.06r
616 gc_B nbody 31.61u 0.00s 31.61r
617
618binary-tree 15 # too slow to use 20
619 gcc -O2 binary-tree.c -lm 0.88u 0.00s 0.87r
620 gccgo -O2 binary-tree.go 2.74u 0.07s 2.81r
621 gccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00r
622 gc binary-tree 4.22u 0.02s 4.24r
623 gc binary-tree-freelist 0.54u 0.02s 0.55r
624
625fannkuch 12
626 gcc -O2 fannkuch.c 57.64u 0.00s 57.64r
627 gccgo -O2 fannkuch.go 65.79u 0.00s 65.82r
628 gccgo -O2 fannkuch-parallel.go 160.91u 0.02s 43.90r
629 gc fannkuch 126.36u 0.03s 126.53r
630 gc fannkuch-parallel 175.23u 0.04s 45.49r
631 gc_B fannkuch 89.23u 0.00s 89.24r
632
633regex-dna 100000
634 gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.80r
635 gccgo -O2 regex-dna.go 12.38u 0.10s 12.52r
636 gccgo -O2 regex-dna-parallel.go 43.96u 4.64s 15.11r
637 gc regex-dna 7.03u 0.01s 7.05r
638 gc regex-dna-parallel 6.85u 0.05s 2.70r
639 gc_B regex-dna 6.87u 0.02s 6.89r
640
641spectral-norm 5500
642 gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28r
643 gccgo -O2 spectral-norm.go 11.79u 0.00s 11.79r
644 gc spectral-norm 24.00u 0.02s 24.05r
645 gc_B spectral-norm 24.59u 0.01s 24.59r
646
647k-nucleotide 1000000
648 gcc -O2 k-nucleotide.c 9.75u 0.07s 9.82r
649 gccgo -O2 k-nucleotide.go 8.92u 0.06s 8.98r
650 gccgo -O2 k-nucleotide-parallel.go 8.40u 0.04s 2.76r
651 gc k-nucleotide 17.01u 0.03s 17.04r
652 gc k-nucleotide-parallel 16.51u 0.08s 6.21r
653 gc_B k-nucleotide 16.94u 0.08s 17.02r
654
655mandelbrot 16000
656 gcc -O2 mandelbrot.c 54.60u 0.00s 54.66r
657 gccgo -O2 mandelbrot.go 59.38u 0.00s 59.41r
658 gc mandelbrot 64.93u 0.04s 65.08r
659 gc_B mandelbrot 64.85u 0.03s 64.92r
660
661meteor 2098
662 gcc -O2 meteor-contest.c 0.10u 0.01s 0.10r
663 gccgo -O2 meteor-contest.go 0.11u 0.00s 0.11r
664 gc meteor-contest 0.18u 0.00s 0.17r
665 gc_B meteor-contest 0.17u 0.00s 0.16r
666
667pidigits 10000
668 gcc -O2 pidigits.c -lgmp 2.24u 0.00s 2.23r
669 gccgo -O2 pidigits.go 14.05u 0.00s 14.06r
670 gc pidigits 6.34u 0.05s 6.38r
671 gc_B pidigits 6.37u 0.02s 6.38r
672
673threadring 50000000
674 gcc -O2 threadring.c 30.50u 258.05s 325.72r
675 gccgo -O2 threadring.go 92.87u 748.39s 728.46r
676 gc threadring 38.03u 0.01s 38.04r
677
678# Apr 15, 2011
679# Move to new machine, Intel Xeon E5520@2.27GHz.
680# (Was Opteron(tm) Processor 8214 HE)
681
682fasta -n 25000000
683OLD:
684 gcc -O2 fasta.c 3.39u 0.04s 3.42r
685 gccgo -O2 fasta.go 3.52u 0.00s 3.52r
686 gc fasta 3.63u 0.04s 3.67r
687 gc_B fasta 3.66u 0.00s 3.66r
688NEW:
689 gcc -O2 fasta.c 1.45u 0.02s 1.47r
690 gccgo -O2 fasta.go 1.51u 0.01s 1.51r
691 gc fasta 2.04u 0.00s 2.04r
692 gc_B fasta 2.05u 0.00s 2.04r
693
694reverse-complement < output-of-fasta-25000000
695OLD:
696 gcc -O2 reverse-complement.c 1.87u 1.51s 7.02r
697 gccgo -O2 reverse-complement.go 1.56u 0.54s 3.37r
698 gc reverse-complement 1.73u 0.36s 2.08r
699 gc_B reverse-complement 1.75u 0.37s 2.12r
700NEW:
701 gcc -O2 reverse-complement.c 1.20u 0.47s 12.96r
702 gccgo -O2 reverse-complement.go 0.88u 0.14s 1.01r
703 gc reverse-complement 1.13u 0.17s 1.30r
704 gc_B reverse-complement 1.11u 0.09s 1.20r
705
706nbody -n 50000000
707OLD:
708 gcc -O2 -lm nbody.c 21.90u 0.00s 21.92r
709 gccgo -O2 nbody.go 23.12u 0.03s 23.19r
710 gc nbody 29.07u 0.00s 29.07r
711 gc_B nbody 31.84u 0.00s 31.85r
712NEW:
713 gcc -O2 -lm nbody.c 13.01u 0.00s 13.03r
714 gccgo -O2 nbody.go 13.35u 0.00s 13.37r
715 gc nbody 21.78u 0.00s 21.82r
716 gc_B nbody 21.72u 0.00s 21.76r
717
718binary-tree 15 # too slow to use 20
719OLD:
720 gcc -O2 binary-tree.c -lm 0.83u 0.02s 0.84r
721 gccgo -O2 binary-tree.go 2.61u 0.02s 2.62r
722 gccgo -O2 binary-tree-freelist.go 0.32u 0.01s 0.32r
723 gc binary-tree 3.93u 0.04s 3.97r
724 gc binary-tree-freelist 0.47u 0.03s 0.50r
725NEW:
726 gcc -O2 binary-tree.c -lm 0.60u 0.00s 0.59r
727 gccgo -O2 binary-tree.go 1.53u 0.00s 1.52r
728 gccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00r
729 gc binary-tree 1.93u 0.02s 1.95r
730 gc binary-tree-freelist 0.32u 0.01s 0.32r
731
732fannkuch 12
733OLD:
734 gcc -O2 fannkuch.c 57.64u 0.00s 57.64r
735 gccgo -O2 fannkuch.go 65.56u 0.01s 65.65r
736 gccgo -O2 fannkuch-parallel.go 179.12u 0.00s 49.82r
737 gc fannkuch 126.39u 0.00s 126.39r
738 gc fannkuch-parallel 172.49u 0.02s 45.44r
739 gc_B fannkuch 89.30u 0.00s 89.28r
740NEW:
741 gcc -O2 fannkuch.c 45.17u 0.00s 45.26r
742 gccgo -O2 fannkuch.go 53.63u 0.00s 53.73r
743 gccgo -O2 fannkuch-parallel.go 216.72u 0.00s 58.42r
744 gc fannkuch 108.21u 0.00s 108.44r
745 gc fannkuch-parallel 227.20u 0.00s 57.27r
746 gc_B fannkuch 56.14u 0.00s 56.26r
747
748regex-dna 100000
749OLD:
750 gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78r
751 gccgo -O2 regex-dna.go 10.15u 0.02s 10.23r
752 gccgo -O2 regex-dna-parallel.go 33.81u 3.22s 11.62r
753 gc regex-dna 6.52u 0.04s 6.56r
754 gc regex-dna-parallel 6.84u 0.03s 2.70r
755 gc_B regex-dna 6.83u 0.01s 6.84r
756NEW:
757 gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.47r
758 gccgo -O2 regex-dna.go 6.00u 0.00s 6.00r
759 gccgo -O2 regex-dna-parallel.go 44.54u 1.57s 6.51r
760 gc regex-dna 5.41u 0.01s 5.42r
761 gc regex-dna-parallel 5.62u 0.01s 2.20r
762 gc_B regex-dna 5.50u 0.00s 5.50r
763
764spectral-norm 5500
765OLD:
766 gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28r
767 gccgo -O2 spectral-norm.go 11.56u 0.00s 11.55r
768 gc spectral-norm 23.98u 0.00s 24.00r
769 gc_B spectral-norm 24.62u 0.00s 24.65r
770NEW:
771 gcc -O2 spectral-norm.c -lm 15.79u 0.00s 15.82r
772 gccgo -O2 spectral-norm.go 15.32u 0.00s 15.35r
773 gc spectral-norm 19.62u 0.01s 19.67r
774 gc_B spectral-norm 19.62u 0.00s 19.66r
775
776k-nucleotide 1000000
777OLD:
778 gcc -O2 k-nucleotide.c 9.82u 0.06s 9.87r
779 gccgo -O2 k-nucleotide.go 8.30u 0.02s 8.32r
780 gccgo -O2 k-nucleotide-parallel.go 8.84u 0.05s 3.02r
781 gc k-nucleotide 15.38u 0.07s 15.44r
782 gc k-nucleotide-parallel 16.40u 0.03s 5.93r
783 gc_B k-nucleotide 15.19u 0.05s 15.23r
784NEW:
785 gcc -O2 k-nucleotide.c 4.88u 0.03s 4.92r
786 gccgo -O2 k-nucleotide.go 5.94u 0.01s 5.96r
787 gccgo -O2 k-nucleotide-parallel.go 6.44u 0.03s 1.47r
788 gc k-nucleotide 9.61u 0.01s 9.63r
789 gc k-nucleotide-parallel 9.70u 0.00s 3.39r
790 gc_B k-nucleotide 9.19u 0.03s 9.23r
791
792mandelbrot 16000
793OLD:
794 gcc -O2 mandelbrot.c 54.54u 0.00s 54.56r
795 gccgo -O2 mandelbrot.go 59.63u 0.03s 59.67r
796 gc mandelbrot 64.82u 0.00s 64.83r
797 gc_B mandelbrot 64.84u 0.00s 64.91r
798NEW:
799 gcc -O2 mandelbrot.c 36.07u 0.01s 36.15r
800 gccgo -O2 mandelbrot.go 43.57u 0.00s 43.66r
801 gc mandelbrot 60.66u 0.00s 60.79r
802 gc_B mandelbrot 60.90u 0.00s 61.03r
803
804meteor 2098
805OLD:
806 gcc -O2 meteor-contest.c 0.11u 0.00s 0.10r
807 gccgo -O2 meteor-contest.go 0.10u 0.01s 0.10r
808 gc meteor-contest 0.18u 0.00s 0.17r
809 gc_B meteor-contest 0.17u 0.00s 0.16r
810NEW:
811 gcc -O2 meteor-contest.c 0.10u 0.00s 0.09r
812 gccgo -O2 meteor-contest.go 0.10u 0.00s 0.09r
813 gc meteor-contest 0.14u 0.00s 0.14r
814 gc_B meteor-contest 0.13u 0.00s 0.13r
815
816pidigits 10000
817OLD:
818 gcc -O2 pidigits.c -lgmp 2.22u 0.00s 2.21r
819 gccgo -O2 pidigits.go 13.39u 0.00s 13.40r
820 gc pidigits 6.42u 0.04s 6.45r
821 gc_B pidigits 6.45u 0.02s 6.47r
822NEW:
823 gcc -O2 pidigits.c -lgmp 2.27u 0.00s 2.29r
824 gccgo -O2 pidigits.go 9.21u 0.00s 9.22r
825 gc pidigits 3.60u 0.00s 3.60r
826 gc_B pidigits 3.56u 0.02s 3.58r
827
828threadring 50000000
829OLD:
830 gcc -O2 threadring.c -lpthread 34.51u 267.95s 336.12r
831 gccgo -O2 threadring.go 103.51u 588.57s 627.16r
832 gc threadring 54.68u 0.00s 54.73r
833NEW:
834 gcc -O2 threadring.c 32.00u 259.39s 369.74r
835 gccgo -O2 threadring.go 133.06u 546.02s 595.33r
836 gc threadring 16.75u 0.02s 16.80r
837
838chameneos 6000000
839OLD:
840 gcc -O2 chameneosredux.c -lpthread 12.65u 31.02s 13.33r
841 gccgo -O2 chameneosredux.go 47.04u 302.84s 252.29r
842 gc chameneosredux 14.14u 0.00s 14.14r
843NEW:
844 gcc -O2 chameneosredux.c -lpthread 8.05u 63.43s 11.16r
845 gccgo -O2 chameneosredux.go 82.95u 304.37s 207.64r
846 gc chameneosredux 9.42u 0.00s 9.43r
847
848# May 13, 2011
849# after gc update to inline append when possible - 35% faster
850
851regex-dna 100000
852 gc regex-dna 3.94u 0.00s 3.95r
853 gc regex-dna-parallel 4.15u 0.01s 1.63r
854 gc_B regex-dna 4.01u 0.01s 4.02r
855
856# Aug 4, 2011
857# After various updates to locking code and some runtime changes.
858# Slowdowns believed due to slower (but more correct) memmove.
859
860fannkuch 12
861 gccgo -O2 fannkuch.go 51.59u 0.00s 51.69r # -4%
862 gccgo -O2 fannkuch-parallel.go 253.17u 0.00s 64.67r # -11%
863 gc fannkuch 103.14u 0.00s 103.36r # -5%
864 gc fannkuch-parallel 189.63u 0.00s 49.37r # +9%
865 gc_B fannkuch 49.19u 0.00s 49.29r # -14%
866
867regex-dna 100000
868 gc regex-dna 3.78u 0.00s 3.78r # -43%
869 gc regex-dna-parallel 3.84u 0.02s 1.48r # -49%
870 gc_B regex-dna 3.62u 0.00s 3.63r # -52%
871
872k-nucleotide 1000000
873 gc k-nucleotide 12.23u 0.02s 12.27r # +27%
874 gc k-nucleotide-parallel 12.76u 0.02s 4.37r # +29%
875 gc_B k-nucleotide 12.18u 0.01s 12.21r # +33%
876
877threadring 50000000
878 gc threadring 17.49u 0.00s 17.53r # +4%
879
880chameneos 6000000
881 gc chameneosredux 7.61u 0.00s 7.63r # -24%
882
883Aug 9, 2011
884# After custom algorithms for 1-, 2-, 4-, and 8-byte scalars.
885
886fannkuch 12
887 gc fannkuch-parallel 157.17u 0.00s 41.08r # -17%
888
889k-nucleotide 1000000
890 gc k-nucleotide 8.72u 0.03s 8.76r # -39%
891 gc k-nucleotide-parallel 8.79u 0.01s 3.14r # -39%
892 gc_B k-nucleotide 8.65u 0.03s 8.69r # -39%
893
894pidigits 10000
895 gc pidigits 3.71u 0.02s 3.73r # +4%
896 gc_B pidigits 3.73u 0.00s 3.73r # +4%
897
898threadring 50000000
899 gc threadring 14.51u 0.00s 14.54r # -17%
900
901chameneos 6000000
902 gc chameneosredux 7.41u 0.00s 7.42r # -3%
903
904# A complete run at the Go 1 release.
905# Significant changes:
906# - gccgo is now enabled for all tests (goroutines are cheap enough)
907# - threadring and chameneos are 14% faster, probably due to runtime changes
908# - regex-dna 36% faster
909# - fannkuch-parallel (only) slowed down 40%
910# - gccgo on binary-tree-freelist is still optimized to nothing
911# Other changes are modest.
912
913fasta -n 25000000
914 gcc -O2 fasta.c 1.45u 0.02s 1.48r
915 gccgo -O2 fasta.go 1.46u 0.00s 1.47r
916 gc fasta 1.99u 0.01s 2.00r
917 gc_B fasta 1.99u 0.01s 2.01r
918
919reverse-complement < output-of-fasta-25000000
920 gcc -O2 reverse-complement.c 0.95u 0.48s 4.99r
921 gccgo -O2 reverse-complement.go 0.93u 0.16s 1.09r
922 gc reverse-complement 1.20u 0.19s 1.39r
923 gc_B reverse-complement 1.04u 0.16s 1.20r
924
925nbody -n 50000000
926 gcc -O2 -lm nbody.c 13.02u 0.00s 13.05r
927 gccgo -O2 nbody.go 14.46u 0.00s 14.49r
928 gc nbody 21.79u 0.00s 21.84r
929 gc_B nbody 21.74u 0.00s 21.79r
930
931binary-tree 15 # too slow to use 20
932 gcc -O2 binary-tree.c -lm 0.60u 0.01s 0.61r
933 gccgo -O2 binary-tree.go 1.30u 0.01s 1.32r
934 gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.00r
935 gc binary-tree 1.84u 0.01s 1.86r
936 gc binary-tree-freelist 0.33u 0.00s 0.33r
937
938fannkuch 12
939 gcc -O2 fannkuch.c 45.24u 0.00s 45.34r
940 gccgo -O2 fannkuch.go 59.76u 0.01s 59.90r
941 gccgo -O2 fannkuch-parallel.go 218.20u 0.01s 61.60r
942 gc fannkuch 103.92u 0.00s 104.16r
943 gc fannkuch-parallel 221.61u 0.00s 60.49r
944 gc_B fannkuch 53.17u 0.00s 53.30r
945
946regex-dna 100000
947 gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.48r
948 gccgo -O2 regex-dna.go 6.52u 0.00s 6.54r
949 gccgo -O2 regex-dna-parallel.go 14.40u 0.73s 4.35r
950 gc regex-dna 2.63u 0.02s 2.66r # -36%
951 gc regex-dna-parallel 2.87u 0.01s 1.11r
952 gc_B regex-dna 2.65u 0.00s 2.66r
953
954spectral-norm 5500
955 gcc -O2 spectral-norm.c -lm 15.78u 0.00s 15.82r
956 gccgo -O2 spectral-norm.go 15.79u 0.00s 15.83r
957 gc spectral-norm 19.76u 0.00s 19.80r
958 gc_B spectral-norm 19.73u 0.01s 19.78r
959
960k-nucleotide 1000000
961 gcc -O2 k-nucleotide.c 5.59u 0.03s 5.63r
962 gccgo -O2 k-nucleotide.go 4.09u 0.03s 4.13r
963 gccgo -O2 k-nucleotide-parallel.go 4.50u 0.06s 1.63r
964 gc k-nucleotide 9.23u 0.02s 9.27r
965 gc k-nucleotide-parallel 9.87u 0.03s 3.55r
966 gc_B k-nucleotide 9.20u 0.00s 9.22r
967
968mandelbrot 16000
969 gcc -O2 mandelbrot.c 36.09u 0.00s 36.18r
970 gccgo -O2 mandelbrot.go 41.69u 0.01s 41.80r
971 gc mandelbrot 60.91u 0.02s 61.07r
972 gc_B mandelbrot 60.90u 0.00s 61.04r
973
974meteor 2098
975 gcc -O2 meteor-contest.c 0.09u 0.00s 0.09r
976 gccgo -O2 meteor-contest.go 0.09u 0.00s 0.09r
977 gc meteor-contest 0.14u 0.00s 0.15r
978 gc_B meteor-contest 0.14u 0.00s 0.14r
979
980pidigits 10000
981 gcc -O2 pidigits.c -lgmp 2.27u 0.00s 2.27r
982 gccgo -O2 pidigits.go 8.65u 0.00s 8.67r
983 gc pidigits 3.70u 0.04s 3.75r
984 gc_B pidigits 3.72u 0.02s 3.75r
985
986threadring 50000000
987 gcc -O2 threadring.c 40.91u 369.85s 323.31r
988 gccgo -O2 threadring.go 26.97u 30.82s 57.93r
989 gc threadring 12.81u 0.01s 12.85r # -13%
990
991chameneos 6000000
992 gcc -O2 chameneosredux.c -lpthread 9.44u 72.90s 12.65r
993 gccgo -O2 chameneosredux.go 7.73u 7.53s 15.30r
994 gc chameneosredux 6.51u 0.00s 6.53r # -14%
995
996# After http://codereview.appspot.com/6248049, moving panicindex
997# calls out of line (putting the likely code into a single path and shortening
998# loops). Significant changes since the last run (note: some are slower for
999# unrelated and as yet undiagnosed reasons):
1000
1001nbody -n 50000000
1002 gc nbody 19.10u 0.01s 19.19r # -12%
1003 gc_B nbody 19.19u 0.00s 19.23r # -12%
1004
1005binary-tree 15 # too slow to use 20
1006 gc binary-tree 1.49u 0.01s 1.51r # -19%
1007
1008fannkuch 12
1009 gc fannkuch 60.79u 0.00s 60.92r # -41%
1010 gc fannkuch-parallel 183.51u 0.01s 51.75r # -14%
1011 gc_B fannkuch 51.68u 0.00s 51.79r # -3%
1012
1013k-nucleotide 1000000
1014 gc k-nucleotide 9.74u 0.04s 9.80r # +6%
1015 gc k-nucleotide-parallel 9.89u 0.05s 3.59r # +1%
1016 gc_B k-nucleotide 9.39u 0.02s 9.43r # +2%
1017
1018mandelbrot (much slower, due to unrelated http://codereview.appspot.com/6209077)
1019 gc mandelbrot 100.98u 0.00s 101.20r # +65%
1020 gc_B mandelbrot 100.90u 0.01s 101.17r # +65%
1021
1022meteor 2098
1023 gc meteor-contest 0.13u 0.00s 0.13r # -13%
1024 gc_B meteor-contest 0.13u 0.00s 0.13r # -7%
1025
1026# May 30, 2012.
1027# After http://codereview.appspot.com/6261051, restoring old code generated
1028# for floating-point constants. Mandelbrot is back to its previous numbers.
1029
1030mandelbrot 16000
1031 gcc -O2 mandelbrot.c 36.07u 0.00s 36.16r
1032 gccgo -O2 mandelbrot.go 41.72u 0.01s 41.90r
1033 gc mandelbrot 60.62u 0.00s 60.76r
1034 gc_B mandelbrot 60.68u 0.00s 60.82r
1035
1036# May 30, 2012.
1037# After http://codereview.appspot.com/6248068, better FP code
1038# by avoiding MOVSD between registers.
1039# Plus some other timing changes that have crept in from other speedups,
1040# from garbage collection to Printf.
1041
1042fasta -n 25000000
1043 gc fasta 1.76u 0.00s 1.76r # -12%
1044 gc_B fasta 1.71u 0.00s 1.72r # -12%
1045
1046nbody -n 50000000
1047 gc nbody 17.56u 0.00s 17.60r # -8%
1048 gc_B nbody 17.30u 0.00s 17.34r # -10%
1049
1050fannkuch 12
1051 gc fannkuch-parallel 155.92u 0.01s 44.05r # -15%
1052
1053k-nucleotide 1000000
1054 gc k-nucleotide 9.22u 0.01s 9.26r # -5%
1055 gc k-nucleotide-parallel 9.23u 0.03s 3.26r # -9%
1056 gc_B k-nucleotide 9.22u 0.03s 9.28r # -2%
1057
1058mandelbrot 16000
1059 gc mandelbrot 44.80u 0.00s 44.90r # -27%
1060 gc_B mandelbrot 44.81u 0.00s 44.92r # -26%
1061
1062pidigits 10000
1063 gc pidigits 3.51u 0.00s 3.52r # -6%
1064 gc_B pidigits 3.51u 0.00s 3.52r # -6%
1065
1066# Aug 28, 2012
1067# After some assembler work in package big.
1068pidigits 10000
1069 gc pidigits 2.85u 0.02s 2.88r # -22%
1070 gc_B pidigits 2.88u 0.01s 2.90r # -21%
1071
1072# Sep 26, 2012
1073# 64-bit ints, plus significantly better floating-point code.
1074# Interesting details:
1075# Most benchmarks are 0-10% slower; some (binary-tree) more.
1076# Floating-point noticeably faster:
1077# nbody -25%
1078# mandelbrot -37% relative to Go 1.
1079# Other:
1080# regex-dna +47%
1081fasta -n 25000000
1082 gcc -O2 fasta.c 1.43u 0.03s 1.46r
1083 gccgo -O2 fasta.go 1.47u 0.00s 1.47r
1084 gc fasta 1.78u 0.01s 1.80r
1085 gc_B fasta 1.76u 0.00s 1.76r
1086
1087reverse-complement < output-of-fasta-25000000
1088 gcc -O2 reverse-complement.c 1.14u 0.39s 11.19r
1089 gccgo -O2 reverse-complement.go 0.91u 0.17s 1.09r
1090 gc reverse-complement 1.12u 0.18s 1.31r
1091 gc_B reverse-complement 1.12u 0.15s 1.28r
1092
1093nbody -n 50000000
1094 gcc -O2 nbody.c -lm 13.02u 0.00s 13.05r
1095 gccgo -O2 nbody.go 13.90u 0.00s 13.93r
1096 gc nbody 17.05u 0.00s 17.09r
1097 gc_B nbody 16.30u 0.00s 16.34r
1098
1099binary-tree 15 # too slow to use 20
1100 gcc -O2 binary-tree.c -lm 0.61u 0.00s 0.61r
1101 gccgo -O2 binary-tree.go 1.24u 0.04s 1.29r
1102 gccgo -O2 binary-tree-freelist.go 0.21u 0.01s 0.22r
1103 gc binary-tree 1.93u 0.02s 1.96r
1104 gc binary-tree-freelist 0.32u 0.00s 0.33r
1105
1106fannkuch 12
1107 gcc -O2 fannkuch.c 45.19u 0.00s 45.29r
1108 gccgo -O2 fannkuch.go 60.32u 0.00s 60.45r
1109 gccgo -O2 fannkuch-parallel.go 185.59u 0.00s 59.49r
1110 gc fannkuch 72.14u 0.00s 72.30r
1111 gc fannkuch-parallel 172.54u 0.00s 43.59r
1112 gc_B fannkuch 53.55u 0.00s 53.67r
1113
1114regex-dna 100000
1115 gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.47r
1116 gccgo -O2 regex-dna.go 6.49u 0.05s 6.56r
1117 gccgo -O2 regex-dna-parallel.go 14.60u 0.67s 4.42r
1118 gc regex-dna 3.91u 0.00s 3.92r
1119 gc regex-dna-parallel 4.01u 0.03s 1.56r
1120 gc_B regex-dna 3.91u 0.00s 3.92r
1121
1122spectral-norm 5500
1123 gcc -O2 spectral-norm.c -lm 15.85u 0.00s 15.89r
1124 gccgo -O2 spectral-norm.go 15.86u 0.00s 15.89r
1125 gc spectral-norm 19.72u 0.00s 19.76r
1126 gc_B spectral-norm 19.68u 0.01s 19.74r
1127
1128k-nucleotide 1000000
1129 gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include -lglib-2.0 4.90u 0.01s 4.93r
1130 gccgo -O2 k-nucleotide.go 4.78u 0.01s 4.80r
1131 gccgo -O2 k-nucleotide-parallel.go 6.49u 0.02s 2.18r
1132 gc k-nucleotide 9.05u 0.02s 9.09r
1133 gc k-nucleotide-parallel 9.27u 0.01s 3.29r
1134 gc_B k-nucleotide 8.95u 0.03s 9.00r
1135
1136mandelbrot 16000
1137 gcc -O2 mandelbrot.c 36.11u 0.00s 36.19r
1138 gccgo -O2 mandelbrot.go 43.67u 0.00s 43.77r
1139 gc mandelbrot 38.57u 0.00s 38.66r
1140 gc_B mandelbrot 38.59u 0.00s 38.68r
1141
1142meteor 2098
1143 gcc -O2 meteor-contest.c 0.09u 0.00s 0.09r
1144 gccgo -O2 meteor-contest.go 0.09u 0.00s 0.09r
1145 gc meteor-contest 0.13u 0.00s 0.14r
1146 gc_B meteor-contest 0.12u 0.00s 0.13r
1147
1148pidigits 10000
1149 gcc -O2 pidigits.c -lgmp 2.26u 0.00s 2.27r
1150 gccgo -O2 pidigits.go 9.05u 0.00s 9.07r
1151 gc pidigits 2.88u 0.02s 2.90r
1152 gc_B pidigits 2.89u 0.00s 2.90r
1153
1154threadring 50000000
1155 gcc -O2 threadring.c -lpthread 37.30u 327.81s 289.28r
1156 gccgo -O2 threadring.go 42.83u 26.15s 69.14r
1157 gc threadring 13.00u 0.00s 13.03r
1158
1159chameneos 6000000
1160 gcc -O2 chameneosredux.c -lpthread 8.80u 71.67s 12.19r
1161 gccgo -O2 chameneosredux.go 11.28u 6.68s 18.00r
1162 gc chameneosredux 6.94u 0.00s 6.96r
1163
1164# May 23, 2013
1165# Go 1.1, which includes precise GC, new scheduler, faster maps.
1166# 20%-ish speedups across many benchmarks.
1167# gccgo showing significant improvement (even though it's not yet up to Go 1.1)
1168#
1169# Standouts:
1170# fannkuch, regex-dna, k-nucleotide, threadring, chameneos
1171
1172fasta -n 25000000
1173 gcc -m64 -O2 fasta.c 1.54u 0.01s 1.55r
1174 gccgo -O2 fasta.go 1.42u 0.00s 1.43r
1175 gc fasta 1.50u 0.01s 1.52r # -16%
1176 gc_B fasta 1.46u 0.00s 1.46r # -17%
1177
1178reverse-complement < output-of-fasta-25000000
1179 gcc -m64 -O2 reverse-complement.c 0.87u 0.37s 4.36r
1180 gccgo -O2 reverse-complement.go 0.77u 0.15s 0.93r # -15%
1181 gc reverse-complement 0.99u 0.12s 1.12r # -15%
1182 gc_B reverse-complement 0.85u 0.17s 1.02r # -21%
1183
1184nbody -n 50000000
1185 gcc -m64 -O2 nbody.c -lm 13.50u 0.00s 13.53r
1186 gccgo -O2 nbody.go 13.98u 0.01s 14.02r
1187 gc nbody 16.63u 0.01s 16.67r
1188 gc_B nbody 15.74u 0.00s 15.76r
1189
1190binary-tree 15 # too slow to use 20
1191 gcc -m64 -O2 binary-tree.c -lm 0.61u 0.00s 0.61r
1192 gccgo -O2 binary-tree.go 1.11u 0.01s 1.12r # -13%
1193 gccgo -O2 binary-tree-freelist.go 0.22u 0.01s 0.23r
1194 gc binary-tree 1.83u 0.02s 1.83r # -7%
1195 gc binary-tree-freelist 0.32u 0.00s 0.32r
1196
1197fannkuch 12
1198 gcc -m64 -O2 fannkuch.c 45.56u 0.00s 45.67r
1199 gccgo -O2 fannkuch.go 57.71u 0.00s 57.85r # -4%
1200 gccgo -O2 fannkuch-parallel.go 146.31u 0.00s 37.50r # -37%
1201 gc fannkuch 70.06u 0.03s 70.17r # -3%
1202 gc fannkuch-parallel 131.88u 0.06s 33.59r # -23%
1203 gc_B fannkuch 45.55u 0.02s 45.63r # -15%
1204
1205regex-dna 100000
1206 gcc -m64 -O2 regex-dna.c -lpcre 0.44u 0.01s 0.45r
1207 gccgo -O2 regex-dna.go 5.59u 0.00s 5.61r # -14%
1208 gccgo -O2 regex-dna-parallel.go 10.85u 0.30s 3.34r # -24%
1209 gc regex-dna 2.23u 0.01s 2.25r # -43%
1210 gc regex-dna-parallel 2.35u 0.00s 0.93r # -40%
1211 gc_B regex-dna 2.24u 0.01s 2.25r # -43%
1212
1213spectral-norm 5500
1214 gcc -m64 -O2 spectral-norm.c -lm 14.84u 0.00s 14.88r
1215 gccgo -O2 spectral-norm.go 15.33u 0.00s 15.37r
1216 gc spectral-norm 16.75u 0.02s 16.79r # -15%
1217 gc_B spectral-norm 16.77u 0.01s 16.79r # -15%
1218
1219k-nucleotide 1000000
1220 gcc -O2 k-nucleotide.c -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include -lglib-2.0 4.50u 0.00s 4.52r
1221 gccgo -O2 k-nucleotide.go 3.72u 0.04s 3.77r # -21%
1222 gccgo -O2 k-nucleotide-parallel.go 3.88u 0.03s 1.42r # -35%
1223 gc k-nucleotide 6.32u 0.01s 6.33r # -31%
1224 gc k-nucleotide-parallel 6.47u 0.05s 2.13r # -33%
1225 gc_B k-nucleotide 6.45u 0.01s 6.47r # -28%
1226
1227mandelbrot 16000
1228 gcc -m64 -O2 mandelbrot.c 36.03u 0.00s 36.11r
1229 gccgo -O2 mandelbrot.go 37.61u 0.00s 37.74r # -14%
1230 gc mandelbrot 38.19u 0.05s 38.29r
1231 gc_B mandelbrot 38.19u 0.03s 38.26r
1232
1233meteor 2098
1234 gcc -m64 -O2 meteor-contest.c 0.08u 0.00s 0.08r
1235 gccgo -O2 meteor-contest.go 0.09u 0.01s 0.10r
1236 gc meteor-contest 0.12u 0.00s 0.12r # -15% although perhaps just noise
1237 gc_B meteor-contest 0.11u 0.00s 0.12r # -8% although perhaps just noise
1238
1239pidigits 10000
1240 gcc -m64 -O2 pidigits.c -lgmp 2.27u 0.00s 2.28r
1241 gccgo -O2 pidigits.go 8.95u 0.02s 8.99r
1242 gc pidigits 2.88u 0.14s 2.91r
1243 gc_B pidigits 2.92u 0.10s 2.91r
1244
1245threadring 50000000
1246 gcc -m64 -O2 threadring.c -lpthread 14.75u 167.88s 212.23r
1247 gccgo -O2 threadring.go 36.72u 12.08s 48.91r # -29%
1248 gc threadring 10.93u 0.01s 10.95r # -16%
1249
1250chameneos 6000000
1251 gcc -m64 -O2 chameneosredux.c -lpthread 8.89u 56.62s 9.75r
1252 gccgo -O2 chameneosredux.go 9.48u 2.48s 11.99r # -33%
1253 gc chameneosredux 5.80u 0.00s 5.81r # -16%
1254
View as plain text