1
2
3
4
5
6 package assert
7
8 import (
9 "bufio"
10 "bytes"
11 "fmt"
12 "io"
13 "strings"
14 )
15
16 func min(a, b int) int {
17 if a < b {
18 return a
19 }
20 return b
21 }
22
23 func max(a, b int) int {
24 if a > b {
25 return a
26 }
27 return b
28 }
29
30 func calculateRatio(matches, length int) float64 {
31 if length > 0 {
32 return 2.0 * float64(matches) / float64(length)
33 }
34 return 1.0
35 }
36
37 type Match struct {
38 A int
39 B int
40 Size int
41 }
42
43 type OpCode struct {
44 Tag byte
45 I1 int
46 I2 int
47 J1 int
48 J2 int
49 }
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77 type SequenceMatcher struct {
78 a []string
79 b []string
80 b2j map[string][]int
81 IsJunk func(string) bool
82 autoJunk bool
83 bJunk map[string]struct{}
84 matchingBlocks []Match
85 fullBCount map[string]int
86 bPopular map[string]struct{}
87 opCodes []OpCode
88 }
89
90 func NewMatcher(a, b []string) *SequenceMatcher {
91 m := SequenceMatcher{autoJunk: true}
92 m.SetSeqs(a, b)
93 return &m
94 }
95
96 func NewMatcherWithJunk(a, b []string, autoJunk bool,
97 isJunk func(string) bool) *SequenceMatcher {
98
99 m := SequenceMatcher{IsJunk: isJunk, autoJunk: autoJunk}
100 m.SetSeqs(a, b)
101 return &m
102 }
103
104
105 func (m *SequenceMatcher) SetSeqs(a, b []string) {
106 m.SetSeq1(a)
107 m.SetSeq2(b)
108 }
109
110
111
112
113
114
115
116
117
118
119 func (m *SequenceMatcher) SetSeq1(a []string) {
120 if &a == &m.a {
121 return
122 }
123 m.a = a
124 m.matchingBlocks = nil
125 m.opCodes = nil
126 }
127
128
129
130 func (m *SequenceMatcher) SetSeq2(b []string) {
131 if &b == &m.b {
132 return
133 }
134 m.b = b
135 m.matchingBlocks = nil
136 m.opCodes = nil
137 m.fullBCount = nil
138 m.chainB()
139 }
140
141 func (m *SequenceMatcher) chainB() {
142
143 b2j := map[string][]int{}
144 for i, s := range m.b {
145 indices := b2j[s]
146 indices = append(indices, i)
147 b2j[s] = indices
148 }
149
150
151 m.bJunk = map[string]struct{}{}
152 if m.IsJunk != nil {
153 junk := m.bJunk
154 for s := range b2j {
155 if m.IsJunk(s) {
156 junk[s] = struct{}{}
157 }
158 }
159 for s := range junk {
160 delete(b2j, s)
161 }
162 }
163
164
165 popular := map[string]struct{}{}
166 n := len(m.b)
167 if m.autoJunk && n >= 200 {
168 ntest := n/100 + 1
169 for s, indices := range b2j {
170 if len(indices) > ntest {
171 popular[s] = struct{}{}
172 }
173 }
174 for s := range popular {
175 delete(b2j, s)
176 }
177 }
178 m.bPopular = popular
179 m.b2j = b2j
180 }
181
182 func (m *SequenceMatcher) isBJunk(s string) bool {
183 _, ok := m.bJunk[s]
184 return ok
185 }
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214 func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match {
215
216
217
218
219
220
221
222
223
224
225
226 besti, bestj, bestsize := alo, blo, 0
227
228
229
230
231 j2len := map[int]int{}
232 for i := alo; i != ahi; i++ {
233
234
235 newj2len := map[int]int{}
236 for _, j := range m.b2j[m.a[i]] {
237
238 if j < blo {
239 continue
240 }
241 if j >= bhi {
242 break
243 }
244 k := j2len[j-1] + 1
245 newj2len[j] = k
246 if k > bestsize {
247 besti, bestj, bestsize = i-k+1, j-k+1, k
248 }
249 }
250 j2len = newj2len
251 }
252
253
254
255
256
257 for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) &&
258 m.a[besti-1] == m.b[bestj-1] {
259 besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
260 }
261 for besti+bestsize < ahi && bestj+bestsize < bhi &&
262 !m.isBJunk(m.b[bestj+bestsize]) &&
263 m.a[besti+bestsize] == m.b[bestj+bestsize] {
264 bestsize++
265 }
266
267
268
269
270
271
272
273
274 for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) &&
275 m.a[besti-1] == m.b[bestj-1] {
276 besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
277 }
278 for besti+bestsize < ahi && bestj+bestsize < bhi &&
279 m.isBJunk(m.b[bestj+bestsize]) &&
280 m.a[besti+bestsize] == m.b[bestj+bestsize] {
281 bestsize++
282 }
283
284 return Match{A: besti, B: bestj, Size: bestsize}
285 }
286
287
288
289
290
291
292
293
294
295
296
297
298 func (m *SequenceMatcher) GetMatchingBlocks() []Match {
299 if m.matchingBlocks != nil {
300 return m.matchingBlocks
301 }
302
303 var matchBlocks func(alo, ahi, blo, bhi int, matched []Match) []Match
304 matchBlocks = func(alo, ahi, blo, bhi int, matched []Match) []Match {
305 match := m.findLongestMatch(alo, ahi, blo, bhi)
306 i, j, k := match.A, match.B, match.Size
307 if match.Size > 0 {
308 if alo < i && blo < j {
309 matched = matchBlocks(alo, i, blo, j, matched)
310 }
311 matched = append(matched, match)
312 if i+k < ahi && j+k < bhi {
313 matched = matchBlocks(i+k, ahi, j+k, bhi, matched)
314 }
315 }
316 return matched
317 }
318 matched := matchBlocks(0, len(m.a), 0, len(m.b), nil)
319
320
321
322 nonAdjacent := []Match{}
323 i1, j1, k1 := 0, 0, 0
324 for _, b := range matched {
325
326 i2, j2, k2 := b.A, b.B, b.Size
327 if i1+k1 == i2 && j1+k1 == j2 {
328
329
330
331 k1 += k2
332 } else {
333
334
335
336 if k1 > 0 {
337 nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
338 }
339 i1, j1, k1 = i2, j2, k2
340 }
341 }
342 if k1 > 0 {
343 nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
344 }
345
346 nonAdjacent = append(nonAdjacent, Match{len(m.a), len(m.b), 0})
347 m.matchingBlocks = nonAdjacent
348 return m.matchingBlocks
349 }
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366 func (m *SequenceMatcher) GetOpCodes() []OpCode {
367 if m.opCodes != nil {
368 return m.opCodes
369 }
370 i, j := 0, 0
371 matching := m.GetMatchingBlocks()
372 opCodes := make([]OpCode, 0, len(matching))
373 for _, m := range matching {
374
375
376
377
378
379 ai, bj, size := m.A, m.B, m.Size
380 tag := byte(0)
381 if i < ai && j < bj {
382 tag = 'r'
383 } else if i < ai {
384 tag = 'd'
385 } else if j < bj {
386 tag = 'i'
387 }
388 if tag > 0 {
389 opCodes = append(opCodes, OpCode{tag, i, ai, j, bj})
390 }
391 i, j = ai+size, bj+size
392
393
394 if size > 0 {
395 opCodes = append(opCodes, OpCode{'e', ai, i, bj, j})
396 }
397 }
398 m.opCodes = opCodes
399 return m.opCodes
400 }
401
402
403
404
405
406 func (m *SequenceMatcher) GetGroupedOpCodes(n int) [][]OpCode {
407 if n < 0 {
408 n = 3
409 }
410 codes := m.GetOpCodes()
411 if len(codes) == 0 {
412 codes = []OpCode{{'e', 0, 1, 0, 1}}
413 }
414
415 if codes[0].Tag == 'e' {
416 c := codes[0]
417 i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
418 codes[0] = OpCode{c.Tag, max(i1, i2-n), i2, max(j1, j2-n), j2}
419 }
420 if codes[len(codes)-1].Tag == 'e' {
421 c := codes[len(codes)-1]
422 i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
423 codes[len(codes)-1] = OpCode{c.Tag, i1, min(i2, i1+n), j1, min(j2, j1+n)}
424 }
425 nn := n + n
426 groups := [][]OpCode{}
427 group := []OpCode{}
428 for _, c := range codes {
429 i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
430
431
432 if c.Tag == 'e' && i2-i1 > nn {
433 group = append(group, OpCode{c.Tag, i1, min(i2, i1+n),
434 j1, min(j2, j1+n)})
435 groups = append(groups, group)
436 group = []OpCode{}
437 i1, j1 = max(i1, i2-n), max(j1, j2-n)
438 }
439 group = append(group, OpCode{c.Tag, i1, i2, j1, j2})
440 }
441 if len(group) > 0 && !(len(group) == 1 && group[0].Tag == 'e') {
442 groups = append(groups, group)
443 }
444 return groups
445 }
446
447
448
449
450
451
452
453
454
455
456
457
458 func (m *SequenceMatcher) Ratio() float64 {
459 matches := 0
460 for _, m := range m.GetMatchingBlocks() {
461 matches += m.Size
462 }
463 return calculateRatio(matches, len(m.a)+len(m.b))
464 }
465
466
467
468
469
470 func (m *SequenceMatcher) QuickRatio() float64 {
471
472
473
474 if m.fullBCount == nil {
475 m.fullBCount = map[string]int{}
476 for _, s := range m.b {
477 m.fullBCount[s] = m.fullBCount[s] + 1
478 }
479 }
480
481
482
483 avail := map[string]int{}
484 matches := 0
485 for _, s := range m.a {
486 n, ok := avail[s]
487 if !ok {
488 n = m.fullBCount[s]
489 }
490 avail[s] = n - 1
491 if n > 0 {
492 matches++
493 }
494 }
495 return calculateRatio(matches, len(m.a)+len(m.b))
496 }
497
498
499
500
501
502 func (m *SequenceMatcher) RealQuickRatio() float64 {
503 la, lb := len(m.a), len(m.b)
504 return calculateRatio(min(la, lb), la+lb)
505 }
506
507
508 func formatRangeUnified(start, stop int) string {
509
510 beginning := start + 1
511 length := stop - start
512 if length == 1 {
513 return fmt.Sprintf("%d", beginning)
514 }
515 if length == 0 {
516 beginning--
517 }
518 return fmt.Sprintf("%d,%d", beginning, length)
519 }
520
521
522 type UnifiedDiff struct {
523 A []string
524 FromFile string
525 FromDate string
526 B []string
527 ToFile string
528 ToDate string
529 Eol string
530 Context int
531 }
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553 func WriteUnifiedDiff(writer io.Writer, diff UnifiedDiff) error {
554 buf := bufio.NewWriter(writer)
555 defer buf.Flush()
556 wf := func(format string, args ...interface{}) error {
557 _, err := buf.WriteString(fmt.Sprintf(format, args...))
558 return err
559 }
560 ws := func(s string) error {
561 _, err := buf.WriteString(s)
562 return err
563 }
564
565 if len(diff.Eol) == 0 {
566 diff.Eol = "\n"
567 }
568
569 started := false
570 m := NewMatcher(diff.A, diff.B)
571 for _, g := range m.GetGroupedOpCodes(diff.Context) {
572 if !started {
573 started = true
574 fromDate := ""
575 if len(diff.FromDate) > 0 {
576 fromDate = "\t" + diff.FromDate
577 }
578 toDate := ""
579 if len(diff.ToDate) > 0 {
580 toDate = "\t" + diff.ToDate
581 }
582 if diff.FromFile != "" || diff.ToFile != "" {
583 err := wf("--- %s%s%s", diff.FromFile, fromDate, diff.Eol)
584 if err != nil {
585 return err
586 }
587 err = wf("+++ %s%s%s", diff.ToFile, toDate, diff.Eol)
588 if err != nil {
589 return err
590 }
591 }
592 }
593 first, last := g[0], g[len(g)-1]
594 range1 := formatRangeUnified(first.I1, last.I2)
595 range2 := formatRangeUnified(first.J1, last.J2)
596 if err := wf("@@ -%s +%s @@%s", range1, range2, diff.Eol); err != nil {
597 return err
598 }
599 for _, c := range g {
600 i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
601 if c.Tag == 'e' {
602 for _, line := range diff.A[i1:i2] {
603 if err := ws(" " + line); err != nil {
604 return err
605 }
606 }
607 continue
608 }
609 if c.Tag == 'r' || c.Tag == 'd' {
610 for _, line := range diff.A[i1:i2] {
611 if err := ws("-" + line); err != nil {
612 return err
613 }
614 }
615 }
616 if c.Tag == 'r' || c.Tag == 'i' {
617 for _, line := range diff.B[j1:j2] {
618 if err := ws("+" + line); err != nil {
619 return err
620 }
621 }
622 }
623 }
624 }
625 return nil
626 }
627
628
629 func GetUnifiedDiffString(diff UnifiedDiff) (string, error) {
630 w := &bytes.Buffer{}
631 err := WriteUnifiedDiff(w, diff)
632 return w.String(), err
633 }
634
635
636 func formatRangeContext(start, stop int) string {
637
638 beginning := start + 1
639 length := stop - start
640 if length == 0 {
641 beginning--
642 }
643 if length <= 1 {
644 return fmt.Sprintf("%d", beginning)
645 }
646 return fmt.Sprintf("%d,%d", beginning, beginning+length-1)
647 }
648
649 type ContextDiff UnifiedDiff
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668 func WriteContextDiff(writer io.Writer, diff ContextDiff) error {
669 buf := bufio.NewWriter(writer)
670 defer buf.Flush()
671 var diffErr error
672 wf := func(format string, args ...interface{}) {
673 _, err := buf.WriteString(fmt.Sprintf(format, args...))
674 if diffErr == nil && err != nil {
675 diffErr = err
676 }
677 }
678 ws := func(s string) {
679 _, err := buf.WriteString(s)
680 if diffErr == nil && err != nil {
681 diffErr = err
682 }
683 }
684
685 if len(diff.Eol) == 0 {
686 diff.Eol = "\n"
687 }
688
689 prefix := map[byte]string{
690 'i': "+ ",
691 'd': "- ",
692 'r': "! ",
693 'e': " ",
694 }
695
696 started := false
697 m := NewMatcher(diff.A, diff.B)
698 for _, g := range m.GetGroupedOpCodes(diff.Context) {
699 if !started {
700 started = true
701 fromDate := ""
702 if len(diff.FromDate) > 0 {
703 fromDate = "\t" + diff.FromDate
704 }
705 toDate := ""
706 if len(diff.ToDate) > 0 {
707 toDate = "\t" + diff.ToDate
708 }
709 if diff.FromFile != "" || diff.ToFile != "" {
710 wf("*** %s%s%s", diff.FromFile, fromDate, diff.Eol)
711 wf("--- %s%s%s", diff.ToFile, toDate, diff.Eol)
712 }
713 }
714
715 first, last := g[0], g[len(g)-1]
716 ws("***************" + diff.Eol)
717
718 range1 := formatRangeContext(first.I1, last.I2)
719 wf("*** %s ****%s", range1, diff.Eol)
720 for _, c := range g {
721 if c.Tag == 'r' || c.Tag == 'd' {
722 for _, cc := range g {
723 if cc.Tag == 'i' {
724 continue
725 }
726 for _, line := range diff.A[cc.I1:cc.I2] {
727 ws(prefix[cc.Tag] + line)
728 }
729 }
730 break
731 }
732 }
733
734 range2 := formatRangeContext(first.J1, last.J2)
735 wf("--- %s ----%s", range2, diff.Eol)
736 for _, c := range g {
737 if c.Tag == 'r' || c.Tag == 'i' {
738 for _, cc := range g {
739 if cc.Tag == 'd' {
740 continue
741 }
742 for _, line := range diff.B[cc.J1:cc.J2] {
743 ws(prefix[cc.Tag] + line)
744 }
745 }
746 break
747 }
748 }
749 }
750 return diffErr
751 }
752
753
754 func GetContextDiffString(diff ContextDiff) (string, error) {
755 w := &bytes.Buffer{}
756 err := WriteContextDiff(w, diff)
757 return w.String(), err
758 }
759
760
761
762 func SplitLines(s string) []string {
763 lines := strings.SplitAfter(s, "\n")
764 lines[len(lines)-1] += "\n"
765 return lines
766 }
767
View as plain text