difflib.go

Documentation: go.mongodb.org/mongo-driver/internal/assert

     1  // Copied from https://github.com/pmezard/go-difflib/blob/5d4384ee4fb2527b0a1256a821ebfc92f91efefc/difflib/difflib.go
     2  
     3  // Copyright 2013 Patrick Mezard. All rights reserved. Use of this source code is
     4  // governed by a license that can be found in the THIRD-PARTY-NOTICES file.
     5  
     6  package assert
     7  
     8  import (
     9  	"bufio"
    10  	"bytes"
    11  	"fmt"
    12  	"io"
    13  	"strings"
    14  )
    15  
    16  func min(a, b int) int {
    17  	if a < b {
    18  		return a
    19  	}
    20  	return b
    21  }
    22  
    23  func max(a, b int) int {
    24  	if a > b {
    25  		return a
    26  	}
    27  	return b
    28  }
    29  
    30  func calculateRatio(matches, length int) float64 {
    31  	if length > 0 {
    32  		return 2.0 * float64(matches) / float64(length)
    33  	}
    34  	return 1.0
    35  }
    36  
    37  type Match struct {
    38  	A    int
    39  	B    int
    40  	Size int
    41  }
    42  
    43  type OpCode struct {
    44  	Tag byte
    45  	I1  int
    46  	I2  int
    47  	J1  int
    48  	J2  int
    49  }
    50  
    51  // SequenceMatcher compares sequence of strings. The basic
    52  // algorithm predates, and is a little fancier than, an algorithm
    53  // published in the late 1980's by Ratcliff and Obershelp under the
    54  // hyperbolic name "gestalt pattern matching".  The basic idea is to find
    55  // the longest contiguous matching subsequence that contains no "junk"
    56  // elements (R-O doesn't address junk).  The same idea is then applied
    57  // recursively to the pieces of the sequences to the left and to the right
    58  // of the matching subsequence.  This does not yield minimal edit
    59  // sequences, but does tend to yield matches that "look right" to people.
    60  //
    61  // SequenceMatcher tries to compute a "human-friendly diff" between two
    62  // sequences.  Unlike e.g. UNIX(tm) diff, the fundamental notion is the
    63  // longest *contiguous* & junk-free matching subsequence.  That's what
    64  // catches peoples' eyes.  The Windows(tm) windiff has another interesting
    65  // notion, pairing up elements that appear uniquely in each sequence.
    66  // That, and the method here, appear to yield more intuitive difference
    67  // reports than does diff.  This method appears to be the least vulnerable
    68  // to syncing up on blocks of "junk lines", though (like blank lines in
    69  // ordinary text files, or maybe "<P>" lines in HTML files).  That may be
    70  // because this is the only method of the 3 that has a *concept* of
    71  // "junk" <wink>.
    72  //
    73  // Timing:  Basic R-O is cubic time worst case and quadratic time expected
    74  // case.  SequenceMatcher is quadratic time for the worst case and has
    75  // expected-case behavior dependent in a complicated way on how many
    76  // elements the sequences have in common; best case time is linear.
    77  type SequenceMatcher struct {
    78  	a              []string
    79  	b              []string
    80  	b2j            map[string][]int
    81  	IsJunk         func(string) bool
    82  	autoJunk       bool
    83  	bJunk          map[string]struct{}
    84  	matchingBlocks []Match
    85  	fullBCount     map[string]int
    86  	bPopular       map[string]struct{}
    87  	opCodes        []OpCode
    88  }
    89  
    90  func NewMatcher(a, b []string) *SequenceMatcher {
    91  	m := SequenceMatcher{autoJunk: true}
    92  	m.SetSeqs(a, b)
    93  	return &m
    94  }
    95  
    96  func NewMatcherWithJunk(a, b []string, autoJunk bool,
    97  	isJunk func(string) bool) *SequenceMatcher {
    98  
    99  	m := SequenceMatcher{IsJunk: isJunk, autoJunk: autoJunk}
   100  	m.SetSeqs(a, b)
   101  	return &m
   102  }
   103  
   104  // SetSeqs sets the two sequences to be compared.
   105  func (m *SequenceMatcher) SetSeqs(a, b []string) {
   106  	m.SetSeq1(a)
   107  	m.SetSeq2(b)
   108  }
   109  
   110  // SetSeq1 sets the first sequence to be compared. The second sequence to be compared is
   111  // not changed.
   112  //
   113  // SequenceMatcher computes and caches detailed information about the second
   114  // sequence, so if you want to compare one sequence S against many sequences,
   115  // use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other
   116  // sequences.
   117  //
   118  // See also SetSeqs() and SetSeq2().
   119  func (m *SequenceMatcher) SetSeq1(a []string) {
   120  	if &a == &m.a {
   121  		return
   122  	}
   123  	m.a = a
   124  	m.matchingBlocks = nil
   125  	m.opCodes = nil
   126  }
   127  
   128  // SetSeq2 sets the second sequence to be compared. The first sequence to be compared is
   129  // not changed.
   130  func (m *SequenceMatcher) SetSeq2(b []string) {
   131  	if &b == &m.b {
   132  		return
   133  	}
   134  	m.b = b
   135  	m.matchingBlocks = nil
   136  	m.opCodes = nil
   137  	m.fullBCount = nil
   138  	m.chainB()
   139  }
   140  
   141  func (m *SequenceMatcher) chainB() {
   142  	// Populate line -> index mapping
   143  	b2j := map[string][]int{}
   144  	for i, s := range m.b {
   145  		indices := b2j[s]
   146  		indices = append(indices, i)
   147  		b2j[s] = indices
   148  	}
   149  
   150  	// Purge junk elements
   151  	m.bJunk = map[string]struct{}{}
   152  	if m.IsJunk != nil {
   153  		junk := m.bJunk
   154  		for s := range b2j {
   155  			if m.IsJunk(s) {
   156  				junk[s] = struct{}{}
   157  			}
   158  		}
   159  		for s := range junk {
   160  			delete(b2j, s)
   161  		}
   162  	}
   163  
   164  	// Purge remaining popular elements
   165  	popular := map[string]struct{}{}
   166  	n := len(m.b)
   167  	if m.autoJunk && n >= 200 {
   168  		ntest := n/100 + 1
   169  		for s, indices := range b2j {
   170  			if len(indices) > ntest {
   171  				popular[s] = struct{}{}
   172  			}
   173  		}
   174  		for s := range popular {
   175  			delete(b2j, s)
   176  		}
   177  	}
   178  	m.bPopular = popular
   179  	m.b2j = b2j
   180  }
   181  
   182  func (m *SequenceMatcher) isBJunk(s string) bool {
   183  	_, ok := m.bJunk[s]
   184  	return ok
   185  }
   186  
   187  // Find longest matching block in a[alo:ahi] and b[blo:bhi].
   188  //
   189  // If IsJunk is not defined:
   190  //
   191  // Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
   192  //
   193  //	alo <= i <= i+k <= ahi
   194  //	blo <= j <= j+k <= bhi
   195  //
   196  // and for all (i',j',k') meeting those conditions,
   197  //
   198  //	k >= k'
   199  //	i <= i'
   200  //	and if i == i', j <= j'
   201  //
   202  // In other words, of all maximal matching blocks, return one that
   203  // starts earliest in a, and of all those maximal matching blocks that
   204  // start earliest in a, return the one that starts earliest in b.
   205  //
   206  // If IsJunk is defined, first the longest matching block is
   207  // determined as above, but with the additional restriction that no
   208  // junk element appears in the block.  Then that block is extended as
   209  // far as possible by matching (only) junk elements on both sides.  So
   210  // the resulting block never matches on junk except as identical junk
   211  // happens to be adjacent to an "interesting" match.
   212  //
   213  // If no blocks match, return (alo, blo, 0).
   214  func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match {
   215  	// CAUTION:  stripping common prefix or suffix would be incorrect.
   216  	// E.g.,
   217  	//    ab
   218  	//    acab
   219  	// Longest matching block is "ab", but if common prefix is
   220  	// stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
   221  	// strip, so ends up claiming that ab is changed to acab by
   222  	// inserting "ca" in the middle.  That's minimal but unintuitive:
   223  	// "it's obvious" that someone inserted "ac" at the front.
   224  	// Windiff ends up at the same place as diff, but by pairing up
   225  	// the unique 'b's and then matching the first two 'a's.
   226  	besti, bestj, bestsize := alo, blo, 0
   227  
   228  	// find longest junk-free match
   229  	// during an iteration of the loop, j2len[j] = length of longest
   230  	// junk-free match ending with a[i-1] and b[j]
   231  	j2len := map[int]int{}
   232  	for i := alo; i != ahi; i++ {
   233  		// look at all instances of a[i] in b; note that because
   234  		// b2j has no junk keys, the loop is skipped if a[i] is junk
   235  		newj2len := map[int]int{}
   236  		for _, j := range m.b2j[m.a[i]] {
   237  			// a[i] matches b[j]
   238  			if j < blo {
   239  				continue
   240  			}
   241  			if j >= bhi {
   242  				break
   243  			}
   244  			k := j2len[j-1] + 1
   245  			newj2len[j] = k
   246  			if k > bestsize {
   247  				besti, bestj, bestsize = i-k+1, j-k+1, k
   248  			}
   249  		}
   250  		j2len = newj2len
   251  	}
   252  
   253  	// Extend the best by non-junk elements on each end.  In particular,
   254  	// "popular" non-junk elements aren't in b2j, which greatly speeds
   255  	// the inner loop above, but also means "the best" match so far
   256  	// doesn't contain any junk *or* popular non-junk elements.
   257  	for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) &&
   258  		m.a[besti-1] == m.b[bestj-1] {
   259  		besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
   260  	}
   261  	for besti+bestsize < ahi && bestj+bestsize < bhi &&
   262  		!m.isBJunk(m.b[bestj+bestsize]) &&
   263  		m.a[besti+bestsize] == m.b[bestj+bestsize] {
   264  		bestsize++
   265  	}
   266  
   267  	// Now that we have a wholly interesting match (albeit possibly
   268  	// empty!), we may as well suck up the matching junk on each
   269  	// side of it too.  Can't think of a good reason not to, and it
   270  	// saves post-processing the (possibly considerable) expense of
   271  	// figuring out what to do with it.  In the case of an empty
   272  	// interesting match, this is clearly the right thing to do,
   273  	// because no other kind of match is possible in the regions.
   274  	for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) &&
   275  		m.a[besti-1] == m.b[bestj-1] {
   276  		besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
   277  	}
   278  	for besti+bestsize < ahi && bestj+bestsize < bhi &&
   279  		m.isBJunk(m.b[bestj+bestsize]) &&
   280  		m.a[besti+bestsize] == m.b[bestj+bestsize] {
   281  		bestsize++
   282  	}
   283  
   284  	return Match{A: besti, B: bestj, Size: bestsize}
   285  }
   286  
   287  // GetMatchingBlocks returns list of triples describing matching subsequences.
   288  //
   289  // Each triple is of the form (i, j, n), and means that
   290  // a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
   291  // i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are
   292  // adjacent triples in the list, and the second is not the last triple in the
   293  // list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe
   294  // adjacent equal blocks.
   295  //
   296  // The last triple is a dummy, (len(a), len(b), 0), and is the only
   297  // triple with n==0.
   298  func (m *SequenceMatcher) GetMatchingBlocks() []Match {
   299  	if m.matchingBlocks != nil {
   300  		return m.matchingBlocks
   301  	}
   302  
   303  	var matchBlocks func(alo, ahi, blo, bhi int, matched []Match) []Match
   304  	matchBlocks = func(alo, ahi, blo, bhi int, matched []Match) []Match {
   305  		match := m.findLongestMatch(alo, ahi, blo, bhi)
   306  		i, j, k := match.A, match.B, match.Size
   307  		if match.Size > 0 {
   308  			if alo < i && blo < j {
   309  				matched = matchBlocks(alo, i, blo, j, matched)
   310  			}
   311  			matched = append(matched, match)
   312  			if i+k < ahi && j+k < bhi {
   313  				matched = matchBlocks(i+k, ahi, j+k, bhi, matched)
   314  			}
   315  		}
   316  		return matched
   317  	}
   318  	matched := matchBlocks(0, len(m.a), 0, len(m.b), nil)
   319  
   320  	// It's possible that we have adjacent equal blocks in the
   321  	// matching_blocks list now.
   322  	nonAdjacent := []Match{}
   323  	i1, j1, k1 := 0, 0, 0
   324  	for _, b := range matched {
   325  		// Is this block adjacent to i1, j1, k1?
   326  		i2, j2, k2 := b.A, b.B, b.Size
   327  		if i1+k1 == i2 && j1+k1 == j2 {
   328  			// Yes, so collapse them -- this just increases the length of
   329  			// the first block by the length of the second, and the first
   330  			// block so lengthened remains the block to compare against.
   331  			k1 += k2
   332  		} else {
   333  			// Not adjacent.  Remember the first block (k1==0 means it's
   334  			// the dummy we started with), and make the second block the
   335  			// new block to compare against.
   336  			if k1 > 0 {
   337  				nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
   338  			}
   339  			i1, j1, k1 = i2, j2, k2
   340  		}
   341  	}
   342  	if k1 > 0 {
   343  		nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
   344  	}
   345  
   346  	nonAdjacent = append(nonAdjacent, Match{len(m.a), len(m.b), 0})
   347  	m.matchingBlocks = nonAdjacent
   348  	return m.matchingBlocks
   349  }
   350  
   351  // GetOpCodes returns a list of 5-tuples describing how to turn a into b.
   352  //
   353  // Each tuple is of the form (tag, i1, i2, j1, j2).  The first tuple
   354  // has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
   355  // tuple preceding it, and likewise for j1 == the previous j2.
   356  //
   357  // The tags are characters, with these meanings:
   358  //
   359  // 'r' (replace):  a[i1:i2] should be replaced by b[j1:j2]
   360  //
   361  // 'd' (delete):   a[i1:i2] should be deleted, j1==j2 in this case.
   362  //
   363  // 'i' (insert):   b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case.
   364  //
   365  // 'e' (equal):    a[i1:i2] == b[j1:j2]
   366  func (m *SequenceMatcher) GetOpCodes() []OpCode {
   367  	if m.opCodes != nil {
   368  		return m.opCodes
   369  	}
   370  	i, j := 0, 0
   371  	matching := m.GetMatchingBlocks()
   372  	opCodes := make([]OpCode, 0, len(matching))
   373  	for _, m := range matching {
   374  		//  invariant:  we've pumped out correct diffs to change
   375  		//  a[:i] into b[:j], and the next matching block is
   376  		//  a[ai:ai+size] == b[bj:bj+size]. So we need to pump
   377  		//  out a diff to change a[i:ai] into b[j:bj], pump out
   378  		//  the matching block, and move (i,j) beyond the match
   379  		ai, bj, size := m.A, m.B, m.Size
   380  		tag := byte(0)
   381  		if i < ai && j < bj {
   382  			tag = 'r'
   383  		} else if i < ai {
   384  			tag = 'd'
   385  		} else if j < bj {
   386  			tag = 'i'
   387  		}
   388  		if tag > 0 {
   389  			opCodes = append(opCodes, OpCode{tag, i, ai, j, bj})
   390  		}
   391  		i, j = ai+size, bj+size
   392  		// the list of matching blocks is terminated by a
   393  		// sentinel with size 0
   394  		if size > 0 {
   395  			opCodes = append(opCodes, OpCode{'e', ai, i, bj, j})
   396  		}
   397  	}
   398  	m.opCodes = opCodes
   399  	return m.opCodes
   400  }
   401  
   402  // GetGroupedOpCodes isolates change clusters by eliminating ranges with no changes.
   403  //
   404  // Returns a generator of groups with up to n lines of context.
   405  // Each group is in the same format as returned by GetOpCodes().
   406  func (m *SequenceMatcher) GetGroupedOpCodes(n int) [][]OpCode {
   407  	if n < 0 {
   408  		n = 3
   409  	}
   410  	codes := m.GetOpCodes()
   411  	if len(codes) == 0 {
   412  		codes = []OpCode{{'e', 0, 1, 0, 1}}
   413  	}
   414  	// Fixup leading and trailing groups if they show no changes.
   415  	if codes[0].Tag == 'e' {
   416  		c := codes[0]
   417  		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   418  		codes[0] = OpCode{c.Tag, max(i1, i2-n), i2, max(j1, j2-n), j2}
   419  	}
   420  	if codes[len(codes)-1].Tag == 'e' {
   421  		c := codes[len(codes)-1]
   422  		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   423  		codes[len(codes)-1] = OpCode{c.Tag, i1, min(i2, i1+n), j1, min(j2, j1+n)}
   424  	}
   425  	nn := n + n
   426  	groups := [][]OpCode{}
   427  	group := []OpCode{}
   428  	for _, c := range codes {
   429  		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   430  		// End the current group and start a new one whenever
   431  		// there is a large range with no changes.
   432  		if c.Tag == 'e' && i2-i1 > nn {
   433  			group = append(group, OpCode{c.Tag, i1, min(i2, i1+n),
   434  				j1, min(j2, j1+n)})
   435  			groups = append(groups, group)
   436  			group = []OpCode{}
   437  			i1, j1 = max(i1, i2-n), max(j1, j2-n)
   438  		}
   439  		group = append(group, OpCode{c.Tag, i1, i2, j1, j2})
   440  	}
   441  	if len(group) > 0 && !(len(group) == 1 && group[0].Tag == 'e') {
   442  		groups = append(groups, group)
   443  	}
   444  	return groups
   445  }
   446  
   447  // Ratio returns a measure of the sequences' similarity (float in [0,1]).
   448  //
   449  // Where T is the total number of elements in both sequences, and
   450  // M is the number of matches, this is 2.0*M / T.
   451  // Note that this is 1 if the sequences are identical, and 0 if
   452  // they have nothing in common.
   453  //
   454  // .Ratio() is expensive to compute if you haven't already computed
   455  // .GetMatchingBlocks() or .GetOpCodes(), in which case you may
   456  // want to try .QuickRatio() or .RealQuickRation() first to get an
   457  // upper bound.
   458  func (m *SequenceMatcher) Ratio() float64 {
   459  	matches := 0
   460  	for _, m := range m.GetMatchingBlocks() {
   461  		matches += m.Size
   462  	}
   463  	return calculateRatio(matches, len(m.a)+len(m.b))
   464  }
   465  
   466  // QuickRatio returns an upper bound on ratio() relatively quickly.
   467  //
   468  // This isn't defined beyond that it is an upper bound on .Ratio(), and
   469  // is faster to compute.
   470  func (m *SequenceMatcher) QuickRatio() float64 {
   471  	// viewing a and b as multisets, set matches to the cardinality
   472  	// of their intersection; this counts the number of matches
   473  	// without regard to order, so is clearly an upper bound
   474  	if m.fullBCount == nil {
   475  		m.fullBCount = map[string]int{}
   476  		for _, s := range m.b {
   477  			m.fullBCount[s] = m.fullBCount[s] + 1
   478  		}
   479  	}
   480  
   481  	// avail[x] is the number of times x appears in 'b' less the
   482  	// number of times we've seen it in 'a' so far ... kinda
   483  	avail := map[string]int{}
   484  	matches := 0
   485  	for _, s := range m.a {
   486  		n, ok := avail[s]
   487  		if !ok {
   488  			n = m.fullBCount[s]
   489  		}
   490  		avail[s] = n - 1
   491  		if n > 0 {
   492  			matches++
   493  		}
   494  	}
   495  	return calculateRatio(matches, len(m.a)+len(m.b))
   496  }
   497  
   498  // RealQuickRatio returns an upper bound on ratio() very quickly.
   499  //
   500  // This isn't defined beyond that it is an upper bound on .Ratio(), and
   501  // is faster to compute than either .Ratio() or .QuickRatio().
   502  func (m *SequenceMatcher) RealQuickRatio() float64 {
   503  	la, lb := len(m.a), len(m.b)
   504  	return calculateRatio(min(la, lb), la+lb)
   505  }
   506  
   507  // Convert range to the "ed" format
   508  func formatRangeUnified(start, stop int) string {
   509  	// Per the diff spec at http://www.unix.org/single_unix_specification/
   510  	beginning := start + 1 // lines start numbering with one
   511  	length := stop - start
   512  	if length == 1 {
   513  		return fmt.Sprintf("%d", beginning)
   514  	}
   515  	if length == 0 {
   516  		beginning-- // empty ranges begin at line just before the range
   517  	}
   518  	return fmt.Sprintf("%d,%d", beginning, length)
   519  }
   520  
   521  // UnifiedDiff represents the unified diff parameters.
   522  type UnifiedDiff struct {
   523  	A        []string // First sequence lines
   524  	FromFile string   // First file name
   525  	FromDate string   // First file time
   526  	B        []string // Second sequence lines
   527  	ToFile   string   // Second file name
   528  	ToDate   string   // Second file time
   529  	Eol      string   // Headers end of line, defaults to LF
   530  	Context  int      // Number of context lines
   531  }
   532  
   533  // WriteUnifiedDiff compares two sequences of lines; generates the delta as
   534  // a unified diff.
   535  //
   536  // Unified diffs are a compact way of showing line changes and a few
   537  // lines of context.  The number of context lines is set by 'n' which
   538  // defaults to three.
   539  //
   540  // By default, the diff control lines (those with ---, +++, or @@) are
   541  // created with a trailing newline.  This is helpful so that inputs
   542  // created from file.readlines() result in diffs that are suitable for
   543  // file.writelines() since both the inputs and outputs have trailing
   544  // newlines.
   545  //
   546  // For inputs that do not have trailing newlines, set the lineterm
   547  // argument to "" so that the output will be uniformly newline free.
   548  //
   549  // The unidiff format normally has a header for filenames and modification
   550  // times.  Any or all of these may be specified using strings for
   551  // 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
   552  // The modification times are normally expressed in the ISO 8601 format.
   553  func WriteUnifiedDiff(writer io.Writer, diff UnifiedDiff) error {
   554  	buf := bufio.NewWriter(writer)
   555  	defer buf.Flush()
   556  	wf := func(format string, args ...interface{}) error {
   557  		_, err := buf.WriteString(fmt.Sprintf(format, args...))
   558  		return err
   559  	}
   560  	ws := func(s string) error {
   561  		_, err := buf.WriteString(s)
   562  		return err
   563  	}
   564  
   565  	if len(diff.Eol) == 0 {
   566  		diff.Eol = "\n"
   567  	}
   568  
   569  	started := false
   570  	m := NewMatcher(diff.A, diff.B)
   571  	for _, g := range m.GetGroupedOpCodes(diff.Context) {
   572  		if !started {
   573  			started = true
   574  			fromDate := ""
   575  			if len(diff.FromDate) > 0 {
   576  				fromDate = "\t" + diff.FromDate
   577  			}
   578  			toDate := ""
   579  			if len(diff.ToDate) > 0 {
   580  				toDate = "\t" + diff.ToDate
   581  			}
   582  			if diff.FromFile != "" || diff.ToFile != "" {
   583  				err := wf("--- %s%s%s", diff.FromFile, fromDate, diff.Eol)
   584  				if err != nil {
   585  					return err
   586  				}
   587  				err = wf("+++ %s%s%s", diff.ToFile, toDate, diff.Eol)
   588  				if err != nil {
   589  					return err
   590  				}
   591  			}
   592  		}
   593  		first, last := g[0], g[len(g)-1]
   594  		range1 := formatRangeUnified(first.I1, last.I2)
   595  		range2 := formatRangeUnified(first.J1, last.J2)
   596  		if err := wf("@@ -%s +%s @@%s", range1, range2, diff.Eol); err != nil {
   597  			return err
   598  		}
   599  		for _, c := range g {
   600  			i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
   601  			if c.Tag == 'e' {
   602  				for _, line := range diff.A[i1:i2] {
   603  					if err := ws(" " + line); err != nil {
   604  						return err
   605  					}
   606  				}
   607  				continue
   608  			}
   609  			if c.Tag == 'r' || c.Tag == 'd' {
   610  				for _, line := range diff.A[i1:i2] {
   611  					if err := ws("-" + line); err != nil {
   612  						return err
   613  					}
   614  				}
   615  			}
   616  			if c.Tag == 'r' || c.Tag == 'i' {
   617  				for _, line := range diff.B[j1:j2] {
   618  					if err := ws("+" + line); err != nil {
   619  						return err
   620  					}
   621  				}
   622  			}
   623  		}
   624  	}
   625  	return nil
   626  }
   627  
   628  // GetUnifiedDiffString is like WriteUnifiedDiff but returns the diff as a string.
   629  func GetUnifiedDiffString(diff UnifiedDiff) (string, error) {
   630  	w := &bytes.Buffer{}
   631  	err := WriteUnifiedDiff(w, diff)
   632  	return w.String(), err
   633  }
   634  
   635  // Convert range to the "ed" format.
   636  func formatRangeContext(start, stop int) string {
   637  	// Per the diff spec at http://www.unix.org/single_unix_specification/
   638  	beginning := start + 1 // lines start numbering with one
   639  	length := stop - start
   640  	if length == 0 {
   641  		beginning-- // empty ranges begin at line just before the range
   642  	}
   643  	if length <= 1 {
   644  		return fmt.Sprintf("%d", beginning)
   645  	}
   646  	return fmt.Sprintf("%d,%d", beginning, beginning+length-1)
   647  }
   648  
   649  type ContextDiff UnifiedDiff
   650  
   651  // WriteContextDiff compares two sequences of lines; generates the delta as a context diff.
   652  //
   653  // Context diffs are a compact way of showing line changes and a few
   654  // lines of context. The number of context lines is set by diff.Context
   655  // which defaults to three.
   656  //
   657  // By default, the diff control lines (those with *** or ---) are
   658  // created with a trailing newline.
   659  //
   660  // For inputs that do not have trailing newlines, set the diff.Eol
   661  // argument to "" so that the output will be uniformly newline free.
   662  //
   663  // The context diff format normally has a header for filenames and
   664  // modification times.  Any or all of these may be specified using
   665  // strings for diff.FromFile, diff.ToFile, diff.FromDate, diff.ToDate.
   666  // The modification times are normally expressed in the ISO 8601 format.
   667  // If not specified, the strings default to blanks.
   668  func WriteContextDiff(writer io.Writer, diff ContextDiff) error {
   669  	buf := bufio.NewWriter(writer)
   670  	defer buf.Flush()
   671  	var diffErr error
   672  	wf := func(format string, args ...interface{}) {
   673  		_, err := buf.WriteString(fmt.Sprintf(format, args...))
   674  		if diffErr == nil && err != nil {
   675  			diffErr = err
   676  		}
   677  	}
   678  	ws := func(s string) {
   679  		_, err := buf.WriteString(s)
   680  		if diffErr == nil && err != nil {
   681  			diffErr = err
   682  		}
   683  	}
   684  
   685  	if len(diff.Eol) == 0 {
   686  		diff.Eol = "\n"
   687  	}
   688  
   689  	prefix := map[byte]string{
   690  		'i': "+ ",
   691  		'd': "- ",
   692  		'r': "! ",
   693  		'e': "  ",
   694  	}
   695  
   696  	started := false
   697  	m := NewMatcher(diff.A, diff.B)
   698  	for _, g := range m.GetGroupedOpCodes(diff.Context) {
   699  		if !started {
   700  			started = true
   701  			fromDate := ""
   702  			if len(diff.FromDate) > 0 {
   703  				fromDate = "\t" + diff.FromDate
   704  			}
   705  			toDate := ""
   706  			if len(diff.ToDate) > 0 {
   707  				toDate = "\t" + diff.ToDate
   708  			}
   709  			if diff.FromFile != "" || diff.ToFile != "" {
   710  				wf("*** %s%s%s", diff.FromFile, fromDate, diff.Eol)
   711  				wf("--- %s%s%s", diff.ToFile, toDate, diff.Eol)
   712  			}
   713  		}
   714  
   715  		first, last := g[0], g[len(g)-1]
   716  		ws("***************" + diff.Eol)
   717  
   718  		range1 := formatRangeContext(first.I1, last.I2)
   719  		wf("*** %s ****%s", range1, diff.Eol)
   720  		for _, c := range g {
   721  			if c.Tag == 'r' || c.Tag == 'd' {
   722  				for _, cc := range g {
   723  					if cc.Tag == 'i' {
   724  						continue
   725  					}
   726  					for _, line := range diff.A[cc.I1:cc.I2] {
   727  						ws(prefix[cc.Tag] + line)
   728  					}
   729  				}
   730  				break
   731  			}
   732  		}
   733  
   734  		range2 := formatRangeContext(first.J1, last.J2)
   735  		wf("--- %s ----%s", range2, diff.Eol)
   736  		for _, c := range g {
   737  			if c.Tag == 'r' || c.Tag == 'i' {
   738  				for _, cc := range g {
   739  					if cc.Tag == 'd' {
   740  						continue
   741  					}
   742  					for _, line := range diff.B[cc.J1:cc.J2] {
   743  						ws(prefix[cc.Tag] + line)
   744  					}
   745  				}
   746  				break
   747  			}
   748  		}
   749  	}
   750  	return diffErr
   751  }
   752  
   753  // GetContextDiffString is like WriteContextDiff but returns the diff as a string.
   754  func GetContextDiffString(diff ContextDiff) (string, error) {
   755  	w := &bytes.Buffer{}
   756  	err := WriteContextDiff(w, diff)
   757  	return w.String(), err
   758  }
   759  
   760  // SplitLines splits a string on "\n" while preserving them. The output can be used
   761  // as input for UnifiedDiff and ContextDiff structures.
   762  func SplitLines(s string) []string {
   763  	lines := strings.SplitAfter(s, "\n")
   764  	lines[len(lines)-1] += "\n"
   765  	return lines
   766  }
   767
View as plain text