1
2
3
4
5 package lcs
6
7
8
9 import (
10 "fmt"
11 )
12
13
14 type Diff struct {
15 Start, End int
16 ReplStart, ReplEnd int
17 }
18
19
20
21 func DiffStrings(a, b string) []Diff { return diff(stringSeqs{a, b}) }
22
23
24
25 func DiffBytes(a, b []byte) []Diff { return diff(bytesSeqs{a, b}) }
26
27
28 func DiffRunes(a, b []rune) []Diff { return diff(runesSeqs{a, b}) }
29
30 func diff(seqs sequences) []Diff {
31
32 const maxDiffs = 100
33 diff, _ := compute(seqs, twosided, maxDiffs/2)
34 return diff
35 }
36
37
38
39
40 func compute(seqs sequences, algo func(*editGraph) lcs, limit int) ([]Diff, lcs) {
41 if limit <= 0 {
42 limit = 1 << 25
43 }
44 alen, blen := seqs.lengths()
45 g := &editGraph{
46 seqs: seqs,
47 vf: newtriang(limit),
48 vb: newtriang(limit),
49 limit: limit,
50 ux: alen,
51 uy: blen,
52 delta: alen - blen,
53 }
54 lcs := algo(g)
55 diffs := lcs.toDiffs(alen, blen)
56 return diffs, lcs
57 }
58
59
60 type editGraph struct {
61 seqs sequences
62 vf, vb label
63
64 limit int
65
66 lx, ly, ux, uy int
67 delta int
68 }
69
70
71 func (lcs lcs) toDiffs(alen, blen int) []Diff {
72 var diffs []Diff
73 var pa, pb int
74 for _, l := range lcs {
75 if pa < l.X || pb < l.Y {
76 diffs = append(diffs, Diff{pa, l.X, pb, l.Y})
77 }
78 pa = l.X + l.Len
79 pb = l.Y + l.Len
80 }
81 if pa < alen || pb < blen {
82 diffs = append(diffs, Diff{pa, alen, pb, blen})
83 }
84 return diffs
85 }
86
87
88
89
90
91 func (e *editGraph) fdone(D, k int) (bool, lcs) {
92
93 x := e.vf.get(D, k)
94 y := x - k
95 if x == e.ux && y == e.uy {
96 return true, e.forwardlcs(D, k)
97 }
98 return false, nil
99 }
100
101
102 func forward(e *editGraph) lcs {
103 e.setForward(0, 0, e.lx)
104 if ok, ans := e.fdone(0, 0); ok {
105 return ans
106 }
107
108 for D := 0; D < e.limit; D++ {
109 e.setForward(D+1, -(D + 1), e.getForward(D, -D))
110 if ok, ans := e.fdone(D+1, -(D + 1)); ok {
111 return ans
112 }
113 e.setForward(D+1, D+1, e.getForward(D, D)+1)
114 if ok, ans := e.fdone(D+1, D+1); ok {
115 return ans
116 }
117 for k := -D + 1; k <= D-1; k += 2 {
118
119 lookv := e.lookForward(k, e.getForward(D, k-1)+1)
120 lookh := e.lookForward(k, e.getForward(D, k+1))
121 if lookv > lookh {
122 e.setForward(D+1, k, lookv)
123 } else {
124 e.setForward(D+1, k, lookh)
125 }
126 if ok, ans := e.fdone(D+1, k); ok {
127 return ans
128 }
129 }
130 }
131
132
133
134 kmax := -e.limit - 1
135 diagmax := -1
136 for k := -e.limit; k <= e.limit; k += 2 {
137 x := e.getForward(e.limit, k)
138 y := x - k
139 if x+y > diagmax && x <= e.ux && y <= e.uy {
140 diagmax, kmax = x+y, k
141 }
142 }
143 return e.forwardlcs(e.limit, kmax)
144 }
145
146
147 func (e *editGraph) forwardlcs(D, k int) lcs {
148 var ans lcs
149 for x := e.getForward(D, k); x != 0 || x-k != 0; {
150 if ok(D-1, k-1) && x-1 == e.getForward(D-1, k-1) {
151
152 D, k, x = D-1, k-1, x-1
153 continue
154 } else if ok(D-1, k+1) && x == e.getForward(D-1, k+1) {
155
156 D, k = D-1, k+1
157 continue
158 }
159
160 y := x - k
161 ans = ans.prepend(x+e.lx-1, y+e.ly-1)
162 x--
163 }
164 return ans
165 }
166
167
168
169 func (e *editGraph) lookForward(k, relx int) int {
170 rely := relx - k
171 x, y := relx+e.lx, rely+e.ly
172 if x < e.ux && y < e.uy {
173 x += e.seqs.commonPrefixLen(x, e.ux, y, e.uy)
174 }
175 return x
176 }
177
178 func (e *editGraph) setForward(d, k, relx int) {
179 x := e.lookForward(k, relx)
180 e.vf.set(d, k, x-e.lx)
181 }
182
183 func (e *editGraph) getForward(d, k int) int {
184 x := e.vf.get(d, k)
185 return x
186 }
187
188
189
190
191 func (e *editGraph) bdone(D, k int) (bool, lcs) {
192
193 x := e.vb.get(D, k)
194 y := x - (k + e.delta)
195 if x == 0 && y == 0 {
196 return true, e.backwardlcs(D, k)
197 }
198 return false, nil
199 }
200
201
202 func backward(e *editGraph) lcs {
203 e.setBackward(0, 0, e.ux)
204 if ok, ans := e.bdone(0, 0); ok {
205 return ans
206 }
207
208 for D := 0; D < e.limit; D++ {
209 e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
210 if ok, ans := e.bdone(D+1, -(D + 1)); ok {
211 return ans
212 }
213 e.setBackward(D+1, D+1, e.getBackward(D, D))
214 if ok, ans := e.bdone(D+1, D+1); ok {
215 return ans
216 }
217 for k := -D + 1; k <= D-1; k += 2 {
218
219 lookv := e.lookBackward(k, e.getBackward(D, k-1))
220 lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
221 if lookv < lookh {
222 e.setBackward(D+1, k, lookv)
223 } else {
224 e.setBackward(D+1, k, lookh)
225 }
226 if ok, ans := e.bdone(D+1, k); ok {
227 return ans
228 }
229 }
230 }
231
232
233
234
235 kmax := -e.limit - 1
236 diagmin := 1 << 25
237 for k := -e.limit; k <= e.limit; k += 2 {
238 x := e.getBackward(e.limit, k)
239 y := x - (k + e.delta)
240 if x+y < diagmin && x >= 0 && y >= 0 {
241 diagmin, kmax = x+y, k
242 }
243 }
244 if kmax < -e.limit {
245 panic(fmt.Sprintf("no paths when limit=%d?", e.limit))
246 }
247 return e.backwardlcs(e.limit, kmax)
248 }
249
250
251 func (e *editGraph) backwardlcs(D, k int) lcs {
252 var ans lcs
253 for x := e.getBackward(D, k); x != e.ux || x-(k+e.delta) != e.uy; {
254 if ok(D-1, k-1) && x == e.getBackward(D-1, k-1) {
255
256 D, k = D-1, k-1
257 continue
258 } else if ok(D-1, k+1) && x+1 == e.getBackward(D-1, k+1) {
259
260 D, k, x = D-1, k+1, x+1
261 continue
262 }
263 y := x - (k + e.delta)
264 ans = ans.append(x+e.lx, y+e.ly)
265 x++
266 }
267 return ans
268 }
269
270
271 func (e *editGraph) lookBackward(k, relx int) int {
272 rely := relx - (k + e.delta)
273 x, y := relx+e.lx, rely+e.ly
274 if x > 0 && y > 0 {
275 x -= e.seqs.commonSuffixLen(0, x, 0, y)
276 }
277 return x
278 }
279
280
281 func (e *editGraph) setBackward(d, k, relx int) {
282 x := e.lookBackward(k, relx)
283 e.vb.set(d, k, x-e.lx)
284 }
285
286 func (e *editGraph) getBackward(d, k int) int {
287 x := e.vb.get(d, k)
288 return x
289 }
290
291
292
293 func twosided(e *editGraph) lcs {
294
295
296
297
298 e.setForward(0, 0, e.lx)
299 e.setBackward(0, 0, e.ux)
300
301
302 for D := 0; D < e.limit; D++ {
303
304 if got, ok := e.twoDone(D, D); ok {
305 return e.twolcs(D, D, got)
306 }
307
308 e.setForward(D+1, -(D + 1), e.getForward(D, -D))
309 e.setForward(D+1, D+1, e.getForward(D, D)+1)
310 for k := -D + 1; k <= D-1; k += 2 {
311
312 lookv := e.lookForward(k, e.getForward(D, k-1)+1)
313 lookh := e.lookForward(k, e.getForward(D, k+1))
314 if lookv > lookh {
315 e.setForward(D+1, k, lookv)
316 } else {
317 e.setForward(D+1, k, lookh)
318 }
319 }
320
321 if got, ok := e.twoDone(D+1, D); ok {
322 return e.twolcs(D+1, D, got)
323 }
324
325 e.setBackward(D+1, -(D + 1), e.getBackward(D, -D)-1)
326 e.setBackward(D+1, D+1, e.getBackward(D, D))
327 for k := -D + 1; k <= D-1; k += 2 {
328
329 lookv := e.lookBackward(k, e.getBackward(D, k-1))
330 lookh := e.lookBackward(k, e.getBackward(D, k+1)-1)
331 if lookv < lookh {
332 e.setBackward(D+1, k, lookv)
333 } else {
334 e.setBackward(D+1, k, lookh)
335 }
336 }
337 }
338
339
340
341 kmax := -e.limit - 1
342 diagmax := -1
343 for k := -e.limit; k <= e.limit; k += 2 {
344 x := e.getForward(e.limit, k)
345 y := x - k
346 if x+y > diagmax && x <= e.ux && y <= e.uy {
347 diagmax, kmax = x+y, k
348 }
349 }
350 if kmax < -e.limit {
351 panic(fmt.Sprintf("no forward paths when limit=%d?", e.limit))
352 }
353 lcs := e.forwardlcs(e.limit, kmax)
354
355
356
357 diagmin := 1 << 25
358 for k := -e.limit; k <= e.limit; k += 2 {
359 x := e.getBackward(e.limit, k)
360 y := x - (k + e.delta)
361 if x+y < diagmin && x >= 0 && y >= 0 {
362 diagmin, kmax = x+y, k
363 }
364 }
365 if kmax < -e.limit {
366 panic(fmt.Sprintf("no backward paths when limit=%d?", e.limit))
367 }
368 lcs = append(lcs, e.backwardlcs(e.limit, kmax)...)
369
370 ans := lcs.fix()
371 return ans
372 }
373
374
375 func (e *editGraph) twoDone(df, db int) (int, bool) {
376 if (df+db+e.delta)%2 != 0 {
377 return 0, false
378 }
379 kmin := -db + e.delta
380 if -df > kmin {
381 kmin = -df
382 }
383 kmax := db + e.delta
384 if df < kmax {
385 kmax = df
386 }
387 for k := kmin; k <= kmax; k += 2 {
388 x := e.vf.get(df, k)
389 u := e.vb.get(db, k-e.delta)
390 if u <= x {
391
392 for l := k; l <= kmax; l += 2 {
393 x := e.vf.get(df, l)
394 y := x - l
395 u := e.vb.get(db, l-e.delta)
396 v := u - l
397 if x == u || u == 0 || v == 0 || y == e.uy || x == e.ux {
398 return l, true
399 }
400 }
401 return k, true
402 }
403 }
404 return 0, false
405 }
406
407 func (e *editGraph) twolcs(df, db, kf int) lcs {
408
409 x := e.vf.get(df, kf)
410 y := x - kf
411 kb := kf - e.delta
412 u := e.vb.get(db, kb)
413 v := u - kf
414
415
416
417
418
419
420
421
422
423 if x == u {
424
425
426 lcs := e.forwardlcs(df, kf)
427 lcs = append(lcs, e.backwardlcs(db, kb)...)
428 return lcs.sort()
429 }
430
431
432
433
434 if u > 0 && ok(df-1, u-1-v) && e.vf.get(df-1, u-1-v) == u-1 {
435
436 lcs := e.forwardlcs(df-1, u-1-v)
437 lcs = append(lcs, e.backwardlcs(db, kb)...)
438 return lcs.sort()
439 }
440 if v > 0 && ok(df-1, (u-(v-1))) && e.vf.get(df-1, u-(v-1)) == u {
441
442 lcs := e.forwardlcs(df-1, u-(v-1))
443 lcs = append(lcs, e.backwardlcs(db, kb)...)
444 return lcs.sort()
445 }
446
447
448
449 if u == 0 || v == 0 || x == e.ux || y == e.uy {
450
451 if u == 0 || v == 0 {
452 return e.backwardlcs(db, kb)
453 }
454 return e.forwardlcs(df, kf)
455 }
456
457
458 if x+1 <= e.ux && ok(db-1, x+1-y-e.delta) && e.vb.get(db-1, x+1-y-e.delta) == x+1 {
459
460 lcs := e.backwardlcs(db-1, kb+1)
461 lcs = append(lcs, e.forwardlcs(df, kf)...)
462 return lcs.sort()
463 }
464 if y+1 <= e.uy && ok(db-1, x-(y+1)-e.delta) && e.vb.get(db-1, x-(y+1)-e.delta) == x {
465
466 lcs := e.backwardlcs(db-1, kb-1)
467 lcs = append(lcs, e.forwardlcs(df, kf)...)
468 return lcs.sort()
469 }
470
471
472
473 lcs := e.backwardlcs(db, kb)
474 oldx, oldy := e.ux, e.uy
475 e.ux = u
476 e.uy = v
477 lcs = append(lcs, forward(e)...)
478 e.ux, e.uy = oldx, oldy
479 return lcs.sort()
480 }
481
View as plain text