From: Keita Oda <ainsophyao@gmail•com>
To: git@vger•kernel.org
Cc: Keita ODA <ainsophyao@gmail•com>
Subject: [RFC PATCH 2/3] diff: render word-diff-align pairs for RFC review
Date: Wed, 27 May 2026 13:24:01 +0900 [thread overview]
Message-ID: <20260527042402.13607-3-ainsophyao@gmail.com> (raw)
In-Reply-To: <20260527042402.13607-1-ainsophyao@gmail.com>
From: Keita ODA <ainsophyao@gmail•com>
Teach the RFC prototype to render selected --word-diff-align pairs with
word-diff-like markers.
This renderer is deliberately small and local to the RFC. It exists to make
the recovered line pairs inspectable in review output. It is not meant to be
the final UI. A production version should likely reuse the existing word-diff
machinery once the line-pairing question is settled.
The renderer computes a token-level longest common subsequence (LCS) for the
selected pair and marks the unmatched spans with the familiar plain word-diff
delimiters:
[-old-]
{+new+}
Selected pairs that were moved are also marked with DIFF_SYMBOL_MOVED_LINE so
that the existing moved-line coloring can show that the pair came from a moved
region.
---
diff.c | 213 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 200 insertions(+), 13 deletions(-)
diff --git a/diff.c b/diff.c
index 6b8744920..8629d4670 100644
--- a/diff.c
+++ b/diff.c
@@ -1811,6 +1811,104 @@ static void word_diff_align_add_candidate(struct word_diff_align_candidate **can
candidate->line_shared = line_shared;
}
+/*
+ * RFC-only formatter for exposing the selected line pairs. The final
+ * presentation should reuse the normal word-diff machinery instead of this
+ * small debug renderer.
+ */
+struct word_diff_align_debug_token {
+ int start;
+ int end;
+};
+
+static void word_diff_align_debug_collect_tokens(const struct emitted_diff_symbol *line,
+ struct word_diff_align_debug_token **tokens,
+ int *tokens_nr, int *tokens_alloc)
+{
+ int len = word_diff_align_payload_len(line);
+ int pos = 0, start, end;
+
+ while (word_diff_align_next_token(line->line, len, &pos, &start, &end)) {
+ ALLOC_GROW(*tokens, *tokens_nr + 1, *tokens_alloc);
+ (*tokens)[*tokens_nr].start = start;
+ (*tokens)[*tokens_nr].end = end;
+ (*tokens_nr)++;
+ }
+}
+
+static void word_diff_align_debug_add_span(struct strbuf *out,
+ const char *open,
+ const char *line, int len,
+ const char *close)
+{
+ if (!len)
+ return;
+ strbuf_addstr(out, open);
+ strbuf_add(out, line, len);
+ strbuf_addstr(out, close);
+}
+
+static int word_diff_align_debug_token_eq(const struct emitted_diff_symbol *a,
+ const struct word_diff_align_debug_token *a_tok,
+ const struct emitted_diff_symbol *b,
+ const struct word_diff_align_debug_token *b_tok)
+{
+ int a_len = a_tok->end - a_tok->start;
+ int b_len = b_tok->end - b_tok->start;
+
+ return a_len == b_len &&
+ !memcmp(a->line + a_tok->start, b->line + b_tok->start, a_len);
+}
+
+static void word_diff_align_debug_rewrite_line(struct emitted_diff_symbol *line,
+ struct word_diff_align_debug_token *tokens,
+ int tokens_nr, int *match_to,
+ const struct emitted_diff_symbol *other,
+ struct word_diff_align_debug_token *other_tokens,
+ const char *open, const char *close)
+{
+ struct strbuf out = STRBUF_INIT;
+ char *old_line = (char *)line->line;
+ int payload_len = word_diff_align_payload_len(line);
+ int other_pos = 0;
+ int other_payload_len = word_diff_align_payload_len(other);
+ int pos = 0, i;
+ size_t new_len;
+
+ for (i = 0; i < tokens_nr; i++) {
+ int other_i = match_to[i];
+ int gap_len, other_gap_len;
+
+ if (other_i < 0)
+ continue;
+ gap_len = tokens[i].start - pos;
+ other_gap_len = other_tokens[other_i].start - other_pos;
+ if (gap_len == other_gap_len &&
+ !memcmp(line->line + pos, other->line + other_pos, gap_len))
+ strbuf_add(&out, line->line + pos, gap_len);
+ else
+ word_diff_align_debug_add_span(&out, open,
+ line->line + pos,
+ gap_len, close);
+ strbuf_add(&out, line->line + tokens[i].start,
+ tokens[i].end - tokens[i].start);
+ pos = tokens[i].end;
+ other_pos = other_tokens[other_i].end;
+ }
+ if (payload_len - pos == other_payload_len - other_pos &&
+ !memcmp(line->line + pos, other->line + other_pos,
+ payload_len - pos))
+ strbuf_add(&out, line->line + pos, payload_len - pos);
+ else
+ word_diff_align_debug_add_span(&out, open, line->line + pos,
+ payload_len - pos, close);
+ strbuf_add(&out, line->line + payload_len, line->len - payload_len);
+
+ line->line = strbuf_detach(&out, &new_len);
+ line->len = (int)new_len;
+ free(old_line);
+}
+
static void word_diff_align_debug_append_comment(struct emitted_diff_symbol *line,
const struct strbuf *suffix)
{
@@ -1835,25 +1933,114 @@ static void word_diff_align_debug_mark_pair(struct emitted_diff_symbol *minus_li
struct emitted_diff_symbol *plus_line,
int minus_lineno, int plus_lineno,
int changed, int moved,
- int window_score,
- int line_score,
- int pair_score)
-{
- struct strbuf suffix = STRBUF_INIT;
+ int window_score,
+ int line_score,
+ int pair_score)
+{
+ struct word_diff_align_debug_token *minus_tokens = NULL, *plus_tokens = NULL;
+ int minus_tokens_nr = 0, minus_tokens_alloc = 0;
+ int plus_tokens_nr = 0, plus_tokens_alloc = 0;
+ int *minus_match_to = NULL, *plus_match_to = NULL;
+ int *lcs = NULL;
+ struct emitted_diff_symbol minus_original = *minus_line;
+ struct emitted_diff_symbol plus_original = *plus_line;
+ int i, j, columns;
if (moved) {
minus_line->flags |= DIFF_SYMBOL_MOVED_LINE;
plus_line->flags |= DIFF_SYMBOL_MOVED_LINE;
}
- strbuf_addf(&suffix,
- " # aligned from %d to %d, %s, W=%d L=%d S=%d",
- minus_lineno, plus_lineno,
- changed ? "edited" : "unchanged",
- window_score, line_score, pair_score);
- word_diff_align_debug_append_comment(minus_line, &suffix);
- word_diff_align_debug_append_comment(plus_line, &suffix);
- strbuf_release(&suffix);
+ minus_original.line = xmemdupz(minus_line->line, minus_line->len);
+ plus_original.line = xmemdupz(plus_line->line, plus_line->len);
+ if (!changed)
+ goto comment;
+
+ word_diff_align_debug_collect_tokens(minus_line, &minus_tokens,
+ &minus_tokens_nr,
+ &minus_tokens_alloc);
+ word_diff_align_debug_collect_tokens(plus_line, &plus_tokens,
+ &plus_tokens_nr,
+ &plus_tokens_alloc);
+ if (!minus_tokens_nr || !plus_tokens_nr)
+ goto comment;
+
+ columns = plus_tokens_nr + 1;
+ CALLOC_ARRAY(lcs, (minus_tokens_nr + 1) * columns);
+ ALLOC_ARRAY(minus_match_to, minus_tokens_nr);
+ ALLOC_ARRAY(plus_match_to, plus_tokens_nr);
+ for (i = 0; i < minus_tokens_nr; i++)
+ minus_match_to[i] = -1;
+ for (j = 0; j < plus_tokens_nr; j++)
+ plus_match_to[j] = -1;
+
+ for (i = 1; i <= minus_tokens_nr; i++) {
+ for (j = 1; j <= plus_tokens_nr; j++) {
+ if (word_diff_align_debug_token_eq(minus_line,
+ &minus_tokens[i - 1],
+ plus_line,
+ &plus_tokens[j - 1]))
+ lcs[i * columns + j] =
+ lcs[(i - 1) * columns + j - 1] + 1;
+ else if (lcs[(i - 1) * columns + j] >
+ lcs[i * columns + j - 1])
+ lcs[i * columns + j] = lcs[(i - 1) * columns + j];
+ else
+ lcs[i * columns + j] = lcs[i * columns + j - 1];
+ }
+ }
+
+ i = minus_tokens_nr;
+ j = plus_tokens_nr;
+ while (i > 0 && j > 0) {
+ if (lcs[i * columns + j] == lcs[i * columns + j - 1]) {
+ j--;
+ } else if (lcs[i * columns + j] ==
+ lcs[(i - 1) * columns + j]) {
+ i--;
+ } else if (word_diff_align_debug_token_eq(minus_line,
+ &minus_tokens[i - 1],
+ plus_line,
+ &plus_tokens[j - 1])) {
+ minus_match_to[i - 1] = j - 1;
+ plus_match_to[j - 1] = i - 1;
+ i--;
+ j--;
+ } else {
+ BUG("word-diff-align display LCS backtrack failed");
+ }
+ }
+
+ word_diff_align_debug_rewrite_line(minus_line, minus_tokens,
+ minus_tokens_nr, minus_match_to,
+ &plus_original, plus_tokens,
+ "[-", "-]");
+ word_diff_align_debug_rewrite_line(plus_line, plus_tokens,
+ plus_tokens_nr, plus_match_to,
+ &minus_original, minus_tokens,
+ "{+", "+}");
+
+comment:
+ {
+ struct strbuf suffix = STRBUF_INIT;
+
+ strbuf_addf(&suffix,
+ " # aligned from %d to %d, %s, W=%d L=%d S=%d",
+ minus_lineno, plus_lineno,
+ changed ? "edited" : "unchanged",
+ window_score, line_score, pair_score);
+ word_diff_align_debug_append_comment(minus_line, &suffix);
+ word_diff_align_debug_append_comment(plus_line, &suffix);
+ strbuf_release(&suffix);
+ }
+
+ free((char *)minus_original.line);
+ free((char *)plus_original.line);
+ free(minus_tokens);
+ free(plus_tokens);
+ free(minus_match_to);
+ free(plus_match_to);
+ free(lcs);
}
static void word_diff_align_add_item(struct word_diff_align_item **items,
--
2.39.3 (Apple Git-146)
next prev parent reply other threads:[~2026-05-27 4:24 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-27 4:23 [RFC PATCH 0/3] diff: pair edited lines inside moved blocks Keita Oda
2026-05-27 4:24 ` [RFC PATCH 1/3] diff: add word-diff-align line pairing Keita Oda
2026-05-27 4:24 ` Keita Oda [this message]
2026-05-27 4:24 ` [RFC PATCH 3/3] t4034: cover moved-and-edited word diff alignment Keita Oda
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260527042402.13607-3-ainsophyao@gmail.com \
--to=ainsophyao@gmail$(echo .)com \
--cc=git@vger$(echo .)kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.