public inbox for git@vger.kernel.org 
 help / color / mirror / Atom feed
* [PATCH] ws: add new tab-between-non-ws check
@ 2025-12-23 13:27 Adrian Ratiu
  2025-12-24  0:31 ` Junio C Hamano
  0 siblings, 1 reply; 3+ messages in thread
From: Adrian Ratiu @ 2025-12-23 13:27 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Patrick Steinhardt, Adrian Ratiu

Add a new whitespace check that detects a HT (horizontal tab) in the
middle of a sentence where a SP (space) was intended, as suggested by
Junio in
https://public-inbox.org/git/xmqqy0mwsedz.fsf@gitster.g/

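The check is a bit complex because simply looking at the characters
before and after a HT would issue false positives in cases like
"a\tb", where the HT spans several columns. To detect only the places
where a SP was intended, a HT between two non-whitespace characters
is flagged only when it sits in the last column before a tab stop,
i.e. when it is displayed as a single column, just like a SP.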

The new check is enabled for Documentation/**/*.adoc, where these
kinds of mistakes were seen in practice.

Suggested-by: Junio C Hamano <gitster@pobox•com>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora•com>
---
This is based on the latest master branch.
Pushed to GitHub: https://github.com/10ne1/git/tree/dev/aratiu/whitespace-new-test-v1
CI run: https://github.com/10ne1/git/actions/runs/20457905508
---
 .gitattributes             |  2 +-
 t/t4015-diff-whitespace.sh | 56 ++++++++++++++++++++++++++++++++++++++
 ws.c                       | 32 ++++++++++++++++++++++
 ws.h                       |  1 +
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/.gitattributes b/.gitattributes
index 700743c3f5..d3c40a038b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -7,7 +7,7 @@
 *.py text eol=lf diff=python
 *.bat text eol=crlf
 CODE_OF_CONDUCT.md -whitespace
-/Documentation/**/*.adoc text eol=lf whitespace=trail,space,incomplete
+/Documentation/**/*.adoc text eol=lf whitespace=trail,space,incomplete,tab-between-non-ws
 /command-list.txt text eol=lf
 /GIT-VERSION-GEN text eol=lf
 /mergetools/* text eol=lf
diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh
index 3c8eb02e4f..afe95f5209 100755
--- a/t/t4015-diff-whitespace.sh
+++ b/t/t4015-diff-whitespace.sh
@@ -2440,4 +2440,60 @@ test_expect_success 'combine --ignore-blank-lines with --function-context 2' '
 	test_cmp expect actual
 '
 
+test_expect_success 'check tab between non-whitespace (tab-between-non-ws: off)' '
+	git config core.whitespace "-tab-between-non-ws" &&
+	printf "1234567\tb" >x &&
+	git add x &&
+	git diff --cached --check
+'
+
+test_expect_success 'check tab between non-whitespace at tab stop (tab-between-non-ws: on)' '
+	git config core.whitespace "tab-between-non-ws,tabwidth=8" &&
+	printf "1234567\tb" >x &&
+	git add x &&
+	test_must_fail git diff --cached --check
+'
+
+test_expect_success 'check tab between non-whitespace not at tab stop (tab-between-non-ws: on)' '
+	git config core.whitespace "tab-between-non-ws,tabwidth=8" &&
+	printf "a\tb" >x &&
+	git add x &&
+	git diff --cached --check
+'
+
+test_expect_success 'check tab between non-whitespace with tabwidth=4 (tab-between-non-ws: on)' '
+	git config core.whitespace "tab-between-non-ws,tabwidth=4" &&
+	printf "123\tb" >x &&
+	git add x &&
+	test_must_fail git diff --cached --check
+'
+
+test_expect_success 'check tab between non-whitespace not at tab stop with tabwidth=4 (tab-between-non-ws: on)' '
+	git config core.whitespace "tab-between-non-ws,tabwidth=4" &&
+	printf "1234\tb" >x &&
+	git add x &&
+	git diff --cached --check
+'
+
+test_expect_success 'check multiple tabs with one error (tab-between-non-ws: on)' '
+	git config core.whitespace "tab-between-non-ws,tabwidth=8" &&
+	printf "a\t1234567\tb" >x &&
+	git add x &&
+	test_must_fail git diff --cached --check
+'
+
+test_expect_success 'check tab at beginning of line (tab-between-non-ws: on)' '
+	git config core.whitespace "tab-between-non-ws,tabwidth=8" &&
+	printf "\ta" >x &&
+	git add x &&
+	git diff --cached --check
+'
+
+test_expect_success 'check tab at end of line (tab-between-non-ws: on)' '
+	git config core.whitespace "tab-between-non-ws,-trailing-space,tabwidth=8" &&
+	printf "a\t" >x &&
+	git add x &&
+	git diff --cached --check
+'
+
 test_done
diff --git a/ws.c b/ws.c
index 6cc2466c0c..fcd81250ad 100644
--- a/ws.c
+++ b/ws.c
@@ -26,6 +26,7 @@ static struct whitespace_rule {
 	{ "blank-at-eol", WS_BLANK_AT_EOL, 0 },
 	{ "blank-at-eof", WS_BLANK_AT_EOF, 0 },
 	{ "tab-in-indent", WS_TAB_IN_INDENT, 0, 1 },
+	{ "tab-between-non-ws", WS_TAB_BETWEEN_NON_WS, 0 },
 	{ "incomplete-line", WS_INCOMPLETE_LINE, 0, 0 },
 };
 
@@ -140,6 +141,11 @@ char *whitespace_error_string(unsigned ws)
 			strbuf_addstr(&err, ", ");
 		strbuf_addstr(&err, "tab in indent");
 	}
+	if (ws & WS_TAB_BETWEEN_NON_WS) {
+		if (err.len)
+			strbuf_addstr(&err, ", ");
+		strbuf_addstr(&err, "tab between non-whitespace characters");
+	}
 	if (ws & WS_INCOMPLETE_LINE) {
 		if (err.len)
 			strbuf_addstr(&err, ", ");
@@ -228,6 +234,32 @@ static unsigned ws_check_emit_1(const char *line, int len, unsigned ws_rule,
 		written = i;
 	}
 
+	if (ws_rule & WS_TAB_BETWEEN_NON_WS) {
+		/*
+		 * A tab surrounded by non-whitespace characters is a typo candidate
+		 * (a space might have been intended). This checks for a tab that
+		 * would be expanded to a single space, which is when it appears at
+		 * a column that is one less than a multiple of the tabwidth.
+		 */
+		int col = 0;
+		int tabwidth = ws_tab_width(ws_rule);
+
+		if (!tabwidth)
+			BUG("a known tabwidth is required by WS_TAB_BETWEEN_NON_WS");
+
+		for (i = 0; i < len; i++) {
+			if (line[i] == '\t') {
+				if (i > 0 && i < len - 1 &&
+				    !isspace(line[i - 1]) && !isspace(line[i + 1]) &&
+				    (col % tabwidth) == (tabwidth - 1))
+					result |= WS_TAB_BETWEEN_NON_WS;
+				col += tabwidth - (col % tabwidth);
+			} else {
+				col++;
+			}
+		}
+	}
+
 	if (stream) {
 		/*
 		 * Now the rest of the line starts at "written".
diff --git a/ws.h b/ws.h
index 06d5cb73f8..35475fd320 100644
--- a/ws.h
+++ b/ws.h
@@ -16,6 +16,7 @@ struct strbuf;
 #define WS_BLANK_AT_EOF         (1<<10)
 #define WS_TAB_IN_INDENT        (1<<11)
 #define WS_INCOMPLETE_LINE      (1<<12)
+#define WS_TAB_BETWEEN_NON_WS   (1<<13)
 
 #define WS_TRAILING_SPACE       (WS_BLANK_AT_EOL|WS_BLANK_AT_EOF)
 #define WS_DEFAULT_RULE (WS_TRAILING_SPACE|WS_SPACE_BEFORE_TAB|8)
-- 
2.51.2


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-01-05 21:00 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-12-23 13:27 [PATCH] ws: add new tab-between-non-ws check Adrian Ratiu
2025-12-24  0:31 ` Junio C Hamano
2026-01-05 20:59   ` Adrian Ratiu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox