* [PATCH v2 1/4] xdiff: refactor xdl_hash_record()
@ 2025-09-08 18:49 Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 2/4] xdiff: annotate unlikely branch Alexander Monakov
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Alexander Monakov @ 2025-09-08 18:49 UTC (permalink / raw)
To: git; +Cc: Alexander Monakov, Phillip Wood
From: Phillip Wood <phillip.wood@dunelm•org.uk>
Inline the check for whitespace flags so that the compiler can hoist
it out of the loop in xdl_prepare_ctx(). This improves the performance
by 8%.
$ hyperfine --warmup=1 -L rev HEAD,HEAD^ --setup='git checkout {rev} -- :/ && make git' ': {rev}; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0'
Benchmark 1: : HEAD; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0
Time (mean ± σ): 1.670 s ± 0.044 s [User: 1.473 s, System: 0.196 s]
Range (min … max): 1.619 s … 1.754 s 10 runs
Benchmark 2: : HEAD^; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0
Time (mean ± σ): 1.801 s ± 0.021 s [User: 1.605 s, System: 0.192 s]
Range (min … max): 1.766 s … 1.831 s 10 runs
Summary
': HEAD^; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0' ran
1.08 ± 0.03 times faster than ': HEAD^^; GIT_CONFIG_GLOBAL=/dev/null ./git log --oneline --shortstat v2.0.0..v2.5.0'
Signed-off-by: Phillip Wood <phillip.wood@dunelm•org.uk>
---
xdiff/xutils.c | 7 ++-----
xdiff/xutils.h | 10 +++++++++-
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 444a108f87..e070ed649f 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -249,7 +249,7 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
return 1;
}
-static unsigned long xdl_hash_record_with_whitespace(char const **data,
+unsigned long xdl_hash_record_with_whitespace(char const **data,
char const *top, long flags) {
unsigned long ha = 5381;
char const *ptr = *data;
@@ -294,13 +294,10 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
return ha;
}
-unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
+unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
unsigned long ha = 5381;
char const *ptr = *data;
- if (flags & XDF_WHITESPACE_FLAGS)
- return xdl_hash_record_with_whitespace(data, top, flags);
-
for (; ptr < top && *ptr != '\n'; ptr++) {
ha += (ha << 5);
ha ^= (unsigned long) *ptr;
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index fd0bba94e8..13f6831047 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -34,7 +34,15 @@ void *xdl_cha_alloc(chastore_t *cha);
long xdl_guess_lines(mmfile_t *mf, long sample);
int xdl_blankline(const char *line, long size, long flags);
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
-unsigned long xdl_hash_record(char const **data, char const *top, long flags);
+unsigned long xdl_hash_record_verbatim(char const **data, char const *top);
+unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
+static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
+{
+ if (flags & XDF_WHITESPACE_FLAGS)
+ return xdl_hash_record_with_whitespace(data, top, flags);
+ else
+ return xdl_hash_record_verbatim(data, top);
+}
unsigned int xdl_hashbits(unsigned int size);
int xdl_num_out(char *out, long val);
int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
--
2.49.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2 2/4] xdiff: annotate unlikely branch
2025-09-08 18:49 [PATCH v2 1/4] xdiff: refactor xdl_hash_record() Alexander Monakov
@ 2025-09-08 18:49 ` Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 3/4] xdiff: move hashing functions to a separate header Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 4/4] xdiff: use a faster hash in xdl_hash_record_verbatim Alexander Monakov
2 siblings, 0 replies; 4+ messages in thread
From: Alexander Monakov @ 2025-09-08 18:49 UTC (permalink / raw)
To: git; +Cc: Alexander Monakov
XDL_ALLOC_GROW is used in a hot loop in xdl_prepare_ctx. Inform the
compiler that branching to the reallocation helper happens rarely
to improve code layout.
Signed-off-by: Alexander Monakov <amonakov@ispras•ru>
---
xdiff/xmacros.h | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
index 8487bb396f..d892e022d4 100644
--- a/xdiff/xmacros.h
+++ b/xdiff/xmacros.h
@@ -23,8 +23,11 @@
#if !defined(XMACROS_H)
#define XMACROS_H
-
-
+#if __GNUC__ >= 3
+#define XDL_LIKELY(e) __builtin_expect((e), 1)
+#else
+#define XDL_LIKELY(e) (e)
+#endif
#define XDL_MIN(a, b) ((a) < (b) ? (a): (b))
#define XDL_MAX(a, b) ((a) > (b) ? (a): (b))
@@ -64,8 +67,8 @@ do { \
* elements) as necessary. Frees p and returns -1 on failure, returns
* 0 on success
*/
-#define XDL_ALLOC_GROW(p, nr, alloc) \
- (-!((nr) <= (alloc) || \
+#define XDL_ALLOC_GROW(p, nr, alloc) \
+ (-!(XDL_LIKELY((nr) <= (alloc)) || \
((p) = xdl_alloc_grow_helper((p), (nr), &(alloc), sizeof(*(p))))))
#endif /* #if !defined(XMACROS_H) */
--
2.49.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2 3/4] xdiff: move hashing functions to a separate header
2025-09-08 18:49 [PATCH v2 1/4] xdiff: refactor xdl_hash_record() Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 2/4] xdiff: annotate unlikely branch Alexander Monakov
@ 2025-09-08 18:49 ` Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 4/4] xdiff: use a faster hash in xdl_hash_record_verbatim Alexander Monakov
2 siblings, 0 replies; 4+ messages in thread
From: Alexander Monakov @ 2025-09-08 18:49 UTC (permalink / raw)
To: git; +Cc: Alexander Monakov
Move xdl_hash_record to a separate header file, and expose
xdl_hash_record_verbatim as an inline function to avoid call overhead
for short strings.
Signed-off-by: Alexander Monakov <amonakov@ispras•ru>
---
xdiff-interface.c | 1 +
xdiff/xhash.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++
xdiff/xinclude.h | 1 +
xdiff/xutils.c | 13 ------------
xdiff/xutils.h | 9 --------
5 files changed, 54 insertions(+), 22 deletions(-)
create mode 100644 xdiff/xhash.h
diff --git a/xdiff-interface.c b/xdiff-interface.c
index 4971f722b3..e21a7aa0c9 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -12,6 +12,7 @@
#include "xdiff/xtypes.h"
#include "xdiff/xdiffi.h"
#include "xdiff/xutils.h"
+#include "xdiff/xhash.h"
struct xdiff_emit_state {
xdiff_emit_hunk_fn hunk_fn;
diff --git a/xdiff/xhash.h b/xdiff/xhash.h
new file mode 100644
index 0000000000..27da4288c8
--- /dev/null
+++ b/xdiff/xhash.h
@@ -0,0 +1,52 @@
+/*
+ * LibXDiff by Davide Libenzi ( File Differential Library )
+ * Copyright (C) 2003 Davide Libenzi
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Davide Libenzi <davidel@xmailserver•org>
+ *
+ */
+
+#if !defined(XHASH_H)
+#define XHASH_H
+
+
+
+unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
+
+static inline unsigned long xdl_hash_record_verbatim(char const **data, char const *top)
+{
+ unsigned long ha = 5381;
+ char const *ptr = *data;
+
+ for (; ptr < top && *ptr != '\n'; ptr++) {
+ ha += (ha << 5);
+ ha ^= (unsigned long) *ptr;
+ }
+ *data = ptr < top ? ptr + 1: ptr;
+
+ return ha;
+}
+
+static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
+{
+ if (flags & XDF_WHITESPACE_FLAGS)
+ return xdl_hash_record_with_whitespace(data, top, flags);
+ else
+ return xdl_hash_record_verbatim(data, top);
+}
+
+#endif /* #if !defined(XHASH_H) */
diff --git a/xdiff/xinclude.h b/xdiff/xinclude.h
index a4285ac0eb..68b2d9f1f1 100644
--- a/xdiff/xinclude.h
+++ b/xdiff/xinclude.h
@@ -31,6 +31,7 @@
#include "xprepare.h"
#include "xdiffi.h"
#include "xemit.h"
+#include "xhash.h"
#endif /* #if !defined(XINCLUDE_H) */
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index e070ed649f..0fff5b26a0 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -294,19 +294,6 @@ unsigned long xdl_hash_record_with_whitespace(char const **data,
return ha;
}
-unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
- unsigned long ha = 5381;
- char const *ptr = *data;
-
- for (; ptr < top && *ptr != '\n'; ptr++) {
- ha += (ha << 5);
- ha ^= (unsigned long) *ptr;
- }
- *data = ptr < top ? ptr + 1: ptr;
-
- return ha;
-}
-
unsigned int xdl_hashbits(unsigned int size) {
unsigned int val = 1, bits = 0;
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index 13f6831047..f51336fce1 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -34,15 +34,6 @@ void *xdl_cha_alloc(chastore_t *cha);
long xdl_guess_lines(mmfile_t *mf, long sample);
int xdl_blankline(const char *line, long size, long flags);
int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
-unsigned long xdl_hash_record_verbatim(char const **data, char const *top);
-unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
-static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
-{
- if (flags & XDF_WHITESPACE_FLAGS)
- return xdl_hash_record_with_whitespace(data, top, flags);
- else
- return xdl_hash_record_verbatim(data, top);
-}
unsigned int xdl_hashbits(unsigned int size);
int xdl_num_out(char *out, long val);
int xdl_emit_hunk_hdr(long s1, long c1, long s2, long c2,
--
2.49.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2 4/4] xdiff: use a faster hash in xdl_hash_record_verbatim
2025-09-08 18:49 [PATCH v2 1/4] xdiff: refactor xdl_hash_record() Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 2/4] xdiff: annotate unlikely branch Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 3/4] xdiff: move hashing functions to a separate header Alexander Monakov
@ 2025-09-08 18:49 ` Alexander Monakov
2 siblings, 0 replies; 4+ messages in thread
From: Alexander Monakov @ 2025-09-08 18:49 UTC (permalink / raw)
To: git; +Cc: Alexander Monakov
Reimplement xdl_hash_record_verbatim such that it computes two djb2
hashes for even and odd characters independently, each at latency two
per character. The new loop is expected to be issue and execution
port throughput bound at three cycles per a pair of characters. The
more efficient ways to evaluate such hashes involve use of
multiplication and higher interleaving factors. The scheme used here
is expected to be a reasonable compromise for lines of source code
(i.e. not very long strings).
Due to interleaving, the new function does not produce the same hash
values as the original. Given values H0 for even characters and H1
for odd characters, we combine them to produce the final hash value
as 'H1 * 257 + H0'. The factor 257 is chosen to approximately match
the original in quality (collision count in xdiff hashtable).
Signed-off-by: Alexander Monakov <amonakov@ispras•ru>
---
xdiff/xhash.h | 62 +++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 55 insertions(+), 7 deletions(-)
diff --git a/xdiff/xhash.h b/xdiff/xhash.h
index 27da4288c8..1e9e13cc45 100644
--- a/xdiff/xhash.h
+++ b/xdiff/xhash.h
@@ -29,16 +29,64 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top
static inline unsigned long xdl_hash_record_verbatim(char const **data, char const *top)
{
- unsigned long ha = 5381;
char const *ptr = *data;
-
- for (; ptr < top && *ptr != '\n'; ptr++) {
- ha += (ha << 5);
- ha ^= (unsigned long) *ptr;
+#if 0
+ /*
+ * djb2 hash (below) is latency-bound; on x86, this baseline form cannot
+ * run faster than two cycles per iteration, and to achieve that it is
+ * neccessary to arrange for 'ha * 32' to be computed in parallel with
+ * 'ha + ch'. To avoid being latency-bound, we run two independent djb2
+ * hashes over even and odd characters, then combine them in the end.
+ * The resulting hash is not equivalent to the original djb2.
+ */
+ unsigned long ha = 5381, ch;
+ while (ptr < top) {
+ if ((ch = *ptr++) == '\n')
+ break;
+ ha = ha * 33 + ch;
}
- *data = ptr < top ? ptr + 1: ptr;
-
+ *data = ptr;
return ha;
+#else
+#ifdef __GNUC__
+/*
+ * Compiler reassociation barrier: pretend to modify X and Y to disallow
+ * changing evaluation order with respect to following uses of X or Y.
+ */
+#define REASSOC_FENCE(x, y) __asm__("" : "+r"(x), "+r"(y))
+#else
+#define REASSOC_FENCE(x, y)
+#endif
+ unsigned long h0 = 5381, h1 = 0, ch;
+ /* Process two characters per iteration. */
+ if (top - ptr >= 2) do {
+ if ((ch = *ptr++) == '\n') {
+ *data = ptr;
+ h0 += h1;
+ REASSOC_FENCE(h0, h1);
+ return h1 * 256 + h0;
+ }
+ ch += h0;
+ REASSOC_FENCE(ch, h0);
+ h0 = h0 * 32 + ch;
+
+ if ((ch = *ptr++) == '\n') {
+ *data = ptr;
+ h0 += h1;
+ REASSOC_FENCE(h0, h1);
+ return h1 * 256 + h0;
+ }
+ ch += h1;
+ REASSOC_FENCE(ch, h1);
+ h1 = h1 * 32 + ch;
+
+ } while (ptr < top - 1);
+ *data = top;
+ if (ptr < top && (ch = *ptr++) != '\n')
+ h0 = h0 * 33 + ch;
+ return h1 * 257 + h0;
+#undef REASSOC_FENCE
+#endif
}
static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
--
2.49.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-09-08 18:49 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-08 18:49 [PATCH v2 1/4] xdiff: refactor xdl_hash_record() Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 2/4] xdiff: annotate unlikely branch Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 3/4] xdiff: move hashing functions to a separate header Alexander Monakov
2025-09-08 18:49 ` [PATCH v2 4/4] xdiff: use a faster hash in xdl_hash_record_verbatim Alexander Monakov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox