public inbox for git@vger.kernel.org 
 help / color / mirror / Atom feed
From: "Kristofer Karlsson via GitGitGadget" <gitgitgadget@gmail•com>
To: git@vger•kernel.org
Cc: Kristofer Karlsson <krka@spotify•com>,
	Kristofer Karlsson <krka@spotify•com>
Subject: [PATCH 3/3] revision: use priority queue for non-limited streaming walks
Date: Wed, 27 May 2026 15:50:02 +0000	[thread overview]
Message-ID: <c0e78707f1e0fa97e96bcd6522648699f593590a.1779897003.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2127.git.1779897003.gitgitgadget@gmail.com>

From: Kristofer Karlsson <krka@spotify•com>

The streaming (non-limited) walk in get_revision_1() inserts newly
discovered parent commits into a date-sorted queue via
commit_list_insert_by_date(), which scans the linked list to find the
insertion point -- O(w) per insert, where w is the width of the active
walk frontier.  Replace this with an O(log w) priority queue.

Add a commit_queue field to rev_info alongside the existing commits
linked list.  The two representations are mutually exclusive: setup
and external callers that need list access use the linked list, then
get_revision_1() lazily drains it into the priority queue on first
call.  Add a REV_WALK_NO_WALK enum value to distinguish the no_walk
case (which still uses the commit list) from the streaming case.

The conversion function rev_info_commit_list_to_queue() is public so
callers that know they will iterate can convert early.

Combined with the limit_list() priority queue change already in
master, this eliminates all O(w) sorted linked-list insertion from
the revision walk machinery.

Signed-off-by: Kristofer Karlsson <krka@spotify•com>
---
 commit.c   | 13 -------------
 commit.h   |  2 --
 revision.c | 55 +++++++++++++++++++++++++++++-------------------------
 revision.h | 12 +++++++++++-
 4 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/commit.c b/commit.c
index e3e7352e69..5112c7b2af 100644
--- a/commit.c
+++ b/commit.c
@@ -729,19 +729,6 @@ void commit_list_free(struct commit_list *list)
 		pop_commit(&list);
 }
 
-struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list)
-{
-	struct commit_list **pp = list;
-	struct commit_list *p;
-	while ((p = *pp) != NULL) {
-		if (p->item->date < item->date) {
-			break;
-		}
-		pp = &p->next;
-	}
-	return commit_list_insert(item, pp);
-}
-
 static int commit_list_compare_by_date(const struct commit_list *a,
 				       const struct commit_list *b)
 {
diff --git a/commit.h b/commit.h
index 58150045af..385492fbb1 100644
--- a/commit.h
+++ b/commit.h
@@ -191,8 +191,6 @@ int commit_list_contains(struct commit *item,
 struct commit_list **commit_list_append(struct commit *commit,
 					struct commit_list **next);
 unsigned commit_list_count(const struct commit_list *l);
-struct commit_list *commit_list_insert_by_date(struct commit *item,
-				    struct commit_list **list);
 void commit_list_sort_by_date(struct commit_list **list);
 
 /* Shallow copy of the input list */
diff --git a/revision.c b/revision.c
index 9d0fc696d0..4bb3b16e43 100644
--- a/revision.c
+++ b/revision.c
@@ -1116,7 +1116,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
 }
 
 static int process_parents(struct rev_info *revs, struct commit *commit,
-			   struct commit_list **list, struct prio_queue *queue)
+			   struct prio_queue *queue)
 {
 	struct commit_list *parent = commit->parents;
 	unsigned pass_flags;
@@ -1158,8 +1158,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 			if (p->object.flags & SEEN)
 				continue;
 			p->object.flags |= (SEEN | NOT_USER_GIVEN);
-			if (list)
-				commit_list_insert_by_date(p, list);
 			if (queue)
 				prio_queue_put(queue, p);
 			if (revs->exclude_first_parent_only)
@@ -1207,8 +1205,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
 		p->object.flags |= pass_flags | CHILD_VISITED;
 		if (!(p->object.flags & SEEN)) {
 			p->object.flags |= (SEEN | NOT_USER_GIVEN);
-			if (list)
-				commit_list_insert_by_date(p, list);
 			if (queue)
 				prio_queue_put(queue, p);
 		}
@@ -1470,7 +1466,7 @@ static int limit_list(struct rev_info *revs)
 
 		if (revs->max_age != -1 && (commit->date < revs->max_age))
 			obj->flags |= UNINTERESTING;
-		if (process_parents(revs, commit, NULL, &queue) < 0) {
+		if (process_parents(revs, commit, &queue) < 0) {
 			clear_prio_queue(&queue);
 			return -1;
 		}
@@ -3257,6 +3253,7 @@ static void free_void_commit_list(void *list)
 void release_revisions(struct rev_info *revs)
 {
 	commit_list_free(revs->commits);
+	clear_prio_queue(&revs->commit_queue);
 	commit_list_free(revs->ancestry_path_bottoms);
 	release_display_notes(&revs->notes_opt);
 	object_array_clear(&revs->pending);
@@ -3726,7 +3723,7 @@ static void explore_walk_step(struct rev_info *revs)
 	if (revs->max_age != -1 && (c->date < revs->max_age))
 		c->object.flags |= UNINTERESTING;
 
-	if (process_parents(revs, c, NULL, NULL) < 0)
+	if (process_parents(revs, c, NULL) < 0)
 		return;
 
 	if (c->object.flags & UNINTERESTING)
@@ -3902,7 +3899,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
 {
 	struct commit_list *p;
 	struct topo_walk_info *info = revs->topo_walk_info;
-	if (process_parents(revs, commit, NULL, NULL) < 0) {
+	if (process_parents(revs, commit, NULL) < 0) {
 		if (!revs->ignore_missing_links)
 			die("Failed to traverse parents of commit %s",
 			    oid_to_hex(&commit->object.oid));
@@ -3938,6 +3935,13 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
 	}
 }
 
+void rev_info_commit_list_to_queue(struct rev_info *revs)
+{
+	while (revs->commits)
+		prio_queue_put(&revs->commit_queue, pop_commit(&revs->commits));
+}
+
+
 int prepare_revision_walk(struct rev_info *revs)
 {
 	int i;
@@ -4006,7 +4010,7 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
 	for (;;) {
 		struct commit *p = *pp;
 		if (!revs->limited)
-			if (process_parents(revs, p, NULL, queue) < 0)
+			if (process_parents(revs, p, queue) < 0)
 				return rewrite_one_error;
 		if (p->object.flags & UNINTERESTING)
 			return rewrite_one_ok;
@@ -4020,27 +4024,18 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
 	}
 }
 
-static void merge_queue_into_list(struct prio_queue *q, struct commit_list **list)
+static void merge_queue_into_prio_queue(struct prio_queue *from,
+					struct prio_queue *to)
 {
-	while (q->nr) {
-		struct commit *item = prio_queue_peek(q);
-		struct commit_list *p = *list;
-
-		if (p && p->item->date >= item->date)
-			list = &p->next;
-		else {
-			p = commit_list_insert(item, list);
-			list = &p->next; /* skip newly added item */
-			prio_queue_get(q); /* pop item */
-		}
-	}
+	while (from->nr)
+		prio_queue_put(to, prio_queue_get(from));
 }
 
 static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp)
 {
 	struct prio_queue queue = { compare_commits_by_commit_date };
 	enum rewrite_result ret = rewrite_one_1(revs, pp, &queue);
-	merge_queue_into_list(&queue, &revs->commits);
+	merge_queue_into_prio_queue(&queue, &revs->commit_queue);
 	clear_prio_queue(&queue);
 	return ret;
 }
@@ -4331,6 +4326,7 @@ enum rev_walk_mode {
 	REV_WALK_REFLOG,
 	REV_WALK_TOPO,
 	REV_WALK_LIMITED,
+	REV_WALK_NO_WALK,
 	REV_WALK_STREAMING,
 };
 
@@ -4342,6 +4338,8 @@ static enum rev_walk_mode get_walk_mode(struct rev_info *revs)
 		return REV_WALK_TOPO;
 	if (revs->limited)
 		return REV_WALK_LIMITED;
+	if (revs->no_walk)
+		return REV_WALK_NO_WALK;
 	return REV_WALK_STREAMING;
 }
 
@@ -4349,6 +4347,9 @@ static struct commit *get_revision_1(struct rev_info *revs)
 {
 	enum rev_walk_mode mode = get_walk_mode(revs);
 
+	if (mode == REV_WALK_STREAMING && revs->commits)
+		rev_info_commit_list_to_queue(revs);
+
 	while (1) {
 		struct commit *commit;
 
@@ -4360,9 +4361,12 @@ static struct commit *get_revision_1(struct rev_info *revs)
 			commit = next_topo_commit(revs);
 			break;
 		case REV_WALK_LIMITED:
-		case REV_WALK_STREAMING:
+		case REV_WALK_NO_WALK:
 			commit = pop_commit(&revs->commits);
 			break;
+		case REV_WALK_STREAMING:
+			commit = prio_queue_get(&revs->commit_queue);
+			break;
 		}
 
 		if (!commit)
@@ -4390,12 +4394,13 @@ static struct commit *get_revision_1(struct rev_info *revs)
 			break;
 		case REV_WALK_STREAMING:
 			if (process_parents(revs, commit,
-					    &revs->commits, NULL) < 0) {
+					    &revs->commit_queue) < 0) {
 				if (!revs->ignore_missing_links)
 					die("Failed to traverse parents of commit %s",
 					    oid_to_hex(&commit->object.oid));
 			}
 			break;
+		case REV_WALK_NO_WALK:
 		case REV_WALK_LIMITED:
 			break;
 		}
diff --git a/revision.h b/revision.h
index 584f1338b5..04982a3d47 100644
--- a/revision.h
+++ b/revision.h
@@ -12,6 +12,7 @@
 #include "decorate.h"
 #include "ident.h"
 #include "list-objects-filter-options.h"
+#include "prio-queue.h"
 #include "strvec.h"
 
 /**
@@ -122,8 +123,14 @@ struct oidset;
 struct topo_walk_info;
 
 struct rev_info {
-	/* Starting list */
+	/*
+	 * Work queue of commits, stored as either a linked list or a
+	 * priority queue, but never both at the same time.
+	 * rev_info_commit_list_to_queue() converts list to queue.
+	 */
 	struct commit_list *commits;
+	struct prio_queue commit_queue;
+
 	struct object_array pending;
 	struct repository *repo;
 
@@ -400,6 +407,7 @@ struct rev_info {
  * uninitialized.
  */
 #define REV_INFO_INIT { \
+	.commit_queue = { .compare = compare_commits_by_commit_date }, \
 	.abbrev = DEFAULT_ABBREV, \
 	.simplify_history = 1, \
 	.pruning.flags.recursive = 1, \
@@ -478,6 +486,8 @@ void reset_revision_walk(void);
  */
 int prepare_revision_walk(struct rev_info *revs);
 
+/* Drain the commits linked list into the priority queue. */
+void rev_info_commit_list_to_queue(struct rev_info *revs);
 /**
  * Takes a pointer to a `rev_info` structure and iterates over it, returning a
  * `struct commit *` each time you call it. The end of the revision list is
-- 
gitgitgadget

      parent reply	other threads:[~2026-05-27 15:50 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-27 15:49 [PATCH 0/3] revision: use priority queue for streaming walks Kristofer Karlsson via GitGitGadget
2026-05-27 15:50 ` [PATCH 1/3] pack-objects: call release_revisions() after cruft traversal Kristofer Karlsson via GitGitGadget
2026-05-27 15:50 ` [PATCH 2/3] revision: introduce rev_walk_mode to clarify get_revision_1() Kristofer Karlsson via GitGitGadget
2026-05-27 15:50 ` Kristofer Karlsson via GitGitGadget [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=c0e78707f1e0fa97e96bcd6522648699f593590a.1779897003.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail$(echo .)com \
    --cc=git@vger$(echo .)kernel.org \
    --cc=krka@spotify$(echo .)com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox