From: Justin Tobler <jltobler@gmail•com>
To: git@vger•kernel.org
Cc: ps@pks•im, gitster@pobox•com, Justin Tobler <jltobler@gmail•com>
Subject: [PATCH v2 0/4] bulk-checkin: remove global transaction state
Date: Thu, 21 Aug 2025 18:22:45 -0500 [thread overview]
Message-ID: <20250821232249.319427-1-jltobler@gmail.com> (raw)
In-Reply-To: <20250820225531.1212935-1-jltobler@gmail.com>
Greetings,
The bulk-checkin subsystem provides an interface to write objects to the
object database in a bulk transaction. The state of an ongoing
transaction is stored across several global variables. This series aims
to remove this global transaction state in favor of storing state in in
`struct object_database`. This is done in preparation for a follow-up
change where the goal is to eventually move these transaction interfaces
into "odb.h".
Changes since V1:
- `index_blob_bulk_checkin()` now assumes that the caller always
provides a setup `struct odb_transaction`. Callers are adjusted to
ensure this.
- Functions in bulk-checkin.c now consistently access the repository
through the provided `odb_transaction`.
Thanks,
-Justin
Justin Tobler (4):
bulk-checkin: introduce object database transaction structure
bulk-checkin: remove global transaction state
bulk-checkin: require transaction for index_blob_bulk_checkin()
bulk-checkin: use repository variable from transaction
builtin/add.c | 5 +-
builtin/unpack-objects.c | 5 +-
builtin/update-index.c | 7 +-
bulk-checkin.c | 153 +++++++++++++++++++++------------------
bulk-checkin.h | 25 ++++---
cache-tree.c | 5 +-
object-file.c | 30 +++++---
odb.h | 8 ++
read-cache.c | 5 +-
9 files changed, 142 insertions(+), 101 deletions(-)
Range-diff against v1:
1: 5c9358e0b03 = 1: 5c9358e0b03 bulk-checkin: introduce object database transaction structure
2: 4a1b80a6baf = 2: 4a1b80a6baf bulk-checkin: remove global transaction state
-: ----------- > 3: ce329932fdd bulk-checkin: require transaction for index_blob_bulk_checkin()
3: 2ca78c8d343 ! 4: 08e26647915 bulk-checkin: wire repository variable
@@ Metadata
Author: Justin Tobler <jltobler@gmail•com>
## Commit message ##
- bulk-checkin: wire repository variable
+ bulk-checkin: use repository variable from transaction
The bulk-checkin subsystem depends on `the_repository`. Adapt functions
- and call sites to wire the repository variable where needed. The
- `USE_THE_REPOSITORY_VARIBALE` is still required as the
+ and call sites to access the repository through `struct odb_transaction`
+ instead. The `USE_THE_REPOSITORY_VARIBALE` is still required as the
`pack_compression_level` and `pack_size_limit_cfg` globals are still
used.
@@ bulk-checkin.c: struct odb_transaction {
};
-static void finish_tmp_packfile(struct strbuf *basename,
-+static void finish_tmp_packfile(struct repository *repo,
+- const char *pack_tmp_name,
+- struct pack_idx_entry **written_list,
+- uint32_t nr_written,
+- struct pack_idx_option *pack_idx_opts,
++static void finish_tmp_packfile(struct odb_transaction *transaction,
+ struct strbuf *basename,
- const char *pack_tmp_name,
- struct pack_idx_entry **written_list,
- uint32_t nr_written,
-@@ bulk-checkin.c: static void finish_tmp_packfile(struct strbuf *basename,
+ unsigned char hash[])
{
++ struct bulk_checkin_packfile *state = &transaction->packfile;
++ struct repository *repo = transaction->odb->repo;
char *idx_tmp_name = NULL;
- stage_tmp_packfiles(the_repository, basename, pack_tmp_name,
-+ stage_tmp_packfiles(repo, basename, pack_tmp_name,
- written_list, nr_written, NULL, pack_idx_opts, hash,
- &idx_tmp_name);
+- written_list, nr_written, NULL, pack_idx_opts, hash,
+- &idx_tmp_name);
- rename_tmp_packfile_idx(the_repository, basename, &idx_tmp_name);
++ stage_tmp_packfiles(repo, basename, state->pack_tmp_name,
++ state->written, state->nr_written, NULL,
++ &state->pack_idx_opts, hash, &idx_tmp_name);
+ rename_tmp_packfile_idx(repo, basename, &idx_tmp_name);
free(idx_tmp_name);
}
-static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
-+static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state,
-+ struct repository *repo)
++static void flush_bulk_checkin_packfile(struct odb_transaction *transaction)
{
++ struct bulk_checkin_packfile *state = &transaction->packfile;
++ struct repository *repo = transaction->odb->repo;
unsigned char hash[GIT_MAX_RAWSZ];
struct strbuf packname = STRBUF_INIT;
+
@@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
} else {
@@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_pack
- strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(the_repository),
- hash_to_hex(hash));
- finish_tmp_packfile(&packname, state->pack_tmp_name,
-+ strbuf_addf(&packname, "%s/pack/pack-%s.", repo_get_object_directory(repo),
+- state->written, state->nr_written,
+- &state->pack_idx_opts, hash);
++ strbuf_addf(&packname, "%s/pack/pack-%s.",
++ transaction->odb->sources->path,
+ hash_to_hex_algop(hash, repo->hash_algo));
-+ finish_tmp_packfile(repo, &packname, state->pack_tmp_name,
- state->written, state->nr_written,
- &state->pack_idx_opts, hash);
++
++ finish_tmp_packfile(transaction, &packname, hash);
for (uint32_t i = 0; i < state->nr_written; i++)
+ free(state->written[i]);
+
@@ bulk-checkin.c: static void flush_bulk_checkin_packfile(struct bulk_checkin_packfile *state)
strbuf_release(&packname);
@@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio
*/
- strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", repo_get_object_directory(the_repository));
+ strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX",
-+ repo_get_object_directory(transaction->odb->repo));
++ transaction->odb->sources->path);
temp = xmks_tempfile(temp_path.buf);
fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
delete_tempfile(&temp);
@@ bulk-checkin.c: static void flush_batch_fsync(struct odb_transaction *transactio
}
-static int already_written(struct bulk_checkin_packfile *state, struct object_id *oid)
-+static int already_written(struct bulk_checkin_packfile *state,
-+ struct repository *repo, struct object_id *oid)
++static int already_written(struct odb_transaction *transaction,
++ struct object_id *oid)
{
/* The object may already exist in the repository */
- if (odb_has_object(the_repository->objects, oid,
-+ if (odb_has_object(repo->objects, oid,
++ if (odb_has_object(transaction->odb, oid,
HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
return 1;
+ /* Might want to keep the list sorted */
+- for (uint32_t i = 0; i < state->nr_written; i++)
+- if (oideq(&state->written[i]->oid, oid))
++ for (uint32_t i = 0; i < transaction->packfile.nr_written; i++)
++ if (oideq(&transaction->packfile.written[i]->oid, oid))
+ return 1;
+
+ /* This is a new object we need to keep */
@@ bulk-checkin.c: static int stream_blob_to_pack(struct bulk_checkin_packfile *state,
+ }
/* Lazily create backing packfile for the state */
- static void prepare_to_stream(struct bulk_checkin_packfile *state,
-+ struct repository *repo,
+-static void prepare_to_stream(struct bulk_checkin_packfile *state,
++static void prepare_to_stream(struct odb_transaction *transaction,
unsigned flags)
{
++ struct bulk_checkin_packfile *state = &transaction->packfile;
if (!(flags & INDEX_WRITE_OBJECT) || state->f)
return;
- state->f = create_tmp_packfile(the_repository, &state->pack_tmp_name);
-+ state->f = create_tmp_packfile(repo, &state->pack_tmp_name);
++ state->f = create_tmp_packfile(transaction->odb->repo,
++ &state->pack_tmp_name);
reset_pack_idx_option(&state->pack_idx_opts);
/* Pretend we are going to write only one object */
@@ bulk-checkin.c: static void prepare_to_stream(struct bulk_checkin_packfile *state,
+ die_errno("unable to write pack header");
}
- static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
-+ struct repository *repo,
- struct object_id *result_oid,
- int fd, size_t size,
- const char *path, unsigned flags)
+-static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
+- struct object_id *result_oid,
+- int fd, size_t size,
+- const char *path, unsigned flags)
++int index_blob_bulk_checkin(struct odb_transaction *transaction,
++ struct object_id *result_oid,
++ int fd, size_t size,
++ const char *path, unsigned flags)
+ {
++ struct bulk_checkin_packfile *state = &transaction->packfile;
+ off_t seekback, already_hashed_to;
+ struct git_hash_ctx ctx;
+ unsigned char obuf[16384];
@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *state,
header_len = format_object_header((char *)obuf, sizeof(obuf),
OBJ_BLOB, size);
- the_hash_algo->init_fn(&ctx);
-+ repo->hash_algo->init_fn(&ctx);
++ transaction->odb->repo->hash_algo->init_fn(&ctx);
git_hash_update(&ctx, obuf, header_len);
/* Note: idx is non-NULL when we are writing */
@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
CALLOC_ARRAY(idx, 1);
- prepare_to_stream(state, flags);
-+ prepare_to_stream(state, repo, flags);
++ prepare_to_stream(transaction, flags);
hashfile_checkpoint_init(state->f, &checkpoint);
}
@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
while (1) {
- prepare_to_stream(state, flags);
-+ prepare_to_stream(state, repo, flags);
++ prepare_to_stream(transaction, flags);
if (idx) {
hashfile_checkpoint(state->f, &checkpoint);
idx->offset = state->offset;
@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
- flush_bulk_checkin_packfile(state);
-+ flush_bulk_checkin_packfile(state, repo);
++ flush_bulk_checkin_packfile(transaction);
if (lseek(fd, seekback, SEEK_SET) == (off_t) -1)
return error("cannot seek back");
}
@@ bulk-checkin.c: static int deflate_blob_to_pack(struct bulk_checkin_packfile *st
idx->crc32 = crc32_end(state->f);
- if (already_written(state, result_oid)) {
-+ if (already_written(state, repo, result_oid)) {
++ if (already_written(transaction, result_oid)) {
hashfile_truncate(state->f, &checkpoint);
state->offset = checkpoint.offset;
free(idx);
@@ bulk-checkin.c: void fsync_loose_object_bulk_checkin(struct odb_transaction *tra
}
-int index_blob_bulk_checkin(struct odb_transaction *transaction,
-+int index_blob_bulk_checkin(struct repository *repo,
-+ struct odb_transaction *transaction,
- struct object_id *oid, int fd, size_t size,
- const char *path, unsigned flags)
+- struct object_id *oid, int fd, size_t size,
+- const char *path, unsigned flags)
+-{
+- return deflate_blob_to_pack(&transaction->packfile, oid, fd, size, path,
+- flags);
+-}
+-
+ struct odb_transaction *begin_odb_transaction(struct object_database *odb)
{
- int status;
-
- if (transaction) {
-- status = deflate_blob_to_pack(&transaction->packfile, oid, fd,
-- size, path, flags);
-+ status = deflate_blob_to_pack(&transaction->packfile,
-+ repo, oid, fd, size, path, flags);
- } else {
- struct bulk_checkin_packfile state = { 0 };
-
-- status = deflate_blob_to_pack(&state, oid, fd, size, path, flags);
-- flush_bulk_checkin_packfile(&state);
-+ status = deflate_blob_to_pack(&state, repo, oid, fd, size, path, flags);
-+ flush_bulk_checkin_packfile(&state, repo);
- }
-
- return status;
+ if (!odb->transaction) {
@@ bulk-checkin.c: void flush_odb_transaction(struct odb_transaction *transaction)
return;
flush_batch_fsync(transaction);
- flush_bulk_checkin_packfile(&transaction->packfile);
-+ flush_bulk_checkin_packfile(&transaction->packfile,
-+ transaction->odb->repo);
++ flush_bulk_checkin_packfile(transaction);
}
void end_odb_transaction(struct odb_transaction *transaction)
-
- ## bulk-checkin.h ##
-@@ bulk-checkin.h: void fsync_loose_object_bulk_checkin(struct odb_transaction *transaction,
- * binary blobs, they generally do not want to get any conversion, and
- * callers should avoid this code path when filters are requested.
- */
--int index_blob_bulk_checkin(struct odb_transaction *transaction,
-+int index_blob_bulk_checkin(struct repository *repo,
-+ struct odb_transaction *transaction,
- struct object_id *oid, int fd, size_t size,
- const char *path, unsigned flags);
-
-
- ## object-file.c ##
-@@ object-file.c: int index_fd(struct index_state *istate, struct object_id *oid,
- ret = index_core(istate, oid, fd, xsize_t(st->st_size),
- type, path, flags);
- else
-- ret = index_blob_bulk_checkin(the_repository->objects->transaction,
-+ ret = index_blob_bulk_checkin(the_repository,
-+ the_repository->objects->transaction,
- oid, fd, xsize_t(st->st_size),
- path, flags);
- close(fd);
base-commit: c44beea485f0f2feaf460e2ac87fdd5608d63cf0
--
2.51.0
next prev parent reply other threads:[~2025-08-21 23:22 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-20 22:55 [PATCH 0/3] bulk-checkin: remove global transaction state Justin Tobler
2025-08-20 22:55 ` [PATCH 1/3] bulk-checkin: introduce object database transaction structure Justin Tobler
2025-08-20 22:55 ` [PATCH 2/3] bulk-checkin: remove global transaction state Justin Tobler
2025-08-20 22:55 ` [PATCH 3/3] bulk-checkin: wire repository variable Justin Tobler
2025-08-21 0:15 ` Junio C Hamano
2025-08-21 20:26 ` Justin Tobler
2025-08-21 20:32 ` Junio C Hamano
2025-08-21 0:00 ` [PATCH 0/3] bulk-checkin: remove global transaction state Junio C Hamano
2025-08-21 23:22 ` Justin Tobler [this message]
2025-08-21 23:22 ` [PATCH v2 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
2025-08-21 23:22 ` [PATCH v2 2/4] bulk-checkin: remove global transaction state Justin Tobler
2025-08-22 16:37 ` Junio C Hamano
2025-08-22 18:07 ` Justin Tobler
2025-08-22 20:25 ` Junio C Hamano
2025-08-21 23:22 ` [PATCH v2 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
2025-08-22 16:49 ` Junio C Hamano
2025-08-22 19:13 ` Justin Tobler
2025-08-22 20:33 ` Junio C Hamano
2025-08-21 23:22 ` [PATCH v2 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
2025-08-22 17:03 ` Junio C Hamano
2025-08-22 19:38 ` Justin Tobler
2025-08-22 21:34 ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Justin Tobler
2025-08-22 21:34 ` [PATCH v3 1/4] bulk-checkin: introduce object database transaction structure Justin Tobler
2025-08-22 21:34 ` [PATCH v3 2/4] bulk-checkin: remove global transaction state Justin Tobler
2025-08-22 21:34 ` [PATCH v3 3/4] bulk-checkin: require transaction for index_blob_bulk_checkin() Justin Tobler
2025-08-22 21:35 ` [PATCH v3 4/4] bulk-checkin: use repository variable from transaction Justin Tobler
2025-08-25 20:25 ` [PATCH v3 0/4] bulk-checkin: remove global transaction state Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250821232249.319427-1-jltobler@gmail.com \
--to=jltobler@gmail$(echo .)com \
--cc=git@vger$(echo .)kernel.org \
--cc=gitster@pobox$(echo .)com \
--cc=ps@pks$(echo .)im \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox