From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail•com>
To: git@vger•kernel.org
Cc: gitster@pobox•com, peff@peff•net, Patrick Steinhardt <ps@pks•im>,
Johannes Schindelin <Johannes.Schindelin@gmx•de>,
Derrick Stolee <stolee@gmail•com>,
Derrick Stolee <stolee@gmail•com>
Subject: [PATCH v2 1/3] test-tool: add pack-deltas helper
Date: Mon, 28 Apr 2025 20:24:43 +0000 [thread overview]
Message-ID: <41aac8e782fdd9e2a19c6fadd27807782fc36203.1745871885.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1906.v2.git.1745871885.gitgitgadget@gmail.com>
From: Derrick Stolee <stolee@gmail•com>
When trying to demonstrate certain behavior in tests, it can be helpful
to create packfiles that have specific delta structures. 'git
pack-objects' uses various algorithms to select deltas based on their
compression rates, but that does not always demonstrate all possible
packfile shapes. This becomes especially important when wanting to test
'git index-pack' and its ability to parse certain pack shapes.
We have prior art in t/lib-pack.sh, where certain delta structures are
produced by manually writing certain opaque pack contents. However,
producing these script updates is cumbersome and difficult to do as a
contributor.
Instead, create a new test-tool, 'test-tool pack-deltas', that reads a
list of instructions for which objects to include in a packfile and how
those objects should be written in delta form.
At the moment, this only supports REF_DELTAs as those are the kinds of
deltas needed to exercise a bug in 'git index-pack'.
Signed-off-by: Derrick Stolee <stolee@gmail•com>
---
Makefile | 1 +
t/helper/meson.build | 1 +
t/helper/test-pack-deltas.c | 148 ++++++++++++++++++++++++++++++++++++
t/helper/test-tool.c | 1 +
t/helper/test-tool.h | 1 +
5 files changed, 152 insertions(+)
create mode 100644 t/helper/test-pack-deltas.c
diff --git a/Makefile b/Makefile
index 13f9062a056..c4d21ccd3d1 100644
--- a/Makefile
+++ b/Makefile
@@ -821,6 +821,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o
TEST_BUILTINS_OBJS += test-mktemp.o
TEST_BUILTINS_OBJS += test-name-hash.o
TEST_BUILTINS_OBJS += test-online-cpus.o
+TEST_BUILTINS_OBJS += test-pack-deltas.o
TEST_BUILTINS_OBJS += test-pack-mtimes.o
TEST_BUILTINS_OBJS += test-parse-options.o
TEST_BUILTINS_OBJS += test-parse-pathspec-file.o
diff --git a/t/helper/meson.build b/t/helper/meson.build
index d2cabaa2bcf..d4e8b26df8d 100644
--- a/t/helper/meson.build
+++ b/t/helper/meson.build
@@ -36,6 +36,7 @@ test_tool_sources = [
'test-mktemp.c',
'test-name-hash.c',
'test-online-cpus.c',
+ 'test-pack-deltas.c',
'test-pack-mtimes.c',
'test-parse-options.c',
'test-parse-pathspec-file.c',
diff --git a/t/helper/test-pack-deltas.c b/t/helper/test-pack-deltas.c
new file mode 100644
index 00000000000..4caa024b1eb
--- /dev/null
+++ b/t/helper/test-pack-deltas.c
@@ -0,0 +1,148 @@
+#define USE_THE_REPOSITORY_VARIABLE
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "delta.h"
+#include "git-zlib.h"
+#include "hash.h"
+#include "hex.h"
+#include "pack.h"
+#include "pack-objects.h"
+#include "parse-options.h"
+#include "setup.h"
+#include "strbuf.h"
+#include "string-list.h"
+
+static const char *usage_str[] = { /* usage lines handed to parse_options() and usage_with_options() in cmd__pack_deltas() */
+ "test-tool pack-deltas --num-objects <num-objects>",
+ NULL
+};
+
+/*
+ * Deflate the 'size' bytes found at *pptr, using a zlib stream that is
+ * initialised with level 1.
+ *
+ * On return *pptr points at a newly xmalloc()'d buffer holding the
+ * compressed bytes, and the input buffer that *pptr used to reference
+ * has been freed. The caller therefore owns only the new buffer.
+ *
+ * Returns the number of compressed bytes produced (stream.total_out).
+ */
+static unsigned long do_compress(void **pptr, unsigned long size)
+{
+	git_zstream zs;
+	void *raw = *pptr;
+	void *packed;
+	unsigned long bound;
+	int status;
+
+	git_deflate_init(&zs, 1);
+
+	/* Size the output buffer from the bound reported for 'size' bytes. */
+	bound = git_deflate_bound(&zs, size);
+	packed = xmalloc(bound);
+
+	zs.next_in = raw;
+	zs.avail_in = size;
+	zs.next_out = packed;
+	zs.avail_out = bound;
+
+	/* Keep deflating for as long as the stream reports Z_OK. */
+	do {
+		status = git_deflate(&zs, Z_FINISH);
+	} while (status == Z_OK);
+	git_deflate_end(&zs);
+
+	free(raw);
+	*pptr = packed;
+	return zs.total_out;
+}
+
+/*
+ * Append object 'oid' to the pack stream 'f', stored as a REF_DELTA
+ * against the object 'base'.
+ *
+ * Both objects are read from the_repository. The delta produced by
+ * diff_delta() is deflated with do_compress() and then written as:
+ *   1. an in-pack object header of type OBJ_REF_DELTA carrying the
+ *      uncompressed delta size,
+ *   2. the raw hash of the base object,
+ *   3. the compressed delta bytes.
+ *
+ * Dies if either object cannot be read or if no delta could be created.
+ */
+static void write_ref_delta(struct hashfile *f,
+			    struct object_id *oid,
+			    struct object_id *base)
+{
+	unsigned char header[MAX_PACK_OBJECT_HEADER];
+	unsigned long size, base_size, delta_size, compressed_size, hdrlen;
+	enum object_type type;
+	void *base_buf, *delta_buf;
+	void *buf = repo_read_object_file(the_repository,
+					  oid, &type,
+					  &size);
+
+	if (!buf)
+		die("unable to read %s", oid_to_hex(oid));
+
+	base_buf = repo_read_object_file(the_repository,
+					 base, &type,
+					 &base_size);
+
+	if (!base_buf)
+		die("unable to read %s", oid_to_hex(base));
+
+	delta_buf = diff_delta(base_buf, base_size,
+			       buf, size, &delta_size, 0);
+
+	/*
+	 * diff_delta() hands back NULL when it cannot build a delta (for
+	 * instance when one of the buffers is empty). delta_size is not
+	 * meaningful in that case, so stop here instead of compressing
+	 * from a NULL pointer with an indeterminate length.
+	 */
+	if (!delta_buf)
+		die("unable to create delta of %s against %s",
+		    oid_to_hex(oid), oid_to_hex(base));
+
+	/* do_compress() frees the raw delta and replaces delta_buf. */
+	compressed_size = do_compress(&delta_buf, delta_size);
+
+	/* The header records the uncompressed delta size. */
+	hdrlen = encode_in_pack_object_header(header, sizeof(header),
+					      OBJ_REF_DELTA, delta_size);
+	hashwrite(f, header, hdrlen);
+	hashwrite(f, base->hash, the_repository->hash_algo->rawsz);
+	hashwrite(f, delta_buf, compressed_size);
+
+	free(buf);
+	free(base_buf);
+	free(delta_buf);
+}
+
+/*
+ * Entry point for "test-tool pack-deltas --num-objects <num-objects>".
+ *
+ * Writes a pack header announcing <num-objects> objects to stdout, then
+ * reads instructions from stdin, one per line, and writes one pack entry
+ * per instruction before finalizing the hashfile. The announced count
+ * is written as given; it is not checked against the number of input
+ * lines.
+ *
+ * Each input line has the form
+ *
+ *	<type> <content-oid> [<base-oid>]
+ *
+ * where <type> is REF_DELTA, OFS_DELTA, or FULL. Only REF_DELTA is
+ * implemented, and it requires <base-oid>; the other two types die.
+ *
+ * Returns 0 on success. Every error path calls die() or
+ * usage_with_options().
+ */
+int cmd__pack_deltas(int argc, const char **argv)
+{
+	int num_objects = -1;
+	struct hashfile *f;
+	struct strbuf line = STRBUF_INIT;
+	struct option options[] = {
+		OPT_INTEGER('n', "num-objects", &num_objects, N_("the number of objects to write")),
+		OPT_END()
+	};
+
+	argc = parse_options(argc, argv, NULL,
+			     options, usage_str, 0);
+
+	/* --num-objects is mandatory and no positional arguments are accepted. */
+	if (argc || num_objects < 0)
+		usage_with_options(usage_str, options);
+
+	setup_git_directory();
+
+	f = hashfd(the_repository->hash_algo, 1, "<stdout>");
+	write_pack_header(f, num_objects);
+
+	/* Read each line from stdin into 'line' */
+	while (strbuf_getline_lf(&line, stdin) != EOF) {
+		const char *type_str, *content_oid_str, *base_oid_str = NULL;
+		struct object_id content_oid, base_oid;
+		struct string_list items = STRING_LIST_INIT_NODUP;
+		/*
+		 * Tokenize into two or three parts:
+		 * 1. REF_DELTA, OFS_DELTA, or FULL.
+		 * 2. The object ID for the content object.
+		 * 3. The object ID for the base object (optional).
+		 */
+		if (string_list_split_in_place(&items, line.buf, " ", 3) < 0)
+			die("invalid input format: %s", line.buf);
+
+		if (items.nr < 2)
+			die("invalid input format: %s", line.buf);
+
+		type_str = items.items[0].string;
+		content_oid_str = items.items[1].string;
+
+		if (get_oid_hex(content_oid_str, &content_oid))
+			die("invalid object: %s", content_oid_str);
+		if (items.nr >= 3) {
+			base_oid_str = items.items[2].string;
+			if (get_oid_hex(base_oid_str, &base_oid))
+				die("invalid object: %s", base_oid_str);
+		}
+		/*
+		 * The list was initialised NODUP and split in place, so the
+		 * token pointers refer into line.buf and remain usable after
+		 * the list itself is cleared.
+		 */
+		string_list_clear(&items, 0);
+
+		if (!strcmp(type_str, "REF_DELTA")) {
+			/*
+			 * base_oid is only filled in when a third token was
+			 * present; refuse to pass it on uninitialised.
+			 */
+			if (!base_oid_str)
+				die("REF_DELTA requires a base object: %s",
+				    content_oid_str);
+			write_ref_delta(f, &content_oid, &base_oid);
+		} else if (!strcmp(type_str, "OFS_DELTA")) {
+			die("OFS_DELTA not implemented");
+		} else if (!strcmp(type_str, "FULL")) {
+			die("FULL not implemented");
+		} else {
+			die("unknown pack type: %s", type_str);
+		}
+	}
+
+	finalize_hashfile(f, NULL, FSYNC_COMPONENT_PACK,
+			  CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
+	strbuf_release(&line);
+	return 0;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 50dc4dac4ed..74812ed86d3 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -46,6 +46,7 @@ static struct test_cmd cmds[] = {
{ "mktemp", cmd__mktemp },
{ "name-hash", cmd__name_hash },
{ "online-cpus", cmd__online_cpus },
+ { "pack-deltas", cmd__pack_deltas },
{ "pack-mtimes", cmd__pack_mtimes },
{ "parse-options", cmd__parse_options },
{ "parse-options-flags", cmd__parse_options_flags },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 6d62a5b53d9..2571a3ccfe8 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -39,6 +39,7 @@ int cmd__mergesort(int argc, const char **argv);
int cmd__mktemp(int argc, const char **argv);
int cmd__name_hash(int argc, const char **argv);
int cmd__online_cpus(int argc, const char **argv);
+int cmd__pack_deltas(int argc, const char **argv);
int cmd__pack_mtimes(int argc, const char **argv);
int cmd__parse_options(int argc, const char **argv);
int cmd__parse_options_flags(int argc, const char **argv);
--
gitgitgadget
next prev parent reply other threads:[~2025-04-28 20:24 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-23 17:40 [PATCH 0/3] Fix REF_DELTA chain bug in 'git index-pack' Derrick Stolee via GitGitGadget
2025-04-23 17:40 ` [PATCH 1/3] test-tool: add pack-deltas helper Derrick Stolee via GitGitGadget
2025-04-23 19:26 ` Junio C Hamano
2025-04-23 19:32 ` Derrick Stolee
2025-04-24 19:41 ` Junio C Hamano
2025-04-24 20:06 ` Derrick Stolee
2025-04-24 20:56 ` Junio C Hamano
2025-04-25 4:34 ` Patrick Steinhardt
2025-04-25 9:34 ` Johannes Schindelin
2025-04-25 9:45 ` Patrick Steinhardt
2025-04-25 9:51 ` Johannes Schindelin
2025-04-25 16:27 ` Junio C Hamano
2025-04-28 15:22 ` Derrick Stolee
2025-04-28 16:37 ` Junio C Hamano
2025-04-28 18:59 ` Derrick Stolee
2025-04-28 20:35 ` Junio C Hamano
2025-04-23 17:40 ` [PATCH 2/3] t5309: create failing test for 'git index-pack' Derrick Stolee via GitGitGadget
2025-04-23 19:37 ` Junio C Hamano
2025-04-23 17:40 ` [PATCH 3/3] index-pack: allow revisiting REF_DELTA chains Derrick Stolee via GitGitGadget
2025-04-24 21:41 ` Junio C Hamano
2025-04-25 3:49 ` Derrick Stolee
2025-04-28 20:24 ` [PATCH v2 0/3] Fix REF_DELTA chain bug in 'git index-pack' Derrick Stolee via GitGitGadget
2025-04-28 20:24 ` Derrick Stolee via GitGitGadget [this message]
2025-04-28 20:24 ` [PATCH v2 2/3] t5309: create failing test for " Derrick Stolee via GitGitGadget
2025-04-28 20:24 ` [PATCH v2 3/3] index-pack: allow revisiting REF_DELTA chains Derrick Stolee via GitGitGadget
2025-05-07 2:08 ` Taylor Blau
2025-05-07 13:47 ` Derrick Stolee
2025-04-28 22:40 ` [PATCH v2 0/3] Fix REF_DELTA chain bug in 'git index-pack' Junio C Hamano
2025-04-29 5:33 ` Patrick Steinhardt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41aac8e782fdd9e2a19c6fadd27807782fc36203.1745871885.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail$(echo .)com \
--cc=Johannes.Schindelin@gmx$(echo .)de \
--cc=git@vger$(echo .)kernel.org \
--cc=gitster@pobox$(echo .)com \
--cc=peff@peff$(echo .)net \
--cc=ps@pks$(echo .)im \
--cc=stolee@gmail$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox