From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail•com>
To: git@vger•kernel.org
Cc: msporleder@gmail•com, "Nguyễn Thái Ngọc Duy" <pclouds@gmail•com>
Subject: [PATCH] index-pack: reduce memory footprint a bit
Date: Mon, 9 Feb 2015 20:18:49 +0700 [thread overview]
Message-ID: <1423487929-28019-1-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <CACsJy8A=6m5sWnDhPPMNrWbZ=fOMXPxO_1GVh-WpHycf5gm+rg@mail.gmail.com>
For each object in the input pack, we need one struct object_entry. On
x86-64, this struct is 64 bytes long. However:
- The 8 bytes for delta_depth and base_object_no are only useful when
show_stat is set. And it's never set unless someone is debugging.
- The three fields hdr_size, type and real_type take 4 bytes each
even though they never use more than 4 bits.
By moving delta_depth and base_object_no out of struct object_entry
and making the other 3 fields one byte long instead of 4, we shrink
this struct by 25%.
On a 3.4M object repo that's about 53MB. The saving is less impressive
compared to index-pack's total memory use (about 400MB before delta
resolving, so the saving is just 13%).
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail•com>
---
I'm not sure if this patch is worth pursuing. It makes the code a
little bit harder to read. I was just wondering how much memory could
be saved.
We could maybe save some more by splitting union delta_base with the
assumption that pack-objects would utilize delta-ofs-offset as much
as possible, which makes the delta_base.sha1[] a waste most of the
time.
This repo has 2803447 deltas, and because it's a clone case, all
deltas should be ofs-delta, which means we waste about 32MB. But
shrinking this could get ugly.
builtin/index-pack.c | 30 +++++++++++++++++++-----------
1 file changed, 19 insertions(+), 11 deletions(-)
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 4632117..479ec5e 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -18,9 +18,12 @@ static const char index_pack_usage[] =
struct object_entry {
struct pack_idx_entry idx;
unsigned long size;
- unsigned int hdr_size;
- enum object_type type;
- enum object_type real_type;
+ unsigned char hdr_size;
+ char type;
+ char real_type;
+};
+
+struct object_entry_extra {
unsigned delta_depth;
int base_object_no;
};
@@ -64,6 +67,7 @@ struct delta_entry {
};
static struct object_entry *objects;
+static struct object_entry_extra *objects_extra;
static struct delta_entry *deltas;
static struct thread_local nothread_data;
static int nr_objects;
@@ -873,13 +877,15 @@ static void resolve_delta(struct object_entry *delta_obj,
void *base_data, *delta_data;
if (show_stat) {
- delta_obj->delta_depth = base->obj->delta_depth + 1;
+ int i = delta_obj - objects;
+ int j = base->obj - objects;
+ objects_extra[i].delta_depth = objects_extra[j].delta_depth + 1;
deepest_delta_lock();
- if (deepest_delta < delta_obj->delta_depth)
- deepest_delta = delta_obj->delta_depth;
+ if (deepest_delta < objects_extra[i].delta_depth)
+ deepest_delta = objects_extra[i].delta_depth;
deepest_delta_unlock();
+ objects_extra[i].base_object_no = j;
}
- delta_obj->base_object_no = base->obj - objects;
delta_data = get_data_from_pack(delta_obj);
base_data = get_base_data(base);
result->obj = delta_obj;
@@ -902,7 +908,7 @@ static void resolve_delta(struct object_entry *delta_obj,
* "want"; if so, swap in "set" and return true. Otherwise, leave it untouched
* and return false.
*/
-static int compare_and_swap_type(enum object_type *type,
+static int compare_and_swap_type(char *type,
enum object_type want,
enum object_type set)
{
@@ -1499,7 +1505,7 @@ static void show_pack_info(int stat_only)
struct object_entry *obj = &objects[i];
if (is_delta_type(obj->type))
- chain_histogram[obj->delta_depth - 1]++;
+ chain_histogram[objects_extra[i].delta_depth - 1]++;
if (stat_only)
continue;
printf("%s %-6s %lu %lu %"PRIuMAX,
@@ -1508,8 +1514,8 @@ static void show_pack_info(int stat_only)
(unsigned long)(obj[1].idx.offset - obj->idx.offset),
(uintmax_t)obj->idx.offset);
if (is_delta_type(obj->type)) {
- struct object_entry *bobj = &objects[obj->base_object_no];
- printf(" %u %s", obj->delta_depth, sha1_to_hex(bobj->idx.sha1));
+ struct object_entry *bobj = &objects[objects_extra[i].base_object_no];
+ printf(" %u %s", objects_extra[i].delta_depth, sha1_to_hex(bobj->idx.sha1));
}
putchar('\n');
}
@@ -1672,6 +1678,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
curr_pack = open_pack_file(pack_name);
parse_pack_header();
objects = xcalloc(nr_objects + 1, sizeof(struct object_entry));
+ if (show_stat)
+ objects_extra = xcalloc(nr_objects + 1, sizeof(struct object_entry_extra));
deltas = xcalloc(nr_objects, sizeof(struct delta_entry));
parse_pack_objects(pack_sha1);
resolve_deltas();
--
2.3.0.rc1.137.g477eb31
next prev parent reply other threads:[~2015-02-09 13:18 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-01-08 16:10 low memory system to clone larger repo matthew sporleder
2015-02-09 10:40 ` Duy Nguyen
2015-02-09 11:20 ` Matt Sporleder
2015-02-09 12:32 ` Duy Nguyen
2015-02-09 13:18 ` Nguyễn Thái Ngọc Duy [this message]
2015-02-09 19:27 ` [PATCH] index-pack: reduce memory footprint a bit Junio C Hamano
2015-02-10 9:30 ` Duy Nguyen
2015-02-10 12:08 ` matthew sporleder
2015-02-10 18:49 ` Junio C Hamano
2015-02-11 13:01 ` matthew sporleder
2015-02-11 13:10 ` Duy Nguyen
2015-02-10 3:56 ` low memory system to clone larger repo matthew sporleder
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1423487929-28019-1-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail$(echo .)com \
--cc=git@vger$(echo .)kernel.org \
--cc=msporleder@gmail$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox