public inbox for git@vger.kernel.org 
 help / color / mirror / Atom feed
From: "brian m. carlson" <sandals@crustytoothpaste•net>
To: <git@vger•kernel.org>
Cc: Junio C Hamano <gitster@pobox•com>,
	Patrick Steinhardt <ps@pks•im>,
	Ezekiel Newren <ezekielnewren@gmail•com>
Subject: [PATCH 11/14] rust: add functionality to hash an object
Date: Mon, 27 Oct 2025 00:44:01 +0000	[thread overview]
Message-ID: <20251027004404.2152927-12-sandals@crustytoothpaste.net> (raw)
In-Reply-To: <20251027004404.2152927-1-sandals@crustytoothpaste.net>

In a future commit, we'll want to hash some data when dealing with a
loose object map.  Let's make this easy by creating a structure to hash
objects and calling into the C functions as necessary to perform the
hashing.  For now, we only implement safe hashing, but in the future we
could add unsafe hashing if we want.  Implement Clone and Drop to
appropriately manage our memory.  Additionally implement Write so that
the hasher is easy to use with other code that writes its output to any
type implementing this trait.

While we're at it, add some tests for the various cases in this file.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste•net>
---
 src/hash.rs | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 157 insertions(+)

diff --git a/src/hash.rs b/src/hash.rs
index a5b9493bd8..8798a50aef 100644
--- a/src/hash.rs
+++ b/src/hash.rs
@@ -10,6 +10,7 @@
 // You should have received a copy of the GNU General Public License along
 // with this program; if not, see <https://www.gnu.org/licenses/>.
 
+use std::io::{self, Write};
 use std::os::raw::c_void;
 
 pub const GIT_MAX_RAWSZ: usize = 32;
@@ -39,6 +40,81 @@ impl ObjectID {
     }
 }
 
+pub struct Hasher {
+    algo: HashAlgorithm,
+    safe: bool,
+    ctx: *mut c_void,
+}
+
+impl Hasher {
+    /// Create a new safe hasher.
+    pub fn new(algo: HashAlgorithm) -> Hasher {
+        let ctx = unsafe { c::git_hash_alloc() };
+        unsafe { c::git_hash_init(ctx, algo.hash_algo_ptr()) };
+        Hasher {
+            algo,
+            safe: true,
+            ctx,
+        }
+    }
+
+    /// Return whether this is a safe hasher.
+    pub fn is_safe(&self) -> bool {
+        self.safe
+    }
+
+    /// Update the hasher with the specified data.
+    pub fn update(&mut self, data: &[u8]) {
+        unsafe { c::git_hash_update(self.ctx, data.as_ptr() as *const c_void, data.len()) };
+    }
+
+    /// Return an object ID, consuming the hasher.
+    pub fn into_oid(self) -> ObjectID {
+        let mut oid = ObjectID {
+            hash: [0u8; 32],
+            algo: self.algo as u32,
+        };
+        unsafe { c::git_hash_final_oid(&mut oid as *mut ObjectID as *mut c_void, self.ctx) };
+        oid
+    }
+
+    /// Return a hash as a `Vec`, consuming the hasher.
+    pub fn into_vec(self) -> Vec<u8> {
+        let mut v = vec![0u8; self.algo.raw_len()];
+        unsafe { c::git_hash_final(v.as_mut_ptr(), self.ctx) };
+        v
+    }
+}
+
+impl Write for Hasher {
+    fn write(&mut self, data: &[u8]) -> io::Result<usize> {
+        self.update(data);
+        Ok(data.len())
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        Ok(())
+    }
+}
+
+impl Clone for Hasher {
+    fn clone(&self) -> Hasher {
+        let ctx = unsafe { c::git_hash_alloc() };
+        unsafe { c::git_hash_clone(ctx, self.ctx) };
+        Hasher {
+            algo: self.algo,
+            safe: self.safe,
+            ctx,
+        }
+    }
+}
+
+impl Drop for Hasher {
+    fn drop(&mut self) {
+        unsafe { c::git_hash_free(self.ctx) };
+    }
+}
+
 /// A hash algorithm,
 #[repr(C)]
 #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
@@ -167,6 +243,11 @@ impl HashAlgorithm {
     pub fn hash_algo_ptr(self) -> *const c_void {
         unsafe { c::hash_algo_ptr_by_offset(self as u32) }
     }
+
+    /// Create a hasher for this algorithm.
+    pub fn hasher(self) -> Hasher {
+        Hasher::new(self)
+    }
 }
 
 pub mod c {
@@ -174,5 +255,81 @@ pub mod c {
 
     extern "C" {
         pub fn hash_algo_ptr_by_offset(n: u32) -> *const c_void;
+        pub fn unsafe_hash_algo(algop: *const c_void) -> *const c_void;
+        pub fn git_hash_alloc() -> *mut c_void;
+        pub fn git_hash_free(ctx: *mut c_void);
+        pub fn git_hash_init(dst: *mut c_void, algop: *const c_void);
+        pub fn git_hash_clone(dst: *mut c_void, src: *const c_void);
+        pub fn git_hash_update(ctx: *mut c_void, inp: *const c_void, len: usize);
+        pub fn git_hash_final(hash: *mut u8, ctx: *mut c_void);
+        pub fn git_hash_final_oid(hash: *mut c_void, ctx: *mut c_void);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{HashAlgorithm, ObjectID};
+    use std::io::Write;
+
+    fn all_algos() -> &'static [HashAlgorithm] {
+        &[HashAlgorithm::SHA1, HashAlgorithm::SHA256]
+    }
+
+    #[test]
+    fn format_id_round_trips() {
+        for algo in all_algos() {
+            assert_eq!(
+                *algo,
+                HashAlgorithm::from_format_id(algo.format_id()).unwrap()
+            );
+        }
+    }
+
+    #[test]
+    fn offset_round_trips() {
+        for algo in all_algos() {
+            assert_eq!(*algo, HashAlgorithm::from_u32(*algo as u32).unwrap());
+        }
+    }
+
+    #[test]
+    fn slices_have_correct_length() {
+        for algo in all_algos() {
+            for oid in [algo.null_oid(), algo.empty_blob(), algo.empty_tree()] {
+                assert_eq!(oid.as_slice().len(), algo.raw_len());
+            }
+        }
+    }
+
+    #[test]
+    fn hasher_works_correctly() {
+        for algo in all_algos() {
+            let tests: &[(&[u8], &ObjectID)] = &[
+                (b"blob 0\0", algo.empty_blob()),
+                (b"tree 0\0", algo.empty_tree()),
+            ];
+            for (data, oid) in tests {
+                let mut h = algo.hasher();
+                assert_eq!(h.is_safe(), true);
+                // Test that this works incrementally.
+                h.update(&data[0..2]);
+                h.update(&data[2..]);
+
+                let h2 = h.clone();
+
+                let actual_oid = h.into_oid();
+                assert_eq!(**oid, actual_oid);
+
+                let v = h2.into_vec();
+                assert_eq!((*oid).as_slice(), &v);
+
+                let mut h = algo.hasher();
+                h.write_all(&data[0..2]).unwrap();
+                h.write_all(&data[2..]).unwrap();
+
+                let actual_oid = h.into_oid();
+                assert_eq!(**oid, actual_oid);
+            }
+        }
     }
 }

  parent reply	other threads:[~2025-10-27  0:44 UTC|newest]

Thread overview: 118+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-27  0:43 [PATCH 00/14] SHA-1/SHA-256 interoperability, part 2 brian m. carlson
2025-10-27  0:43 ` [PATCH 01/14] repository: require Rust support for interoperability brian m. carlson
2025-10-28  9:16   ` Patrick Steinhardt
2025-10-27  0:43 ` [PATCH 02/14] conversion: don't crash when no destination algo brian m. carlson
2025-10-27  0:43 ` [PATCH 03/14] hash: use uint32_t for object_id algorithm brian m. carlson
2025-10-28  9:16   ` Patrick Steinhardt
2025-10-28 18:28     ` Ezekiel Newren
2025-10-28 19:33     ` Junio C Hamano
2025-10-28 19:58       ` Ezekiel Newren
2025-10-28 20:20         ` Junio C Hamano
2025-10-30  0:23       ` brian m. carlson
2025-10-30  1:58         ` Collin Funk
2025-11-03  1:30           ` brian m. carlson
2025-10-29  0:33     ` brian m. carlson
2025-10-29  9:07       ` Patrick Steinhardt
2025-10-27  0:43 ` [PATCH 04/14] rust: add a ObjectID struct brian m. carlson
2025-10-28  9:17   ` Patrick Steinhardt
2025-10-28 19:07     ` Ezekiel Newren
2025-10-29  0:42       ` brian m. carlson
2025-10-28 19:40     ` Junio C Hamano
2025-10-29  0:47       ` brian m. carlson
2025-10-29  0:36     ` brian m. carlson
2025-10-29  9:08       ` Patrick Steinhardt
2025-10-30  0:32         ` brian m. carlson
2025-10-27  0:43 ` [PATCH 05/14] rust: add a hash algorithm abstraction brian m. carlson
2025-10-28  9:18   ` Patrick Steinhardt
2025-10-28 17:09     ` Ezekiel Newren
2025-10-28 20:00   ` Junio C Hamano
2025-10-28 20:03     ` Ezekiel Newren
2025-10-29 13:27       ` Junio C Hamano
2025-10-29 14:32         ` Junio C Hamano
2025-10-27  0:43 ` [PATCH 06/14] hash: add a function to look up hash algo structs brian m. carlson
2025-10-28  9:18   ` Patrick Steinhardt
2025-10-28 20:12   ` Junio C Hamano
2025-11-04  1:48     ` brian m. carlson
2025-11-04 10:24       ` Junio C Hamano
2025-10-27  0:43 ` [PATCH 07/14] csum-file: define hashwrite's count as a uint32_t brian m. carlson
2025-10-28 17:22   ` Ezekiel Newren
2025-10-27  0:43 ` [PATCH 08/14] write-or-die: add an fsync component for the loose object map brian m. carlson
2025-10-27  0:43 ` [PATCH 09/14] hash: expose hash context functions to Rust brian m. carlson
2025-10-29 16:32   ` Junio C Hamano
2025-10-30 21:42     ` brian m. carlson
2025-10-30 21:52       ` Junio C Hamano
2025-10-27  0:44 ` [PATCH 10/14] rust: add a build.rs script for tests brian m. carlson
2025-10-28  9:18   ` Patrick Steinhardt
2025-10-28 17:42     ` Ezekiel Newren
2025-10-29 16:43   ` Junio C Hamano
2025-10-29 22:10     ` Ezekiel Newren
2025-10-29 23:12       ` Junio C Hamano
2025-10-30  6:26         ` Patrick Steinhardt
2025-10-30 13:54           ` Junio C Hamano
2025-10-31 22:43             ` Ezekiel Newren
2025-11-01 11:18               ` Junio C Hamano
2025-10-27  0:44 ` brian m. carlson [this message]
2025-10-28  9:18   ` [PATCH 11/14] rust: add functionality to hash an object Patrick Steinhardt
2025-10-29  0:53     ` brian m. carlson
2025-10-29  9:07       ` Patrick Steinhardt
2025-10-28 18:05   ` Ezekiel Newren
2025-10-29  1:05     ` brian m. carlson
2025-10-29 16:02       ` Ben Knoble
2025-10-27  0:44 ` [PATCH 12/14] rust: add a new binary loose object map format brian m. carlson
2025-10-28  9:18   ` Patrick Steinhardt
2025-10-29  1:37     ` brian m. carlson
2025-10-29  9:07       ` Patrick Steinhardt
2025-10-29 17:03   ` Junio C Hamano
2025-10-29 18:21   ` Junio C Hamano
2025-10-27  0:44 ` [PATCH 13/14] rust: add a small wrapper around the hashfile code brian m. carlson
2025-10-28 18:19   ` Ezekiel Newren
2025-10-29  1:39     ` brian m. carlson
2025-10-27  0:44 ` [PATCH 14/14] object-file-convert: always make sure object ID algo is valid brian m. carlson
2025-10-29 20:07 ` [PATCH 00/14] SHA-1/SHA-256 interoperability, part 2 Junio C Hamano
2025-10-29 20:15   ` Junio C Hamano
2025-11-11  0:12 ` Ezekiel Newren
2025-11-14 17:25 ` Junio C Hamano
2025-11-14 21:11   ` Junio C Hamano
2025-11-17  6:56   ` Junio C Hamano
2025-11-17 22:09     ` brian m. carlson
2025-11-18  0:13       ` Junio C Hamano
2025-11-19 23:04         ` brian m. carlson
2025-11-19 23:24           ` Junio C Hamano
2025-11-19 23:37           ` Ezekiel Newren
2025-11-20 19:52             ` Ezekiel Newren
2025-11-20 23:02               ` brian m. carlson
2025-11-20 23:11                 ` Ezekiel Newren
2025-11-20 23:14                   ` Junio C Hamano
2025-11-17 22:16 ` [PATCH v2 00/15] " brian m. carlson
2025-11-17 22:16   ` [PATCH v2 01/15] repository: require Rust support for interoperability brian m. carlson
2025-11-17 22:16   ` [PATCH v2 02/15] conversion: don't crash when no destination algo brian m. carlson
2025-11-17 22:16   ` [PATCH v2 03/15] hash: use uint32_t for object_id algorithm brian m. carlson
2025-11-17 22:16   ` [PATCH v2 04/15] rust: add a ObjectID struct brian m. carlson
2025-11-17 22:16   ` [PATCH v2 05/15] rust: add a hash algorithm abstraction brian m. carlson
2025-11-17 22:16   ` [PATCH v2 06/15] hash: add a function to look up hash algo structs brian m. carlson
2025-11-17 22:16   ` [PATCH v2 07/15] rust: add additional helpers for ObjectID brian m. carlson
2025-11-17 22:16   ` [PATCH v2 08/15] csum-file: define hashwrite's count as a uint32_t brian m. carlson
2025-11-17 22:16   ` [PATCH v2 09/15] write-or-die: add an fsync component for the object map brian m. carlson
2025-11-17 22:16   ` [PATCH v2 10/15] hash: expose hash context functions to Rust brian m. carlson
2025-11-17 22:16   ` [PATCH v2 11/15] rust: add a build.rs script for tests brian m. carlson
2025-11-17 22:16   ` [PATCH v2 12/15] rust: add functionality to hash an object brian m. carlson
2025-11-17 22:16   ` [PATCH v2 13/15] rust: add a new binary object map format brian m. carlson
2025-11-17 22:16   ` [PATCH v2 14/15] rust: add a small wrapper around the hashfile code brian m. carlson
2025-11-17 22:16   ` [PATCH v2 15/15] object-file-convert: always make sure object ID algo is valid brian m. carlson
2026-02-07 20:04   ` [PATCH v3 00/16] SHA-1/SHA-256 interoperability, part 2 brian m. carlson
2026-02-07 20:04     ` [PATCH v3 01/16] repository: require Rust support for interoperability brian m. carlson
2026-02-07 20:04     ` [PATCH v3 02/16] conversion: don't crash when no destination algo brian m. carlson
2026-02-07 20:04     ` [PATCH v3 03/16] hash: use uint32_t for object_id algorithm brian m. carlson
2026-02-07 20:04     ` [PATCH v3 04/16] rust: add a ObjectID struct brian m. carlson
2026-02-07 20:04     ` [PATCH v3 05/16] rust: add a hash algorithm abstraction brian m. carlson
2026-02-07 20:04     ` [PATCH v3 06/16] hash: add a function to look up hash algo structs brian m. carlson
2026-02-07 20:04     ` [PATCH v3 07/16] rust: add additional helpers for ObjectID brian m. carlson
2026-02-07 20:04     ` [PATCH v3 08/16] csum-file: define hashwrite's count as a uint32_t brian m. carlson
2026-02-07 20:04     ` [PATCH v3 09/16] write-or-die: add an fsync component for the object map brian m. carlson
2026-02-07 20:04     ` [PATCH v3 10/16] hash: expose hash context functions to Rust brian m. carlson
2026-02-07 20:04     ` [PATCH v3 11/16] rust: fix linking binaries with cargo brian m. carlson
2026-02-07 20:04     ` [PATCH v3 12/16] rust: add a build.rs script for tests brian m. carlson
2026-02-07 20:04     ` [PATCH v3 13/16] rust: add functionality to hash an object brian m. carlson
2026-02-07 20:04     ` [PATCH v3 14/16] rust: add a new binary object map format brian m. carlson
2026-02-07 20:04     ` [PATCH v3 15/16] rust: add a small wrapper around the hashfile code brian m. carlson
2026-02-07 20:04     ` [PATCH v3 16/16] object-file-convert: always make sure object ID algo is valid brian m. carlson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251027004404.2152927-12-sandals@crustytoothpaste.net \
    --to=sandals@crustytoothpaste$(echo .)net \
    --cc=ezekielnewren@gmail$(echo .)com \
    --cc=git@vger$(echo .)kernel.org \
    --cc=gitster@pobox$(echo .)com \
    --cc=ps@pks$(echo .)im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.