public inbox for linuxppc-dev@ozlabs.org 
 help / color / mirror / Atom feed
From: Nicholas Piggin <npiggin@gmail•com>
To: linuxppc-dev@lists•ozlabs.org
Cc: Nicholas Piggin <npiggin@gmail•com>,
	"Aneesh Kumar K . V" <aneesh.kumar@linux•vnet.ibm.com>,
	Christophe Leroy <christophe.leroy@c-s•fr>
Subject: [PATCH v2 03/10] powerpc/mm/slice: pass pointers to struct slice_mask where possible
Date: Wed,  7 Mar 2018 11:37:11 +1000	[thread overview]
Message-ID: <20180307013718.13749-4-npiggin@gmail.com> (raw)
In-Reply-To: <20180307013718.13749-1-npiggin@gmail.com>

Pass around const pointers to struct slice_mask where possible, rather
than copies of slice_mask, to reduce stack and call overhead.

checkstack.pl gives, before:
0x00000d1c slice_get_unmapped_area [slice.o]:		592
0x00001864 is_hugepage_only_range [slice.o]:		448
0x00000754 slice_find_area_topdown [slice.o]:		400
0x00000484 slice_find_area_bottomup.isra.1 [slice.o]:	272
0x000017b4 slice_set_range_psize [slice.o]:		224
0x00000a4c slice_find_area [slice.o]:			128
0x00000160 slice_check_fit [slice.o]:			112

after:
0x00000ad0 slice_get_unmapped_area [slice.o]:		448
0x00001464 is_hugepage_only_range [slice.o]:		288
0x000006c0 slice_find_area [slice.o]:			144
0x0000016c slice_check_fit [slice.o]:			128
0x00000528 slice_find_area_bottomup.isra.2 [slice.o]:	128
0x000013e4 slice_set_range_psize [slice.o]:		128

This increases vfork+exec+exit performance by 1.5%.

Reduces time to mmap+munmap a 64kB page by 17%.

Signed-off-by: Nicholas Piggin <npiggin@gmail•com>
---
 arch/powerpc/mm/slice.c | 84 ++++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 432c328b3e94..420d791f0e18 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -50,19 +50,21 @@ struct slice_mask {
 #ifdef DEBUG
 int _slice_debug = 1;
 
-static void slice_print_mask(const char *label, struct slice_mask mask)
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
 {
 	if (!_slice_debug)
 		return;
-	pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices);
-	pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices);
+	pr_devel("%s low_slice: %*pbl\n", label,
+			(int)SLICE_NUM_LOW, &mask->low_slices);
+	pr_devel("%s high_slice: %*pbl\n", label,
+			(int)SLICE_NUM_HIGH, mask->high_slices);
 }
 
 #define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
 
 #else
 
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
 #define slice_dbg(fmt...)
 
 #endif
@@ -179,7 +181,8 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma
 }
 
 static int slice_check_fit(struct mm_struct *mm,
-			   struct slice_mask mask, struct slice_mask available)
+			   const struct slice_mask *mask,
+			   const struct slice_mask *available)
 {
 	DECLARE_BITMAP(result, SLICE_NUM_HIGH);
 	/*
@@ -189,14 +192,14 @@ static int slice_check_fit(struct mm_struct *mm,
 	unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit);
 
 	if (!SLICE_NUM_HIGH)
-		return (mask.low_slices & available.low_slices) ==
-		       mask.low_slices;
+		return (mask->low_slices & available->low_slices) ==
+		       mask->low_slices;
 
-	bitmap_and(result, mask.high_slices,
-		   available.high_slices, slice_count);
+	bitmap_and(result, mask->high_slices,
+		   available->high_slices, slice_count);
 
-	return (mask.low_slices & available.low_slices) == mask.low_slices &&
-		bitmap_equal(result, mask.high_slices, slice_count);
+	return (mask->low_slices & available->low_slices) == mask->low_slices &&
+		bitmap_equal(result, mask->high_slices, slice_count);
 }
 
 static void slice_flush_segments(void *parm)
@@ -216,7 +219,8 @@ static void slice_flush_segments(void *parm)
 #endif
 }
 
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
+static void slice_convert(struct mm_struct *mm,
+				const struct slice_mask *mask, int psize)
 {
 	int index, mask_index;
 	/* Write the new slice psize bits */
@@ -233,7 +237,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 
 	lpsizes = mm->context.low_slices_psize;
 	for (i = 0; i < SLICE_NUM_LOW; i++) {
-		if (!(mask.low_slices & (1u << i)))
+		if (!(mask->low_slices & (1u << i)))
 			continue;
 
 		mask_index = i & 0x1;
@@ -244,7 +248,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
 
 	hpsizes = mm->context.high_slices_psize;
 	for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
-		if (!test_bit(i, mask.high_slices))
+		if (!test_bit(i, mask->high_slices))
 			continue;
 
 		mask_index = i & 0x1;
@@ -270,26 +274,25 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
  * 'available' slice_mark.
  */
 static bool slice_scan_available(unsigned long addr,
-				 struct slice_mask available,
-				 int end,
-				 unsigned long *boundary_addr)
+				 const struct slice_mask *available,
+				 int end, unsigned long *boundary_addr)
 {
 	unsigned long slice;
 	if (addr < SLICE_LOW_TOP) {
 		slice = GET_LOW_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
-		return !!(available.low_slices & (1u << slice));
+		return !!(available->low_slices & (1u << slice));
 	} else {
 		slice = GET_HIGH_SLICE_INDEX(addr);
 		*boundary_addr = (slice + end) ?
 			((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
-		return !!test_bit(slice, available.high_slices);
+		return !!test_bit(slice, available->high_slices);
 	}
 }
 
 static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 					      unsigned long len,
-					      struct slice_mask available,
+					      const struct slice_mask *available,
 					      int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -335,7 +338,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 
 static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 					     unsigned long len,
-					     struct slice_mask available,
+					     const struct slice_mask *available,
 					     int psize, unsigned long high_limit)
 {
 	int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
@@ -393,7 +396,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 
 
 static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
-				     struct slice_mask mask, int psize,
+				     const struct slice_mask *mask, int psize,
 				     int topdown, unsigned long high_limit)
 {
 	if (topdown)
@@ -402,7 +405,8 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
 }
 
-static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_or_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
 {
 	dst->low_slices |= src->low_slices;
 	if (!SLICE_NUM_HIGH)
@@ -411,7 +415,8 @@ static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src)
 		  SLICE_NUM_HIGH);
 }
 
-static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src)
+static inline void slice_andnot_mask(struct slice_mask *dst,
+					const struct slice_mask *src)
 {
 	dst->low_slices &= ~src->low_slices;
 
@@ -501,7 +506,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * already
 	 */
 	slice_mask_for_size(mm, psize, &good_mask, high_limit);
-	slice_print_mask(" good_mask", good_mask);
+	slice_print_mask(" good_mask", &good_mask);
 
 	/*
 	 * Here "good" means slices that are already the right page size,
@@ -535,12 +540,12 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (addr != 0 || fixed) {
 		/* Build a mask for the requested range */
 		slice_range_to_mask(addr, len, &mask);
-		slice_print_mask(" mask", mask);
+		slice_print_mask(" mask", &mask);
 
 		/* Check if we fit in the good mask. If we do, we just return,
 		 * nothing else to do
 		 */
-		if (slice_check_fit(mm, mask, good_mask)) {
+		if (slice_check_fit(mm, &mask, &good_mask)) {
 			slice_dbg(" fits good !\n");
 			return addr;
 		}
@@ -548,7 +553,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		/* Now let's see if we can find something in the existing
 		 * slices for that size
 		 */
-		newaddr = slice_find_area(mm, len, good_mask,
+		newaddr = slice_find_area(mm, len, &good_mask,
 					  psize, topdown, high_limit);
 		if (newaddr != -ENOMEM) {
 			/* Found within the good mask, we don't have to setup,
@@ -564,9 +569,10 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 */
 	slice_mask_for_free(mm, &potential_mask, high_limit);
 	slice_or_mask(&potential_mask, &good_mask);
-	slice_print_mask(" potential", potential_mask);
+	slice_print_mask(" potential", &potential_mask);
 
-	if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) {
+	if ((addr != 0 || fixed) &&
+			slice_check_fit(mm, &mask, &potential_mask)) {
 		slice_dbg(" fits potential !\n");
 		goto convert;
 	}
@@ -581,7 +587,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	 * anywhere in the good area.
 	 */
 	if (addr) {
-		addr = slice_find_area(mm, len, good_mask,
+		addr = slice_find_area(mm, len, &good_mask,
 				       psize, topdown, high_limit);
 		if (addr != -ENOMEM) {
 			slice_dbg(" found area at 0x%lx\n", addr);
@@ -592,14 +598,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	/* Now let's see if we can find something in the existing slices
 	 * for that size plus free slices
 	 */
-	addr = slice_find_area(mm, len, potential_mask,
+	addr = slice_find_area(mm, len, &potential_mask,
 			       psize, topdown, high_limit);
 
 #ifdef CONFIG_PPC_64K_PAGES
 	if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
 		/* retry the search with 4k-page slices included */
 		slice_or_mask(&potential_mask, &compat_mask);
-		addr = slice_find_area(mm, len, potential_mask,
+		addr = slice_find_area(mm, len, &potential_mask,
 				       psize, topdown, high_limit);
 	}
 #endif
@@ -609,7 +615,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 
 	slice_range_to_mask(addr, len, &mask);
 	slice_dbg(" found potential area at 0x%lx\n", addr);
-	slice_print_mask(" mask", mask);
+	slice_print_mask(" mask", &mask);
 
  convert:
 	slice_andnot_mask(&mask, &good_mask);
@@ -617,7 +623,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 	if (mask.low_slices ||
 	    (SLICE_NUM_HIGH &&
 	     !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH))) {
-		slice_convert(mm, mask, psize);
+		slice_convert(mm, &mask, psize);
 		if (psize > MMU_PAGE_BASE)
 			on_each_cpu(slice_flush_segments, mm, 1);
 	}
@@ -711,7 +717,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 	VM_BUG_ON(radix_enabled());
 
 	slice_range_to_mask(start, len, &mask);
-	slice_convert(mm, mask, psize);
+	slice_convert(mm, &mask, psize);
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -758,9 +764,9 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 #if 0 /* too verbose */
 	slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
 		 mm, addr, len);
-	slice_print_mask(" mask", mask);
-	slice_print_mask(" available", available);
+	slice_print_mask(" mask", &mask);
+	slice_print_mask(" available", &available);
 #endif
-	return !slice_check_fit(mm, mask, available);
+	return !slice_check_fit(mm, &mask, &available);
 }
 #endif
-- 
2.16.1

  parent reply	other threads:[~2018-03-07  1:37 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-03-07  1:37 [PATCH v2 00/10] powerpc/mm/slice: improve slice speed and stack use Nicholas Piggin
2018-03-07  1:37 ` [PATCH v2 01/10] powerpc/mm/slice: Simplify and optimise slice context initialisation Nicholas Piggin
2018-03-14  9:28   ` [v2, " Michael Ellerman
2018-03-07  1:37 ` [PATCH v2 02/10] powerpc/mm/slice: tidy lpsizes and hpsizes update loops Nicholas Piggin
2018-03-07  1:37 ` Nicholas Piggin [this message]
2018-03-07  1:37 ` [PATCH v2 04/10] powerpc/mm/slice: implement a slice mask cache Nicholas Piggin
2018-03-07  1:37 ` [PATCH v2 05/10] powerpc/mm/slice: implement slice_check_range_fits Nicholas Piggin
2018-03-07  1:37 ` [PATCH v2 06/10] powerpc/mm/slice: Switch to 3-operand slice bitops helpers Nicholas Piggin
2018-03-07  1:37 ` [PATCH v2 07/10] powerpc/mm/slice: remove dead code Nicholas Piggin
2018-03-07  1:37 ` [PATCH v2 08/10] powerpc/mm/slice: Use const pointers to cached slice masks where possible Nicholas Piggin
2018-03-07  1:37 ` [PATCH v2 09/10] powerpc/mm/slice: remove radix calls to the slice code Nicholas Piggin
2018-03-07  1:37 ` [PATCH v2 10/10] powerpc/mm/slice: use the dynamic high slice size to limit bitmap operations Nicholas Piggin
2018-03-07  3:22   ` Nicholas Piggin
2018-03-07 10:45     ` Michael Ellerman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180307013718.13749-4-npiggin@gmail.com \
    --to=npiggin@gmail$(echo .)com \
    --cc=aneesh.kumar@linux$(echo .)vnet.ibm.com \
    --cc=christophe.leroy@c-s$(echo .)fr \
    --cc=linuxppc-dev@lists$(echo .)ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox