public inbox for linuxppc-dev@ozlabs.org 
 help / color / mirror / Atom feed
From: Nicholas Piggin <npiggin@gmail•com>
To: linuxppc-dev@lists•ozlabs.org
Cc: Nicholas Piggin <npiggin@gmail•com>
Subject: [PATCH 14/14] powerpc/64s/radix: allocate kernel page tables node-local if possible
Date: Wed, 14 Feb 2018 01:08:24 +1000	[thread overview]
Message-ID: <20180213150824.27689-15-npiggin@gmail.com> (raw)
In-Reply-To: <20180213150824.27689-1-npiggin@gmail.com>

Try to allocate kernel page tables for direct mapping and vmemmap
according to the node of the memory they will map. The node is not
available for the linear map in early boot, so use range allocation
to allocate the page tables from the region they map, which is
effectively node-local.

Signed-off-by: Nicholas Piggin <npiggin@gmail•com>
---
 arch/powerpc/mm/pgtable-radix.c | 111 ++++++++++++++++++++++++++++++----------
 1 file changed, 85 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 4c5cc69c92c2..66b07718875a 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -48,11 +48,26 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
 	return 0;
 }
 
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+			unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pa = 0;
 	void *pt;
 
-	pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+	if (region_start || region_end) /* has region hint */
+		pa = memblock_alloc_range(size, size, region_start, region_end,
+						MEMBLOCK_NONE);
+	else if (nid != -1) /* has node hint */
+		pa = memblock_alloc_base_nid(size, size,
+						MEMBLOCK_ALLOC_ANYWHERE,
+						nid, MEMBLOCK_NONE);
+
+	if (!pa)
+		pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+	BUG_ON(!pa);
+
+	pt = __va(pa);
 	memset(pt, 0, size);
 
 	return pt;
@@ -60,8 +75,11 @@ static __ref void *early_alloc_pgtable(unsigned long size)
 
 static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 			  pgprot_t flags,
-			  unsigned int map_page_size)
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pfn = pa >> PAGE_SHIFT;
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
@@ -69,8 +87,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 
 	pgdp = pgd_offset_k(ea);
 	if (pgd_none(*pgdp)) {
-		pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
-		BUG_ON(pudp == NULL);
+		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
+						region_start, region_end);
 		pgd_populate(&init_mm, pgdp, pudp);
 	}
 	pudp = pud_offset(pgdp, ea);
@@ -79,8 +97,8 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 		goto set_the_pte;
 	}
 	if (pud_none(*pudp)) {
-		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
-		BUG_ON(pmdp == NULL);
+		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
+						region_start, region_end);
 		pud_populate(&init_mm, pudp, pmdp);
 	}
 	pmdp = pmd_offset(pudp, ea);
@@ -89,23 +107,29 @@ static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 		goto set_the_pte;
 	}
 	if (!pmd_present(*pmdp)) {
-		ptep = early_alloc_pgtable(PAGE_SIZE);
-		BUG_ON(ptep == NULL);
+		ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+						region_start, region_end);
 		pmd_populate_kernel(&init_mm, pmdp, ptep);
 	}
 	ptep = pte_offset_kernel(pmdp, ea);
 
 set_the_pte:
-	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
 	smp_wmb();
 	return 0;
 }
 
-
-int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+/*
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
 			  pgprot_t flags,
-			  unsigned int map_page_size)
+			  unsigned int map_page_size,
+			  int nid,
+			  unsigned long region_start, unsigned long region_end)
 {
+	unsigned long pfn = pa >> PAGE_SHIFT;
 	pgd_t *pgdp;
 	pud_t *pudp;
 	pmd_t *pmdp;
@@ -115,9 +139,15 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 	 */
 	BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
 
-	if (!slab_is_available())
-		return early_map_kernel_page(ea, pa, flags, map_page_size);
+	if (unlikely(!slab_is_available()))
+		return early_map_kernel_page(ea, pa, flags, map_page_size,
+						nid, region_start, region_end);
 
+	/*
+	 * Should make page table allocation functions be able to take a
+	 * node, so we can place kernel page tables on the right nodes after
+	 * boot.
+	 */
 	pgdp = pgd_offset_k(ea);
 	pudp = pud_alloc(&init_mm, pgdp, ea);
 	if (!pudp)
@@ -138,11 +168,25 @@ int radix__map_kernel_page(unsigned long ea, unsigned long pa,
 		return -ENOMEM;
 
 set_the_pte:
-	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+	set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
 	smp_wmb();
 	return 0;
 }
 
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size, int nid)
+{
+	return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+			  pgprot_t flags,
+			  unsigned int map_page_size)
+{
+	return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void radix__change_memory_range(unsigned long start, unsigned long end,
 				unsigned long clear)
@@ -229,7 +273,8 @@ static inline void __meminit print_mapping(unsigned long start,
 }
 
 static int __meminit create_physical_mapping(unsigned long start,
-					     unsigned long end)
+					     unsigned long end,
+					     int nid)
 {
 	unsigned long vaddr, addr, mapping_size = 0;
 	pgprot_t prot;
@@ -285,7 +330,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 		else
 			prot = PAGE_KERNEL;
 
-		rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+		rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
 		if (rc)
 			return rc;
 	}
@@ -294,7 +339,7 @@ static int __meminit create_physical_mapping(unsigned long start,
 	return 0;
 }
 
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
 {
 	unsigned long rts_field;
 	struct memblock_region *reg;
@@ -304,9 +349,16 @@ static void __init radix_init_pgtable(void)
 	/*
 	 * Create the linear mapping, using standard page size for now
 	 */
-	for_each_memblock(memory, reg)
+	for_each_memblock(memory, reg) {
+		/*
+		 * The memblock allocator  is up at this point, so the
+		 * page tables will be allocated within the range. No
+		 * need or a node (which we don't have yet).
+		 */
 		WARN_ON(create_physical_mapping(reg->base,
-						reg->base + reg->size));
+						reg->base + reg->size,
+						-1));
+	}
 
 	/* Find out how many PID bits are supported */
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
@@ -335,7 +387,7 @@ static void __init radix_init_pgtable(void)
 	 * host.
 	 */
 	BUG_ON(PRTB_SIZE_SHIFT > 36);
-	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+	process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
 	/*
 	 * Fill in the process table.
 	 */
@@ -716,14 +768,17 @@ static int stop_machine_change_mapping(void *data)
 {
 	struct change_mapping_params *params =
 			(struct change_mapping_params *)data;
+	int nid;
 
 	if (!data)
 		return -1;
 
+	nid = pfn_to_nid(params->start >> PAGE_SHIFT);
+
 	spin_unlock(&init_mm.page_table_lock);
 	pte_clear(&init_mm, params->aligned_start, params->pte);
-	create_physical_mapping(params->aligned_start, params->start);
-	create_physical_mapping(params->end, params->aligned_end);
+	create_physical_mapping(params->aligned_start, params->start, nid);
+	create_physical_mapping(params->end, params->aligned_end, nid);
 	spin_lock(&init_mm.page_table_lock);
 	return 0;
 }
@@ -882,7 +937,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
 
 int __ref radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
 {
-	return create_physical_mapping(start, end);
+	return create_physical_mapping(start, end, nid);
 }
 
 int radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -899,8 +954,12 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
 {
 	/* Create a PTE encoding */
 	unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+	int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+	int ret;
+
+	ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
+	BUG_ON(ret);
 
-	BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
 	return 0;
 }
 
-- 
2.16.1

  parent reply	other threads:[~2018-02-13 15:09 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-13 15:08 [PATCH 00/14] numa aware allocation for pacas, stacks, pagetables Nicholas Piggin
2018-02-13 15:08 ` [PATCH 01/14] powerpc/64s: do not allocate lppaca if we are not virtualized Nicholas Piggin
2018-03-31 14:03   ` [01/14] " Michael Ellerman
2018-02-13 15:08 ` [PATCH 02/14] powerpc/64: Use array of paca pointers and allocate pacas individually Nicholas Piggin
2018-02-13 15:08 ` [PATCH 03/14] powerpc/64s: allocate lppacas individually Nicholas Piggin
2018-03-13 12:41   ` Michael Ellerman
2018-03-13 12:54     ` Nicholas Piggin
2018-03-16 14:16       ` Michael Ellerman
2018-02-13 15:08 ` [PATCH 04/14] powerpc/64s: allocate slb_shadow structures individually Nicholas Piggin
2018-02-13 15:08 ` [PATCH 05/14] mm: make memblock_alloc_base_nid non-static Nicholas Piggin
2018-03-13 12:06   ` OK to merge via powerpc? (was Re: [PATCH 05/14] mm: make memblock_alloc_base_nid non-static) Michael Ellerman
2018-03-13 19:41     ` Andrew Morton
2018-03-14  0:56       ` Nicholas Piggin
2018-02-13 15:08 ` [PATCH 06/14] powerpc/mm/numa: move numa topology discovery earlier Nicholas Piggin
2018-02-13 15:08 ` [PATCH 07/14] powerpc/64: move default SPR recording Nicholas Piggin
2018-03-13 12:25   ` Michael Ellerman
2018-03-13 12:55     ` Nicholas Piggin
2018-03-13 15:47     ` Nicholas Piggin
2018-02-13 15:08 ` [PATCH 08/14] powerpc/setup: cpu_to_phys_id array Nicholas Piggin
2018-03-29  5:51   ` Michael Ellerman
2018-02-13 15:08 ` [PATCH 09/14] powerpc/64: defer paca allocation until memory topology is discovered Nicholas Piggin
2018-03-29  5:51   ` Michael Ellerman
2018-02-13 15:08 ` [PATCH 10/14] powerpc/64: allocate pacas per node Nicholas Piggin
2018-03-29  5:50   ` Michael Ellerman
2018-02-13 15:08 ` [PATCH 11/14] powerpc/64: allocate per-cpu stacks node-local if possible Nicholas Piggin
2018-02-13 15:08 ` [PATCH 12/14] powerpc: pass node id into create_section_mapping Nicholas Piggin
2018-03-29  5:51   ` Michael Ellerman
2018-03-29 15:15     ` Nicholas Piggin
2018-02-13 15:08 ` [PATCH 13/14] powerpc/64s/radix: split early page table mapping to its own function Nicholas Piggin
2018-02-13 15:08 ` Nicholas Piggin [this message]
2018-03-07 10:50 ` [PATCH 00/14] numa aware allocation for pacas, stacks, pagetables Michael Ellerman
2018-03-07 11:23   ` Nicholas Piggin
2018-03-08  2:04   ` Nicholas Piggin
2018-03-29  6:18     ` Michael Ellerman
2018-03-29 12:04       ` Nicholas Piggin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180213150824.27689-15-npiggin@gmail.com \
    --to=npiggin@gmail$(echo .)com \
    --cc=linuxppc-dev@lists$(echo .)ozlabs.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox