From: "Barry Song (Xiaomi)" <baohua@kernel•org>
To: akpm@linux-foundation•org, linux-mm@kvack•org, willy@infradead•org
Cc: david@kernel•org, ljs@kernel•org, liam@infradead•org,
vbabka@kernel•org, rppt@kernel•org, surenb@google•com,
mhocko@suse•com, jack@suse•cz, pfalcato@suse•de,
wanglian@kylinos•cn, chentao@kylinos•cn, lianux.mm@gmail•com,
kunwu.chan@gmail•com, liyangouwen1@oppo•com, chrisl@kernel•org,
kasong@tencent•com, shikemeng@huaweicloud•com, nphamcs@gmail•com,
bhe@redhat•com, youngjun.park@lge•com,
linux-arm-kernel@lists•infradead.org,
linux-kernel@vger•kernel.org, loongarch@lists•linux.dev,
linuxppc-dev@lists•ozlabs.org, linux-riscv@lists•infradead.org,
linux-s390@vger•kernel.org, Barry Song <baohua@kernel•org>
Subject: [PATCH v2 1/5] mm/filemap: Retry fault by VMA lock if the lock was released for I/O
Date: Thu, 30 Apr 2026 12:04:23 +0800 [thread overview]
Message-ID: <20260430040427.4672-2-baohua@kernel.org> (raw)
In-Reply-To: <20260430040427.4672-1-baohua@kernel.org>
From: Oven Liyang <liyangouwen1@oppo•com>
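If the current page fault is being handled under the per-VMA lock and the
lock was released only to wait for I/O completion (e.g., in folio_lock()),
then the fault retried after the I/O completes should still qualify for
the per-VMA-lock path instead of falling through to the mmap_lock path.
To signal this, add VM_FAULT_RETRY_VMA (taking bit 0x000800 and shifting
VM_FAULT_FALLBACK, VM_FAULT_DONE_COW, VM_FAULT_NEEDDSYNC and
VM_FAULT_COMPLETED up by one bit). filemap_fault() returns it together
with VM_FAULT_RETRY when it unlocks the folio and bails out to retry while
FAULT_FLAG_VMA_LOCK is set, and the arm, arm64, loongarch, powerpc, riscv,
s390 and x86 fault handlers then jump back to lock_vma_under_rcu() rather
than proceeding to lock_mmap.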
Acked-by: Pedro Falcato <pfalcato@suse•de>
Tested-by: Wang Lian <wanglian@kylinos•cn>
Tested-by: Kunwu Chan <chentao@kylinos•cn>
Reviewed-by: Wang Lian <lianux.mm@gmail•com>
Reviewed-by: Kunwu Chan <kunwu.chan@gmail•com>
Signed-off-by: Oven Liyang <liyangouwen1@oppo•com>
Co-developed-by: Barry Song <baohua@kernel•org>
Signed-off-by: Barry Song <baohua@kernel•org>
---
arch/arm/mm/fault.c | 5 +++++
arch/arm64/mm/fault.c | 5 +++++
arch/loongarch/mm/fault.c | 4 ++++
arch/powerpc/mm/fault.c | 5 ++++-
arch/riscv/mm/fault.c | 4 ++++
arch/s390/mm/fault.c | 4 ++++
arch/x86/mm/fault.c | 4 ++++
include/linux/mm_types.h | 9 +++++----
mm/filemap.c | 5 ++++-
9 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index e62cc4be5adf..5971e02845f7 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -391,6 +391,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, addr);
if (!vma)
goto lock_mmap;
@@ -420,6 +421,10 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
goto no_context;
return 0;
}
+
+ /* If the first try is only about waiting for the I/O to complete */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 739800835920..d0362a3e11b7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -673,6 +673,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
if (!(mm_flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, addr);
if (!vma)
goto lock_mmap;
@@ -719,6 +720,10 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto no_context;
return 0;
}
+
+ /* If the first try is only about waiting for the I/O to complete */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 2c93d33356e5..738f495560c0 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -219,6 +219,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -265,6 +266,9 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
no_context(regs, write, address);
return;
}
+ /* If the first try is only about waiting for the I/O to complete */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 806c74e0d5ab..cb7ffc20c760 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -487,6 +487,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -516,7 +517,9 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (fault_signal_pending(fault, regs))
return user_mode(regs) ? 0 : SIGBUS;
-
+ /* If the first try is only about waiting for the I/O to complete */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
/* When running in the kernel we expect faults to occur only to
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 04ed6f8acae4..b94cf57c2b9a 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -347,6 +347,7 @@ void handle_page_fault(struct pt_regs *regs)
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, addr);
if (!vma)
goto lock_mmap;
@@ -376,6 +377,9 @@ void handle_page_fault(struct pt_regs *regs)
no_context(regs, addr);
return;
}
+ /* If the first try is only about waiting for the I/O to complete */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 191cc53caead..e0576e629f65 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -294,6 +294,7 @@ static void do_exception(struct pt_regs *regs, int access)
flags |= FAULT_FLAG_WRITE;
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -318,6 +319,9 @@ static void do_exception(struct pt_regs *regs, int access)
handle_fault_error_nolock(regs, 0);
return;
}
+ /* If the first try is only about waiting for the I/O to complete */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
vma = lock_mm_and_find_vma(mm, address, regs);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f0e77e084482..0589fc693eea 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1322,6 +1322,7 @@ void do_user_addr_fault(struct pt_regs *regs,
if (!(flags & FAULT_FLAG_USER))
goto lock_mmap;
+retry_vma:
vma = lock_vma_under_rcu(mm, address);
if (!vma)
goto lock_mmap;
@@ -1351,6 +1352,9 @@ void do_user_addr_fault(struct pt_regs *regs,
ARCH_DEFAULT_PKEY);
return;
}
+ /* If the first try is only about waiting for the I/O to complete */
+ if (fault & VM_FAULT_RETRY_VMA)
+ goto retry_vma;
lock_mmap:
retry:
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a308e2c23b82..5907200ea587 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1678,10 +1678,11 @@ enum vm_fault_reason {
VM_FAULT_NOPAGE = (__force vm_fault_t)0x000100,
VM_FAULT_LOCKED = (__force vm_fault_t)0x000200,
VM_FAULT_RETRY = (__force vm_fault_t)0x000400,
- VM_FAULT_FALLBACK = (__force vm_fault_t)0x000800,
- VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
- VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
- VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000,
+ VM_FAULT_RETRY_VMA = (__force vm_fault_t)0x000800,
+ VM_FAULT_FALLBACK = (__force vm_fault_t)0x001000,
+ VM_FAULT_DONE_COW = (__force vm_fault_t)0x002000,
+ VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x004000,
+ VM_FAULT_COMPLETED = (__force vm_fault_t)0x008000,
VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
};
diff --git a/mm/filemap.c b/mm/filemap.c
index ab34cab2416a..a045b771e8de 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3525,6 +3525,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
struct folio *folio;
vm_fault_t ret = 0;
bool mapping_locked = false;
+ bool retry_by_vma_lock = false;
max_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(index >= max_idx))
@@ -3621,6 +3622,8 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
*/
if (fpin) {
folio_unlock(folio);
+ if (vmf->flags & FAULT_FLAG_VMA_LOCK)
+ retry_by_vma_lock = true;
goto out_retry;
}
if (mapping_locked)
@@ -3671,7 +3674,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
filemap_invalidate_unlock_shared(mapping);
if (fpin)
fput(fpin);
- return ret | VM_FAULT_RETRY;
+ return ret | VM_FAULT_RETRY | (retry_by_vma_lock ? VM_FAULT_RETRY_VMA : 0);
}
EXPORT_SYMBOL(filemap_fault);
--
2.39.3 (Apple Git-146)
next prev parent reply other threads:[~2026-04-30 4:05 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-30 4:04 [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance Barry Song (Xiaomi)
2026-04-30 4:04 ` Barry Song (Xiaomi) [this message]
2026-04-30 4:04 ` [PATCH v2 2/5] mm/swapin: Retry swapin by VMA lock if the lock was released for I/O Barry Song (Xiaomi)
2026-04-30 4:04 ` [PATCH v2 3/5] mm: Move folio_lock_or_retry() and drop __folio_lock_or_retry() Barry Song (Xiaomi)
2026-04-30 4:04 ` [PATCH v2 4/5] mm: Don't retry page fault if folio is uptodate during swap-in Barry Song (Xiaomi)
2026-04-30 12:35 ` Matthew Wilcox
2026-05-01 16:11 ` Matthew Wilcox
2026-04-30 4:04 ` [PATCH v2 5/5] mm/filemap: Avoid retrying page faults on uptodate folios in filemap faults Barry Song (Xiaomi)
2026-04-30 12:37 ` [PATCH v2 0/5] mm: reduce mmap_lock contention and improve page fault performance Matthew Wilcox
2026-04-30 22:49 ` Barry Song
2026-05-01 14:56 ` Matthew Wilcox
2026-05-01 17:44 ` Barry Song
2026-05-01 17:57 ` Matthew Wilcox
2026-05-01 18:25 ` Barry Song
2026-05-01 19:39 ` Matthew Wilcox
2026-05-03 20:39 ` Barry Song
2026-05-03 13:13 ` Jan Kara
2026-05-03 19:55 ` Barry Song
2026-05-04 13:03 ` Jan Kara
2026-05-04 13:35 ` Barry Song
2026-05-04 14:15 ` Barry Song
2026-05-17 8:45 ` Barry Song
2026-05-18 9:46 ` Lorenzo Stoakes
2026-05-18 11:25 ` Barry Song
2026-05-18 16:17 ` Matthew Wilcox
2026-05-18 20:50 ` Barry Song
2026-05-18 19:56 ` Suren Baghdasaryan
2026-05-18 21:14 ` Barry Song
2026-05-19 12:45 ` Lorenzo Stoakes
2026-05-19 14:17 ` Liam R. Howlett
2026-05-19 22:01 ` Barry Song
2026-05-20 21:04 ` Matthew Wilcox
2026-05-20 21:14 ` Barry Song
2026-05-20 21:15 ` Matthew Wilcox
2026-05-20 21:35 ` David Hildenbrand (Arm)
2026-05-20 23:37 ` Barry Song
2026-05-22 15:53 ` Lorenzo Stoakes
2026-05-22 21:31 ` Barry Song
2026-05-22 2:33 ` Barry Song (Xiaomi)
2026-05-22 13:09 ` Matthew Wilcox
2026-05-22 13:36 ` Barry Song
2026-05-22 13:48 ` Barry Song
2026-05-22 15:42 ` Lorenzo Stoakes
2026-05-19 12:53 ` Lorenzo Stoakes
2026-05-19 21:18 ` Barry Song
2026-05-20 7:50 ` Lorenzo Stoakes
2026-05-20 9:07 ` Barry Song
2026-05-20 10:07 ` Lorenzo Stoakes
2026-05-20 16:20 ` Suren Baghdasaryan
2026-05-20 5:51 ` Suren Baghdasaryan
2026-05-22 15:39 ` Lorenzo Stoakes
2026-05-20 10:33 ` David Hildenbrand (Arm)
2026-05-20 12:55 ` Lorenzo Stoakes
2026-05-20 21:39 ` Yang Shi
2026-05-22 15:37 ` Lorenzo Stoakes
2026-05-19 12:43 ` Lorenzo Stoakes
2026-05-18 9:53 ` David Hildenbrand (Arm)
2026-05-19 13:42 ` Lorenzo Stoakes
2026-05-18 21:21 ` Yang Shi
2026-05-19 11:07 ` Barry Song
2026-05-19 13:34 ` Lorenzo Stoakes
2026-05-19 18:50 ` Yang Shi
2026-05-19 20:53 ` Yang Shi
2026-05-19 13:12 ` Lorenzo Stoakes
2026-05-19 13:39 ` Lorenzo Stoakes
2026-05-19 18:41 ` Yang Shi
2026-05-19 21:02 ` Yang Shi
2026-05-20 8:11 ` Lorenzo Stoakes
2026-05-01 15:52 ` Lorenzo Stoakes
2026-05-01 16:06 ` Matthew Wilcox
2026-05-01 17:09 ` Lorenzo Stoakes
2026-05-01 17:59 ` Barry Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260430040427.4672-2-baohua@kernel.org \
--to=baohua@kernel$(echo .)org \
--cc=akpm@linux-foundation$(echo .)org \
--cc=bhe@redhat$(echo .)com \
--cc=chentao@kylinos$(echo .)cn \
--cc=chrisl@kernel$(echo .)org \
--cc=david@kernel$(echo .)org \
--cc=jack@suse$(echo .)cz \
--cc=kasong@tencent$(echo .)com \
--cc=kunwu.chan@gmail$(echo .)com \
--cc=liam@infradead$(echo .)org \
--cc=lianux.mm@gmail$(echo .)com \
--cc=linux-arm-kernel@lists$(echo .)infradead.org \
--cc=linux-kernel@vger$(echo .)kernel.org \
--cc=linux-mm@kvack$(echo .)org \
--cc=linux-riscv@lists$(echo .)infradead.org \
--cc=linux-s390@vger$(echo .)kernel.org \
--cc=linuxppc-dev@lists$(echo .)ozlabs.org \
--cc=liyangouwen1@oppo$(echo .)com \
--cc=ljs@kernel$(echo .)org \
--cc=loongarch@lists$(echo .)linux.dev \
--cc=mhocko@suse$(echo .)com \
--cc=nphamcs@gmail$(echo .)com \
--cc=pfalcato@suse$(echo .)de \
--cc=rppt@kernel$(echo .)org \
--cc=shikemeng@huaweicloud$(echo .)com \
--cc=surenb@google$(echo .)com \
--cc=vbabka@kernel$(echo .)org \
--cc=wanglian@kylinos$(echo .)cn \
--cc=willy@infradead$(echo .)org \
--cc=youngjun.park@lge$(echo .)com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox