There is already a plain mm, so what is active_mm?
I know it as the previous process's mm that a kernel thread temporarily borrows, but if that is all I can say, it sounds like an answer I merely memorized.
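To make this more than a memorized one-liner, it helps to see that the two fields sit right next to each other in task_struct. A simplified excerpt (only the two relevant members shown, everything else omitted), with the invariant written out as comments:

struct task_struct {
        /* ... */
        struct mm_struct *mm;          /* NULL for a kernel thread */
        struct mm_struct *active_mm;   /* the mm the CPU is actually using:
                                        * equal to mm for a user task;
                                        * for a kernel thread, the mm
                                        * borrowed from the previous task */
        /* ... */
};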
https://kernelnewbies.kernelnewbies.narkive.com/Glj0IaUL/active-mm-versus-mm
Googling easily turns up a post with a detailed explanation.
The thread at the link above explains it as quoted below, and mentions that the Gorman book covers it too.
This avoids some costly TLB switches.
https://students.mimuw.edu.pl/SO/Linux-doc/gorman_book.pdf
So let's look it up in the Gorman book.
The call to switch_mm(), which results in a TLB flush, is avoided by borrowing the mm_struct used by the previous task and placing it in task_struct→active_mm. This technique has made large improvements to context switch times.
https://docs.kernel.org/vm/active_mm.html
The search also turned up a kernel doc. Huh, I did not know active_mm had its own documentation. It turns out to be something Torvalds wrote in 1999, though, which is very early Linux; it must have been written before the Gorman book.
Let's look at the code.
kernel/sched/core.c @ v5.15
Even when schedule() picks a kernel thread as next, the handling differs slightly depending on whether prev was a user thread or a kernel thread: coming from a user thread we mmgrab() to increment mm->mm_count, while coming from a kernel thread we restore that prev kernel thread's active_mm back to NULL. This seems to be exactly what Torvalds' write-up is describing.
4888 static __always_inline struct rq *
4889 context_switch(struct rq *rq, struct task_struct *prev,
4890                struct task_struct *next, struct rq_flags *rf)
4891 {
4892         prepare_task_switch(rq, prev, next);
4893 
4894         /*
4895          * For paravirt, this is coupled with an exit in switch_to to
4896          * combine the page table reload and the switch backend into
4897          * one hypercall.
4898          */
4899         arch_start_context_switch(prev);
4900 
4901         /*
4902          * kernel -> kernel lazy + transfer active
4903          * user -> kernel lazy + mmgrab() active
4904          *
4905          * kernel -> user switch + mmdrop() active
4906          * user -> user switch
4907          */
4908         if (!next->mm) { // to kernel
4909                 enter_lazy_tlb(prev->active_mm, next);
4910 
4911                 next->active_mm = prev->active_mm;
4912                 if (prev->mm) // from user
4913                         mmgrab(prev->active_mm);
4914                 else
4915                         prev->active_mm = NULL;
4916         } else { // to user
4917                 membarrier_switch_mm(rq, prev->active_mm, next->mm);
4918                 /*
4919                  * sys_membarrier() requires an smp_mb() between setting
4920                  * rq->curr / membarrier_switch_mm() and returning to userspace.
4921                  *
4922                  * The below provides this either through switch_mm(), or in
4923                  * case 'prev->active_mm == next->mm' through
4924                  * finish_task_switch()'s mmdrop().
4925                  */
4926                 switch_mm_irqs_off(prev->active_mm, next->mm, next);
4927 
4928                 if (!prev->mm) { // from kernel
4929                         /* will mmdrop() in finish_task_switch(). */
4930                         rq->prev_mm = prev->active_mm;
4931                         prev->active_mm = NULL;
4932                 }
4933         }
4934 
4935         rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
4936 
4937         prepare_lock_switch(rq, next, rf);
4938 
4939         /* Here we just switch the register state and the stack. */
4940         switch_to(prev, next, prev);
4941         barrier();
4942 
4943         return finish_task_switch(prev);
4944 }
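A side note on the mmgrab() in the code above: it only bumps mm->mm_count (references that pin the struct mm_struct itself, such as lazy-TLB borrowers), not mm_users. Roughly, paraphrased from memory of include/linux/sched/mm.h, so treat the exact bodies as my assumption:

/* paraphrased sketch, not a verbatim quote */
static inline void mmgrab(struct mm_struct *mm)
{
        atomic_inc(&mm->mm_count);              /* pin the struct mm_struct */
}

static inline void mmdrop(struct mm_struct *mm)
{
        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
                __mmdrop(mm);                   /* last reference: free it */
}

So the "to kernel, from user" branch pins prev's mm through mm_count, and the matching mmdrop() of the borrowed mm happens later via rq->prev_mm in finish_task_switch(), exactly as the "will mmdrop() in finish_task_switch()" comment says.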
But where is the part that actually avoids the TLB flush?
arch/arm64/include/asm/mmu_context.h @ v5.15
194 static inline void
195 enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
196 {
197         /*
198          * We don't actually care about the ttbr0 mapping, so point it at the
199          * zero page.
200          */
201         update_saved_ttbr0(tsk, &init_mm);
202 }
The enter_lazy_tlb() reached from context_switch() has a comment saying it just points ttbr0 at the zero page. With CONFIG_ARM64_SW_TTBR0_PAN, i.e. when TTBR0 PAN is emulated in software, update_saved_ttbr0() is implemented as below.
170 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
171 static inline void update_saved_ttbr0(struct task_struct *tsk,
172                                       struct mm_struct *mm)
173 {
174         u64 ttbr;
175 
176         if (!system_uses_ttbr0_pan())
177                 return;
178 
179         if (mm == &init_mm)
180                 ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
181         else
182                 ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48;
183 
184         WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
185 }
186 #else
187 static inline void update_saved_ttbr0(struct task_struct *tsk,
188                                       struct mm_struct *mm)
189 {
190 }
191 #endif
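The ASID(mm) << 48 part above deserves a note: TTBR0_EL1 carries the ASID in bits [63:48], and as far as I know ASID(mm) is just the low 16 bits of mm->context.id. A purely illustrative helper (my own, not kernel code) showing how that TTBR0 value is composed:

/* illustration only: baddr | (ASID << 48) */
static inline u64 compose_ttbr0(u64 pgd_phys, u64 context_id)
{
        /* TLB entries are tagged with this ASID, which is what later
         * allows switching address spaces without flushing the TLB. */
        return pgd_phys | ((context_id & 0xffff) << 48);
}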
arch/arm64/kernel/vmlinux.lds.S @ v5.15
195 reserved_pg_dir = .;
196 . += PAGE_SIZE;
Then, the other way around, where is the code that does the TLB flush when we are not context-switching to a kernel thread? I know a flush has to happen, and I have traced this code before, but I cannot find it right away.
Found it.
include/linux/mmu_context.h @ v5.15
8 /* Architectures that care about IRQ state in switch_mm can override this. */
9 #ifndef switch_mm_irqs_off
10 # define switch_mm_irqs_off switch_mm
11 #endif
arch/arm64/include/asm/mmu_context.h @ v5.15
218 static inline void
219 switch_mm(struct mm_struct *prev, struct mm_struct *next,
220           struct task_struct *tsk)
221 {
222         if (prev != next)
223                 __switch_mm(next);
224 
225         /*
226          * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
227          * value may have not been initialised yet (activate_mm caller) or the
228          * ASID has changed since the last run (following the context switch
229          * of another thread of the same process).
230          */
231         update_saved_ttbr0(tsk, next);
232 }
204 static inline void __switch_mm(struct mm_struct *next)
205 {
206         /*
207          * init_mm.pgd does not contain any user mappings and it is always
208          * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
209          */
210         if (next == &init_mm) {
211                 cpu_set_reserved_ttbr0();
212                 return;
213         }
214 
215         check_and_switch_context(next);
216 }
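cpu_set_reserved_ttbr0() is where the reserved_pg_dir from the linker script above comes back: TTBR0_EL1 is pointed at that all-zero page so that no user mapping is reachable at all. Paraphrased from memory of arch/arm64/include/asm/mmu_context.h, so treat the exact body as my assumption:

/* paraphrased sketch, not a verbatim v5.15 quote */
static inline void cpu_set_reserved_ttbr0(void)
{
        unsigned long ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));

        write_sysreg(ttbr, ttbr0_el1);  /* TTBR0 now walks an empty pgd */
        isb();
}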
arch/arm64/mm/context.c @ v5.15
215 void check_and_switch_context(struct mm_struct *mm)
216 {
217         unsigned long flags;
218         unsigned int cpu;
219         u64 asid, old_active_asid;
220 
221         if (system_supports_cnp())
222                 cpu_set_reserved_ttbr0();
223 
224         asid = atomic64_read(&mm->context.id);
225 
226         /*
227          * The memory ordering here is subtle.
228          * If our active_asids is non-zero and the ASID matches the current
229          * generation, then we update the active_asids entry with a relaxed
230          * cmpxchg. Racing with a concurrent rollover means that either:
231          *
232          * - We get a zero back from the cmpxchg and end up waiting on the
233          *   lock. Taking the lock synchronises with the rollover and so
234          *   we are forced to see the updated generation.
235          *
236          * - We get a valid ASID back from the cmpxchg, which means the
237          *   relaxed xchg in flush_context will treat us as reserved
238          *   because atomic RmWs are totally ordered for a given location.
239          */
240         old_active_asid = atomic64_read(this_cpu_ptr(&active_asids));
241         if (old_active_asid && asid_gen_match(asid) &&
242             atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_asids),
243                                      old_active_asid, asid))
244                 goto switch_mm_fastpath;
245 
246         raw_spin_lock_irqsave(&cpu_asid_lock, flags);
247         /* Check that our ASID belongs to the current generation. */
248         asid = atomic64_read(&mm->context.id);
249         if (!asid_gen_match(asid)) {
250                 asid = new_context(mm);
251                 atomic64_set(&mm->context.id, asid);
252         }
253 
254         cpu = smp_processor_id();
255         if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending))
256                 local_flush_tlb_all();
257 
258         atomic64_set(this_cpu_ptr(&active_asids), asid);
259         raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
260 
261 switch_mm_fastpath:
I did not remember the code path being this deep before reaching the TLB flush.
And the point is that, depending on the ASID, the TLB flush can be skipped altogether; I should come back to this later.
https://developer.arm.com/documentation/den0024/a/The-Memory-Management-Unit/Context-switching
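A note to my future self for when I revisit the ASID part: mm->context.id packs a generation number on top of the ASID, and the fast path above skips any flush as long as the ASID still belongs to the current generation, because TLB entries are tagged with the ASID anyway. My rough recollection of the generation check in arch/arm64/mm/context.c (treat the exact macro body as an assumption):

/* paraphrased sketch: mm->context.id = [ generation | ASID ] */
#define asid_gen_match(asid) \
        (!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits))

As far as I understand, only an ASID-space rollover (new_context() -> flush_context()) marks every CPU in tlb_flush_pending, and that is when the local_flush_tlb_all() in the slow path actually fires.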