SMP Implementation for
OpenBSD/sgi [Japanese Edition]
   Takuya ASADA<syuu@openbsd.org>
•                AsiaBSDCon 2010



•
    http://bit.ly/octane2
•          BSD   EmbeddedOS 64bit         SMP



•                  MIPS
    *BSD                   MIPS     SMP



•

•      SMP
                 …
MIPS/SMP                          (1)

• Broadcom SiByte BCM1250
  2core 1GHz MIPS64, DDR DRAM, GbE,
  PCI, HT
• Cisco 3845
•
MIPS/SMP                          (1)

• Broadcom SiByte BCM1250
  2core 1GHz MIPS64, DDR DRAM, GbE,
  PCI, HT
• Cisco 3845
•
      128                   orz
MIPS/SMP                    (2)

•         SGI               ebay



•
• SGI Octane    2core   OpenBSD
MIPS/SMP                      (2)

•         SGI                 ebay



•
• SGI Octane     2core    OpenBSD


                $33!!!!
Octane2
Processors MIPS R12000 400MHz x2


 Memory         1GB SDRAM

Graphics     3D Graphics Card

 Sound     Integrated Digital Audio

 Storage      35GB SCSI HDD

Ethernet         100BASE-T
OpenBSD developer
•   2009/4


•                OpenBSD/sgi



•   2009/9 developer



•   2009/11
• OpenBSD
• -current

•
Demo
1,800    4% overhead
 1,600
 1,400
 1,200                      46% speed up
 1,000
                                           GENERIC-IP30
   800
                                           GENERIC-IP30.MP
   600
   400
   200
     0
(sec)         -j1        -j2        -j3
OpenBSD/amd64
 300

 250
        2% overhead

 200
                              44% speed up
 150                                         GENERIC
                                             GENERIC.MP
 100

  50

   0
            -j1        -j2            -j3
(sec)
•         cpu_info   •
                     •
•   per-processor
    cpu_info
                     •   IPI: Inter-Processor Interrupt
•                    •   Per-processor ASID

•                    •   TLB shootdown

                     •   Lazy FPU handling
•                    •   Per-processor clock
•   per-processor

•

•
…
per-processor

•                   sgi port




•
    per-processor                  cpu_info

•
    pmap       per-processor * per-process
clock.c
// defined as global variables
       u_int32_t cpu_counter_last;
       u_int32_t cpu_counter_interval;
       u_int32_t pendingticks;

uint32_t clock_int5(uint32_t mask, struct trap_frame *tf)
...
       clkdiff = cp0_get_count() - cpu_counter_last;
       while (clkdiff >= cpu_counter_interval) {
               cpu_counter_last += cpu_counter_interval;
               clkdiff = cp0_get_count() - cpu_counter_last;
               pendingticks++;
clock.c


uint32_t clock_int5(uint32_t mask, struct trap_frame *tf)
...
       struct cpu_info *ci = curcpu();
       clkdiff = cp0_get_count() - ci->ci_cpu_counter_last;
       while (clkdiff >= ci->ci_cpu_counter_interval) {
               ci->ci_cpu_counter_last +=
                   ci->ci_cpu_counter_interval;
               clkdiff =
                  cp0_get_count() - ci->ci_cpu_counter_last;
               ci->ci_pendingticks++;
pmap
•   MIPS TLB            ASID         8bit                 ID
                                             TLB



•      8bit      PID                               ASID


              ASID Process

•          SMP                              ASID



•
    ASID                     per-processor * per-process
pmap


uint pmap_alloc_tlbpid(struct proc *p)
...
               tlbpid_cnt = id + 1;
               pmap->pm_tlbpid = id;




uint pmap_alloc_tlbpid(struct proc *p)
...
               tlbpid_cnt[cpuid] = id + 1;
               pmap->pm_tlbpid[cpuid] = id;
•
    •            : mutex, mp_lock

    •                : CAS, 64bit add, etc..

•

    •
    •
    •   trap()
uint32_t clock_int5(uint32_t mask, struct trap_frame *tf)
...
       if (tf->ipl < IPL_CLOCK) {
#ifdef MULTIPROCESSOR
                __mp_lock(&kernel_lock);
#endif
                while (ci->ci_pendingticks) {
                        clk_count.ec_count++;
                        hardclock(tf);
                        ci->ci_pendingticks--;
                }
#ifdef MULTIPROCESSOR
                __mp_unlock(&kernel_lock);
#endif
uint32_t clock_int5(uint32_t mask, struct trap_frame *tf)
...
       if (tf->ipl < IPL_CLOCK) {
#ifdef MULTIPROCESSOR
                __mp_lock(&kernel_lock);
#endif
                while (ci->ci_pendingticks) {
                        clk_count.ec_count++;
                        hardclock(tf);
                        ci->ci_pendingticks--;
                }
#ifdef MULTIPROCESSOR
                __mp_unlock(&kernel_lock);
#endif

       Actually, it causes a bug... described later
•
•   TLB

•
•
    •

•
    •

        →
TLB

•   MIPS TLB




•   TLB shootdown                              TLB
        invalidate/update

    •   IPI(Inter-Processor Interrupt)
TLB shootdown
              CPU A         CPU B
      Fault
TLB shootdown                  IPI
                      IPI
TLB invalidate                 TLB invalidate
CPU B                          CPU A
• MIPS R10000/R12000

•       Octane

•
SMP

•
    •
    •

    • TLB shootdown
•

•

•

•
JTAG ICE
•
•


•   Embedded OS SMP



•      Octane
                  or
ddb
•   OpenBSD

•


•                 SMP
          ddb
    ddb

•         …
printf()!
•
•   printf(message)

•
    •

    •                  printf()



•
printf

•   Octane

•

•

•                   cpu0         com0 cpu1
             com1

•                          ;-)
print
•

    •           printf

    •
•
•
•

•
void mtx_enter(struct mutex *mtx)
...
        for (;;) {
                if (mtx->mtx_wantipl != IPL_NONE)
                        s = splraise(mtx->mtx_wantipl);
                if (try_lock(mtx)) {
                        if (mtx->mtx_wantipl != IPL_NONE)
                                mtx->mtx_oldipl = s;
                        mtx->mtx_owner = curcpu();
                        return;
                }
                if (mtx->mtx_wantipl != IPL_NONE)
                        splx(s);
                if (++i > MTX_TIMEOUT)
                        panic("mtx deadlocked\n");
        }
void mtx_enter(struct mutex *mtx)
...
        for (;;) {
                if (mtx->mtx_wantipl != IPL_NONE)
                        s = splraise(mtx->mtx_wantipl);
                if (try_lock(mtx)) {
                        if (mtx->mtx_wantipl != IPL_NONE)
                                mtx->mtx_oldipl = s;
                        mtx->mtx_owner = curcpu();
                        return;
                }
                if (mtx->mtx_wantipl != IPL_NONE)
                        splx(s);
                if (++i > MTX_TIMEOUT)
                        panic("mtx deadlocked\n");
        }
CPU A                 CPU B



        Lock   Lock
         A      B
CPU A                 CPU B



        Lock   Lock
         A      B
CPU A                 CPU B



        Lock   Lock
         A      B
void mtx_enter(struct mutex *mtx)
...
        for (;;) {
                if (mtx->mtx_wantipl != IPL_NONE)
                        s = splraise(mtx->mtx_wantipl);
                if (try_lock(mtx)) {
                        if (mtx->mtx_wantipl != IPL_NONE)
                                mtx->mtx_oldipl = s;
                        mtx->mtx_owner = curcpu();
                        mtx->mtx_ra =
                            __builtin_return_address(0);
                        return;
                }
                if (mtx->mtx_wantipl != IPL_NONE)
                        splx(s);
                if (++i > MTX_TIMEOUT)
                        panic("mtx deadlocked ra:%p\n",
                                mtx->mtx_ra);
        }
IPI
           CPU A          CPU B
                                  Interrupt
       Fault
                             Disable interrupt
  Acquire lock
                   Lock
                             Wait until released
TLB shootdown                 Blocked
                   IPI
    Wait ACK
IPI
           CPU A          CPU B
                                  Interrupt
       Fault
                             Disable interrupt
  Acquire lock
                   Lock
                             Wait until released
TLB shootdown
                   IPI
    Wait ACK
IPI
           CPU A          CPU B
                                  Interrupt
       Fault
                             Disable interrupt
  Acquire lock               Re-enable interrupt
                   Lock
                             Wait until released
TLB shootdown                Accept Interrupt
                   IPI
    Wait ACK
IPI
           CPU A          CPU B
                                  Interrupt
       Fault
                             Disable interrupt
  Acquire lock               Re-enable interrupt
                   Lock
                             Wait until released
TLB shootdown                Accept Interrupt
                   IPI
    Wait ACK                 ACK for rendezvous
IPI
           CPU A              CPU B
                                      Interrupt
       Fault
                 IPI             Disable interrupt
  Acquire lock                   Re-enable interrupt
                       Lock
                                 Wait until released
TLB shootdown                    Accept Interrupt
                       IPI
    Wait ACK                     ACK for rendezvous
uint32_t clock_int5(uint32_t mask, struct trap_frame *tf)
...
       if (tf->ipl < IPL_CLOCK) {
#ifdef MULTIPROCESSOR
                u_int32_t sr;

                sr = getsr();
                ENABLEIPI();
                __mp_lock(&kernel_lock);
#endif
                while (ci->ci_pendingticks) {
                        clk_count.ec_count++;
                        hardclock(tf);
                        ci->ci_pendingticks--;
                }
#ifdef MULTIPROCESSOR
                __mp_unlock(&kernel_lock);
                setsr(sr);
#endif
splhigh()       IPI
           CPU A              CPU B
                                      splhigh()
       Fault
  Acquire lock
                       Lock
                                 Wait until released
TLB shootdown                     Blocked
                       IPI
    Wait ACK
splhigh()       IPI
           CPU A              CPU B
                                      splhigh()
       Fault
  Acquire lock
                       Lock
                                 Wait until released
TLB shootdown
                       IPI
    Wait ACK
splhigh()       IPI
           CPU A              CPU B
                                      splhigh()
       Fault
  Acquire lock
                       Lock
                                 Wait until released
TLB shootdown                    Accept Interrupt
                       IPI
    Wait ACK
splhigh()       IPI
           CPU A              CPU B
                                      splhigh()
       Fault
  Acquire lock
                       Lock
                                 Wait until released
TLB shootdown                    Accept Interrupt
                       IPI
    Wait ACK                     ACK for rendezvous
splhigh()         IPI
           CPU A                CPU B
                 IPL_IPI                splhigh()
       Fault
                                IPL_HIGH
  Acquire lock
                         Lock
                                   Wait until released
TLB shootdown                      Accept Interrupt
                         IPI
    Wait ACK                        ACK for rendezvous
 #define          IPL_TTY           4         /*   terminal */
 #define          IPL_VM            5         /*   memory allocation */
 #define          IPL_CLOCK         6         /*   clock */
 #define          IPL_HIGH          7         /*   everything */
 #define          NIPLS             8         /*   Number of levels */



 #define          IPL_TTY           4         /*   terminal */
 #define          IPL_VM            5         /*   memory allocation */
 #define          IPL_CLOCK         6         /*   clock */
 #define          IPL_HIGH          7         /*   everything */
 #define          IPL_IPI           8         /*   ipi */
 #define          NIPLS             9         /*   Number of levels */
•                                    cpu_info



•   malloc()              →

    •             TLB



    •   MIPS             TLB   TLB
        TLB                          cpu_info



•
               wrapper
wrapper
vaddr_t smp_malloc(size_t size)
...
       if (size < PAGE_SIZE) {
                va = (vaddr_t)malloc(size, M_DEVBUF, M_NOWAIT);
                if (va == NULL)
                        return NULL;
                error = pmap_extract(pmap_kernel(), va, &pa);
                if (error == FALSE)
                        return NULL;
       } else {
                TAILQ_INIT(&mlist);
                error = uvm_pglistalloc(size, 0, -1L, 0, 0,
                    &mlist, 1, UVM_PLA_NOWAIT);
                if (error)
                        return NULL;
                m = TAILQ_FIRST(&mlist);
                pa = VM_PAGE_TO_PHYS(m);
       }

       return PHYS_TO_XKPHYS(pa, CCA_CACHED);
TLB shootdown
•   invalidate/update    TLB shootdown



           kernel mode
            user mode

•                        shootdown
                               shootdown
    ASID

•               pmap                       TLB shootdown
void pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
...
   arg.pmap = pmap;
   arg.va = va;
   smp_rendezvous(0, pmap_invalidate_page_action, 0,
    (void *)&arg);

void pmap_invalidate_page_action(void *arg)
...
   pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap;
   vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va;
   if (is_kernel_pmap(pmap)) {
      pmap_TLB_invalidate_kernel(va);
      return;
   }
   if (pmap->pm_asid[PCPU_GET(cpuid)].gen
       != PCPU_GET(asid_generation))
      return;
   else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
      pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
      return;
   }
   va = pmap_va_asid(pmap, (va & ~PGOFSET));
   mips_TBIS(va);
CPU_INFO_FOREACH(cii, ci)
        if (cpuset_isset(&cpus_running, ci)) {
                unsigned int i = ci->ci_cpuid;
                unsigned int m = 1 << i;
                if (pmap->pm_asid[i].pma_asidgen !=
                    pmap_asid_info[i].pma_asidgen)
                        continue;
                else if (ci->ci_curpmap != pmap) {
                        pmap->pm_asid[i].pma_asidgen = 0;
                        continue;
                }
                cpumask |= m;
        }

if (cpumask == 1 << cpuid) {
        u_long asid;

        asid = pmap->pm_asid[cpuid].pma_asid << VMTLB_PID_SHIFT;
        tlb_flush_addr(va | asid);
} else if (cpumask) {
        struct pmap_invalidate_page_arg arg;
        arg.pmap = pmap;
        arg.va = va;

        smp_rendezvous_cpus(cpumask, pmap_invalidate_user_page_action,
                &arg);
}
•   SGI Origin 350   SMP

•   Cavium OCTEON OpenBSD
                           SoC



•   SMP

    •   rthread

    •

SMP Implementation for OpenBSD/sgi [Japanese Edition]

  • 1.
    SMP Implementation for OpenBSD/sgi [Japanese Edition] Takuya ASADA <syuu@openbsd.org>
  • 2.
    AsiaBSDCon 2010 • http://bit.ly/octane2
  • 3.
    BSD EmbeddedOS 64bit SMP • MIPS *BSD MIPS SMP • • SMP …
  • 4.
    MIPS/SMP (1) • Broadcom SiByte BCM1250 2core 1GHz MIPS64, DDR DRAM, GbE, PCI, HT • Cisco 3845 •
  • 5.
    MIPS/SMP (1) • Broadcom SiByte BCM1250 2core 1GHz MIPS64, DDR DRAM, GbE, PCI, HT • Cisco 3845 • 128 orz
  • 6.
    MIPS/SMP (2) • SGI ebay • • SGI Octane 2core OpenBSD
  • 7.
    MIPS/SMP (2) • SGI ebay • • SGI Octane 2core OpenBSD $33!!!!
  • 8.
    Octane2 Processors MIPS R12000 400MHz x2 Memory 1GB SDRAM Graphics 3D Graphics Card Sound Integrated Digital Audio Storage 35GB SCSI HDD Ethernet 100BASE-T
  • 13.
    OpenBSD developer • 2009/4 • OpenBSD/sgi • 2009/9 developer • 2009/11
  • 22.
  • 23.
  • 24.
    1,800 4% overhead 1,600 1,400 1,200 46% speed up 1,000 GENERIC-IP30 800 GENERIC-IP30.MP 600 400 200 0 (sec) -j1 -j2 -j3
  • 25.
    OpenBSD/amd64 300 250 2% overhead 200 44% speed up 150 GENERIC GENERIC.MP 100 50 0 -j1 -j2 -j3 (sec)
  • 26.
    cpu_info • • • per-processor cpu_info • IPI: Inter-Processor Interrupt • • Per-processor ASID • • TLB shootdown • Lazy FPU handling • • Per-processor clock
  • 27.
    per-processor • • …
  • 28.
    per-processor • sgi port • per-processor cpu_info • pmap per-processor * per-process
  • 29.
    clock.c // defined as global variables u_int32_t cpu_counter_last; u_int32_t cpu_counter_interval; u_int32_t pendingticks; uint32_t clock_int5(uint32_t mask, struct trap_frame *tf) ... clkdiff = cp0_get_count() - cpu_counter_last; while (clkdiff >= cpu_counter_interval) { cpu_counter_last += cpu_counter_interval; clkdiff = cp0_get_count() - cpu_counter_last; pendingticks++;
  • 30.
    clock.c uint32_t clock_int5(uint32_t mask,struct trap_frame *tf) ... struct cpu_info *ci = curcpu(); clkdiff = cp0_get_count() - ci->ci_cpu_counter_last; while (clkdiff >= ci->ci_cpu_counter_interval) { ci->ci_cpu_counter_last += ci->ci_cpu_counter_interval; clkdiff = cp0_get_count() - ci->ci_cpu_counter_last; ci->ci_pendingticks++;
  • 31.
    pmap • MIPS TLB ASID 8bit ID TLB • 8bit PID ASID ASID Process • SMP ASID • ASID per-processor * per-process
  • 32.
    pmap uint pmap_alloc_tlbpid(struct proc*p) ... tlbpid_cnt = id + 1; pmap->pm_tlbpid = id; uint pmap_alloc_tlbpid(struct proc *p) ... tlbpid_cnt[cpuid] = id + 1; pmap->pm_tlbpid[cpuid] = id;
  • 33.
    • : mutex, mp_lock • : CAS, 64bit add, etc.. • • • • trap()
  • 34.
    uint32_t clock_int5(uint32_t mask,struct trap_frame *tf) ... if (tf->ipl < IPL_CLOCK) { #ifdef MULTIPROCESSOR __mp_lock(&kernel_lock); #endif while (ci->ci_pendingticks) { clk_count.ec_count++; hardclock(tf); ci->ci_pendingticks--; } #ifdef MULTIPROCESSOR __mp_unlock(&kernel_lock); #endif
  • 35.
    uint32_t clock_int5(uint32_t mask,struct trap_frame *tf) ... if (tf->ipl < IPL_CLOCK) { #ifdef MULTIPROCESSOR __mp_lock(&kernel_lock); #endif while (ci->ci_pendingticks) { clk_count.ec_count++; hardclock(tf); ci->ci_pendingticks--; } #ifdef MULTIPROCESSOR __mp_unlock(&kernel_lock); #endif Actually, it causes a bug... described later
  • 36.
    • • TLB •
  • 37.
    • • • →
  • 38.
    TLB • MIPS TLB • TLB shootdown TLB invalidate/update • IPI(Inter-Processor Interrupt)
  • 39.
    TLB shootdown CPU A CPU B Fault TLB shootdown IPI IPI TLB invalidate TLB invalidate CPU B CPU A
  • 40.
  • 41.
    SMP • • • • TLB shootdown
  • 42.
  • 43.
    JTAG ICE • • • Embedded OS SMP • Octane or
  • 44.
    ddb • OpenBSD • • SMP ddb ddb • …
  • 45.
    printf()! • • printf(message) • • • printf() •
  • 46.
    printf • Octane • • • cpu0 com0 cpu1 com1 • ;-)
  • 47.
    print • • printf • •
  • 48.
  • 49.
    void mtx_enter(struct mutex *mtx) ...         for (;;) {                 if (mtx->mtx_wantipl != IPL_NONE)                         s = splraise(mtx->mtx_wantipl);                 if (try_lock(mtx)) {                         if (mtx->mtx_wantipl != IPL_NONE)                                 mtx->mtx_oldipl = s;                         mtx->mtx_owner = curcpu();                         return;                 }                 if (mtx->mtx_wantipl != IPL_NONE)                         splx(s);                 if (++i > MTX_TIMEOUT)                         panic("mtx deadlocked\n");         }
  • 50.
    void mtx_enter(struct mutex *mtx) ...         for (;;) {                 if (mtx->mtx_wantipl != IPL_NONE)                         s = splraise(mtx->mtx_wantipl);                 if (try_lock(mtx)) {                         if (mtx->mtx_wantipl != IPL_NONE)                                 mtx->mtx_oldipl = s;                         mtx->mtx_owner = curcpu();                         return;                 }                 if (mtx->mtx_wantipl != IPL_NONE)                         splx(s);                 if (++i > MTX_TIMEOUT)                         panic("mtx deadlocked\n");         }
  • 51.
    CPU A CPU B Lock Lock A B
  • 52.
    CPU A CPU B Lock Lock A B
  • 53.
    CPU A CPU B Lock Lock A B
  • 54.
    void mtx_enter(struct mutex *mtx) ...         for (;;) {                 if (mtx->mtx_wantipl != IPL_NONE)                         s = splraise(mtx->mtx_wantipl);                 if (try_lock(mtx)) {                         if (mtx->mtx_wantipl != IPL_NONE)                                 mtx->mtx_oldipl = s;                         mtx->mtx_owner = curcpu();                         mtx->mtx_ra =                          __builtin_return_address(0);                         return;                 }                 if (mtx->mtx_wantipl != IPL_NONE)                         splx(s);                 if (++i > MTX_TIMEOUT)                         panic("mtx deadlocked ra:%p\n",                                 mtx->mtx_ra);         }
  • 55.
    IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Lock Wait until released TLB shootdown Blocked IPI Wait ACK
  • 56.
    IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Lock Wait until released TLB shootdown IPI Wait ACK
  • 57.
    IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Re-enable interrupt Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK
  • 58.
    IPI CPU A CPU B Interrupt Fault Disable interrupt Acquire lock Re-enable interrupt Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  • 59.
    IPI CPU A CPU B Interrupt Fault IPI Disable interrupt Acquire lock Re-enable interrupt Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  • 60.
    uint32_t clock_int5(uint32_t mask,struct trap_frame *tf) ... if (tf->ipl < IPL_CLOCK) { #ifdef MULTIPROCESSOR u_int32_t sr; sr = getsr(); ENABLEIPI(); __mp_lock(&kernel_lock); #endif while (ci->ci_pendingticks) { clk_count.ec_count++; hardclock(tf); ci->ci_pendingticks--; } #ifdef MULTIPROCESSOR __mp_unlock(&kernel_lock); setsr(sr); #endif
  • 61.
    splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown Blocked IPI Wait ACK
  • 62.
    splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown IPI Wait ACK
  • 63.
    splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK
  • 64.
    splhigh() IPI CPU A CPU B splhigh() Fault Acquire lock Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  • 65.
    splhigh() IPI CPU A CPU B IPL_IPI splhigh() Fault IPL_HIGH Acquire lock Lock Wait until released TLB shootdown Accept Interrupt IPI Wait ACK ACK for rendezvous
  • 66.
     #define        IPL_TTY         4       /* terminal */  #define        IPL_VM          5       /* memory allocation */  #define        IPL_CLOCK       6       /* clock */ #define        IPL_HIGH        7       /* everything */ #define        NIPLS           8       /* Number of levels */  #define        IPL_TTY         4       /* terminal */  #define        IPL_VM          5       /* memory allocation */  #define        IPL_CLOCK       6       /* clock */ #define        IPL_HIGH        7       /* everything */ #define        IPL_IPI         8       /* ipi */ #define        NIPLS           9       /* Number of levels */
  • 67.
    cpu_info • malloc() → • TLB • MIPS TLB TLB TLB cpu_info • wrapper
  • 68.
    wrapper vaddr_t smp_malloc(size_t size) ... if (size < PAGE_SIZE) { va = (vaddr_t)malloc(size, M_DEVBUF, M_NOWAIT); if (va == NULL) return NULL; error = pmap_extract(pmap_kernel(), va, &pa); if (error == FALSE) return NULL; } else { TAILQ_INIT(&mlist); error = uvm_pglistalloc(size, 0, -1L, 0, 0, &mlist, 1, UVM_PLA_NOWAIT); if (error) return NULL; m = TAILQ_FIRST(&mlist); pa = VM_PAGE_TO_PHYS(m); } return PHYS_TO_XKPHYS(pa, CCA_CACHED);
  • 69.
    TLB shootdown • invalidate/update TLB shootdown kernel mode user mode • shootdown shootdown ASID • pmap TLB shootdown
  • 70.
    void pmap_invalidate_page(pmap_t pmap,vm_offset_t va) ... arg.pmap = pmap; arg.va = va; smp_rendezvous(0, pmap_invalidate_page_action, 0, (void *)&arg); void pmap_invalidate_page_action(void *arg) ... pmap_t pmap = ((struct pmap_invalidate_page_arg *)arg)->pmap; vm_offset_t va = ((struct pmap_invalidate_page_arg *)arg)->va; if (is_kernel_pmap(pmap)) { pmap_TLB_invalidate_kernel(va); return; } if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) return; else if (!(pmap->pm_active & PCPU_GET(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; return; } va = pmap_va_asid(pmap, (va & ~PGOFSET)); mips_TBIS(va);
  • 71.
    CPU_INFO_FOREACH(cii, ci) if (cpuset_isset(&cpus_running, ci)) { unsigned int i = ci->ci_cpuid; unsigned int m = 1 << i; if (pmap->pm_asid[i].pma_asidgen != pmap_asid_info[i].pma_asidgen) continue; else if (ci->ci_curpmap != pmap) { pmap->pm_asid[i].pma_asidgen = 0; continue; } cpumask |= m; } if (cpumask == 1 << cpuid) { u_long asid; asid = pmap->pm_asid[cpuid].pma_asid << VMTLB_PID_SHIFT; tlb_flush_addr(va | asid); } else if (cpumask) { struct pmap_invalidate_page_arg arg; arg.pmap = pmap; arg.va = va; smp_rendezvous_cpus(cpumask, pmap_invalidate_user_page_action, &arg); }
  • 72.
    SGI Origin 350 SMP • Cavium OCTEON OpenBSD SoC • SMP • rthread •