Ядро Linux в комментариях

       

Arch/i386/kernel/smp.c


3492 /* 3493 * Intel MP v1.1/v1.4 specification support routines 3494 * for multi-pentium hosts. 3495 * 3496 * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> 3497 * (c) 1998 Ingo Molnar 3498 * 3499 * Supported by Caldera http://www.caldera.com. 3500 * Much of the core SMP work is based on previous 3501 * work by Thomas Radke, to whom a great many thanks 3502 * are extended. 3503 * 3504 * Thanks to Intel for making available several 3505 * different Pentium, Pentium Pro and 3506 * Pentium-II/Xeon MP machines. 3507 * 3508 * This code is released under the GNU public 3509 * license version 2 or later. 3510 * 3511 * Fixes 3512 * Felix Koop : NR_CPUS used properly 3513 * Jose Renau : Handle single CPU case. 3514 * Alan Cox : By repeated request 8) - 3515 * Total BogoMIP report. 3516 * Greg Wright : Fix for kernel stacks panic. 3517 * Erich Boleyn : MP v1.4 and additional changes. 3518 * Matthias Sattler : Changes for 2.1 kernel map. 3519 * Michel Lespinasse: Changes for 2.1 kernel map. 3520 * Michael Chastain : Change trampoline.S to gnu as. 3521 * Alan Cox : Dumb bug: 'B' step PPro's are fine 3522 * Ingo Molnar : Added APIC timers, based on code 3523 * from Jose Renau 3524 * Alan Cox : Added EBDA scanning 3525 * Ingo Molnar : various cleanups and rewrites */ 3526 3527 #include <linux/config.h> 3528 #include <linux/mm.h> 3529 #include <linux/kernel_stat.h> 3530 #include <linux/delay.h> 3531 #include <linux/mc146818rtc.h> 3532 #include <linux/smp_lock.h> 3533 #include <linux/init.h> 3534 #include <asm/mtrr.h> 3535 3536 #include "irq.h" 3537 3538 extern unsigned long start_kernel; 3539 extern void update_one_process( struct task_struct *p, 3540 unsigned long ticks, unsigned long user, 3541 unsigned long system, int cpu); 3542 /* Some notes on processor bugs: 3543 * 3544 * Pentium and Pentium Pro (and all CPUs) have 3545 * bugs. The Linux issues for SMP are handled as 3546 * follows. 
3547 * 3548 * Pentium Pro: 3549 * Occasional delivery of 'spurious interrupt' as trap 3550 * #16. This is very rare. The kernel logs the event and 3551 * recovers 3552 * 3553 * Pentium: 3554 * There is a marginal case where REP MOVS on 100MHz SMP 3555 * machines with B stepping processors can fail. XXX 3556 * should provide an L1cache=Writethrough or L1cache=off 3557 * option. 3558 * 3559 * B stepping CPUs may hang. There are hardware work 3560 * arounds for this. We warn about it in case your board 3561 * doesnt have the work arounds. Basically thats so I can 3562 * tell anyone with a B stepping CPU and SMP problems 3563 * "tough". 3564 * 3565 * Specific items [From Pentium Processor 3566 * Specification Update] 3567 * 3568 * 1AP. Linux doesn't use remote read 3569 * 2AP. Linux doesn't trust APIC errors 3570 * 3AP. We work around this 3571 * 4AP. Linux never generated 3 interrupts of the 3572 * same pri to cause a lost local interrupt. 3573 * 5AP. Remote read is never used 3574 * 9AP. XXX NEED TO CHECK WE HANDLE THIS XXX 3575 * 10AP. XXX NEED TO CHECK WE HANDLE THIS XXX 3576 * 11AP. Linux reads the APIC between writes to 3577 * avoid this, as per the documentation. Make 3578 * sure you preserve this as it affects the C 3579 * stepping chips too. 3580 * 3581 * If this sounds worrying believe me these bugs are 3582 * ___RARE___ and there's about nothing of note with 3583 * C stepping upwards. 
*/ 3584 3585 3586 /* Kernel spinlock */ 3587 spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; 3588 3589 /* function prototypes: */ 3590 3591 static void cache_APIC_registers (void); 3592 static void stop_this_cpu (void); 3593 3594 /* Set if we find a B stepping CPU */ 3595 static int smp_b_stepping = 0; 3596 3597 /* Setup configured maximum number of CPUs to activate */ 3598 static int max_cpus = -1; 3599 /* Have we found an SMP box */ 3600 int smp_found_config=0; 3601 3602 /* Bitmask of physically existing CPUs */ 3603 unsigned long cpu_present_map = 0; 3604 /* Bitmask of currently online CPUs */ 3605 unsigned long cpu_online_map = 0; 3606 /* Total count of live CPUs */ 3607 int smp_num_cpus = 1; 3608 /* Set when the idlers are all forked */ 3609 int smp_threads_ready=0; 3610 /* which CPU maps to which logical number */ 3611 volatile int cpu_number_map[NR_CPUS]; 3612 /* which logical number maps to which CPU */ 3613 volatile int __cpu_logical_map[NR_CPUS]; 3614 /* We always use 0 the rest is ready for parallel 3615 * delivery */ 3616 static volatile 3617 unsigned long cpu_callin_map[NR_CPUS] = {0,}; 3618 /* We always use 0 the rest is ready for parallel 3619 * delivery */ 3620 static volatile 3621 unsigned long cpu_callout_map[NR_CPUS] = {0,}; 3622 /* Used for the invalidate map that's also checked in the 3623 * spinlock */ 3624 volatile unsigned long smp_invalidate_needed; 3625 /* Stack vector for booting CPUs */ 3626 volatile unsigned long kstack_ptr; 3627 /* Per CPU bogomips and other parameters */ 3628 struct cpuinfo_x86 cpu_data[NR_CPUS]; 3629 /* Internal processor count */ 3630 static unsigned int num_processors = 1; 3631 /* Address of the I/O apic (not yet used) */ 3632 unsigned long mp_ioapic_addr = 0xFEC00000; 3633 /* Processor that is doing the boot up */ 3634 unsigned char boot_cpu_id = 0; 3635 /* Tripped once we need to start cross invalidating */ 3636 static int smp_activated = 0; 3637 /* APIC version number */ 3638 int apic_version[NR_CPUS]; 3639 /* Just 
debugging the assembler.. */ 3640 unsigned long apic_retval; 3641 3642 /* Number of times the processor holds the lock */ 3643 volatile unsigned long kernel_counter=0; 3644 /* Number of times the processor holds the syscall lock*/ 3645 volatile unsigned long syscall_count=0; 3646 3647 /* Number of IPIs delivered */ 3648 volatile unsigned long ipi_count; 3649 3650 const char lk_lockmsg[] = 3651 "lock from interrupt context at %p\n"; 3652 3653 int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; 3654 extern int mp_irq_entries; 3655 extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 3656 extern int mpc_default_type; 3657 int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; 3658 int mp_current_pci_id = 0; 3659 unsigned long mp_lapic_addr = 0; 3660 /* 1 if "noapic" boot option passed */ 3661 int skip_ioapic_setup = 0; 3662 3663 /* #define SMP_DEBUG */ 3664 3665 #ifdef SMP_DEBUG 3666 #define SMP_PRINTK(x) printk x 3667 #else 3668 #define SMP_PRINTK(x) 3669 #endif 3670 3671 /* IA s/w dev Vol 3, Section 7.4 */ 3672 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 3673 3674 /* Reads and clears the Pentium Timestamp-Counter */ 3675 #define READ_TSC(x) __asm__ __volatile__ ( "rdtsc" \ 3676 :"=a" (((unsigned long*)&(x))[0]), \ 3677 "=d" (((unsigned long*)&(x))[1])) 3678 3679 #define CLEAR_TSC \ 3680 __asm__ __volatile__ ("\t.byte 0x0f, 0x30;\n":: \ 3681 "a"(0x00001000), "d"(0x00001000), "c"(0x10):"memory") 3682 3683 /* Setup routine for controlling SMP activation 3684 * 3685 * Command-line option of "nosmp" or "maxcpus=0" 3686 * will disable SMP activation entirely (the MPS 3687 * table probe still happens, though). 3688 * 3689 * Command-line option of "maxcpus=<NUM>", where 3690 * <NUM> is an integer greater than 0, limits the 3691 * maximum number of CPUs activated in SMP mode to 3692 * <NUM>. 
*/ 3693 3694 void __init smp_setup(char *str, int *ints) 3695 { 3696 if (ints && ints[0] > 0) 3697 max_cpus = ints[1]; 3698 else 3699 max_cpus = 0; 3700 } 3701 3702 void ack_APIC_irq(void) 3703 { 3704 /* Clear the IPI */ 3705 3706 /* Dummy read */ 3707 apic_read(APIC_SPIV); 3708 3709 /* Docs say use 0 for future compatibility */ 3710 apic_write(APIC_EOI, 0); 3711 } 3712 3713 /* Intel MP BIOS table parsing routines: */ 3714 3715 #ifndef CONFIG_X86_VISWS_APIC 3716 /* Checksum an MP configuration block. */ 3717 3718 static int mpf_checksum(unsigned char *mp, int len) 3719 { 3720 int sum=0; 3721 while(len--) 3722 sum+=*mp++; 3723 return sum&0xFF; 3724 } 3725 3726 /* Processor encoding in an MP configuration block */ 3727 3728 static char *mpc_family(int family,int model) 3729 { 3730 static char n[32]; 3731 static char *model_defs[]= 3732 { 3733 "80486DX","80486DX", 3734 "80486SX","80486DX/2 or 80487", 3735 "80486SL","Intel5X2(tm)", 3736 "Unknown","Unknown", 3737 "80486DX/4" 3738 }; 3739 if (family==0x6) 3740 return("Pentium(tm) Pro"); 3741 if (family==0x5) 3742 return("Pentium(tm)"); 3743 if (family==0x0F && model==0x0F) 3744 return("Special controller"); 3745 if (family==0x04 && model<9) 3746 return model_defs[model]; 3747 sprintf(n,"Unknown CPU [%d:%d]",family, model); 3748 return n; 3749 } 3750 3751 /* Read the MPC */ 3752 3753 static int __init 3754 smp_read_mpc(struct mp_config_table *mpc) 3755 { 3756 char str[16]; 3757 int count=sizeof(*mpc); 3758 int ioapics = 0; 3759 unsigned char *mpt=((unsigned char *)mpc)+count; 3760 3761 if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) 3762 { 3763 panic("SMP mptable: bad signature [%c%c%c%c]!\n", 3764 mpc->mpc_signature[0], 3765 mpc->mpc_signature[1], 3766 mpc->mpc_signature[2], 3767 mpc->mpc_signature[3]); 3768 return 1; 3769 } 3770 if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) 3771 { 3772 panic("SMP mptable: checksum error!\n"); 3773 return 1; 3774 } 3775 if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) 3776 { 
3777 printk("Bad Config Table version (%d)!!\n", 3778 mpc->mpc_spec); 3779 return 1; 3780 } 3781 memcpy(str,mpc->mpc_oem,8); 3782 str[8]=0; 3783 memcpy(ioapic_OEM_ID,str,9); 3784 printk("OEM ID: %s ",str); 3785 3786 memcpy(str,mpc->mpc_productid,12); 3787 str[12]=0; 3788 memcpy(ioapic_Product_ID,str,13); 3789 printk("Product ID: %s ",str); 3790 3791 printk("APIC at: 0x%lX\n",mpc->mpc_lapic); 3792 3793 /* save the local APIC address, it might be 3794 * non-default */ 3795 mp_lapic_addr = mpc->mpc_lapic; 3796 3797 /* Now process the configuration blocks. */ 3798 3799 while(count<mpc->mpc_length) 3800 { 3801 switch(*mpt) 3802 { 3803 case MP_PROCESSOR: 3804 { 3805 struct mpc_config_processor *m= 3806 (struct mpc_config_processor *)mpt; 3807 if (m->mpc_cpuflag&CPU_ENABLED) 3808 { 3809 printk("Processor #%d %s APIC version %d\n", 3810 m->mpc_apicid, 3811 mpc_family((m->mpc_cpufeature& 3812 CPU_FAMILY_MASK)>>8, 3813 (m->mpc_cpufeature& 3814 CPU_MODEL_MASK)>>4), 3815 m->mpc_apicver); 3816 #ifdef SMP_DEBUG 3817 if (m->mpc_featureflag&(1<<0)) 3818 printk(" Floating point unit present.\n"); 3819 if (m->mpc_featureflag&(1<<7)) 3820 printk(" Machine Exception supported.\n"); 3821 if (m->mpc_featureflag&(1<<8)) 3822 printk(" 64 bit compare & exchange " 3823 "supported.\n"); 3824 if (m->mpc_featureflag&(1<<9)) 3825 printk(" Internal APIC present.\n"); 3826 #endif 3827 if (m->mpc_cpuflag&CPU_BOOTPROCESSOR) 3828 { 3829 SMP_PRINTK((" Bootup CPU\n")); 3830 boot_cpu_id=m->mpc_apicid; 3831 } 3832 else /* Boot CPU already counted */ 3833 num_processors++; 3834 3835 if (m->mpc_apicid>NR_CPUS) 3836 printk("Processor #%d unused. (Max %d " 3837 "processors).\n",m->mpc_apicid, NR_CPUS); 3838 else 3839 { 3840 int ver = m->mpc_apicver; 3841 3842 cpu_present_map|=(1<<m->mpc_apicid); 3843 /* Validate version */ 3844 if (ver == 0x0) { 3845 printk("BIOS bug, APIC version is 0 for " 3846 "CPU#%d! fixing up to 0x10. 
(tell " 3847 "your hw vendor)\n", m->mpc_apicid); 3848 ver = 0x10; 3849 } 3850 apic_version[m->mpc_apicid] = ver; 3851 } 3852 } 3853 mpt+=sizeof(*m); 3854 count+=sizeof(*m); 3855 break; 3856 } 3857 case MP_BUS: 3858 { 3859 struct mpc_config_bus *m= 3860 (struct mpc_config_bus *)mpt; 3861 memcpy(str,m->mpc_bustype,6); 3862 str[6]=0; 3863 SMP_PRINTK(("Bus #%d is %s\n", 3864 m->mpc_busid, 3865 str)); 3866 if ((strncmp(m->mpc_bustype,"ISA",3) == 0) || 3867 (strncmp(m->mpc_bustype,"EISA",4) == 0)) 3868 mp_bus_id_to_type[m->mpc_busid] = 3869 MP_BUS_ISA; 3870 else 3871 if (strncmp(m->mpc_bustype,"PCI",3) == 0) { 3872 mp_bus_id_to_type[m->mpc_busid] = 3873 MP_BUS_PCI; 3874 mp_bus_id_to_pci_bus[m->mpc_busid] = 3875 mp_current_pci_id; 3876 mp_current_pci_id++; 3877 } 3878 mpt+=sizeof(*m); 3879 count+=sizeof(*m); 3880 break; 3881 } 3882 case MP_IOAPIC: 3883 { 3884 struct mpc_config_ioapic *m= 3885 (struct mpc_config_ioapic *)mpt; 3886 if (m->mpc_flags&MPC_APIC_USABLE) 3887 { 3888 ioapics++; 3889 printk("I/O APIC #%d Version %d at 0x%lX.\n", 3890 m->mpc_apicid,m->mpc_apicver, 3891 m->mpc_apicaddr); 3892 /* we use the first one only currently */ 3893 if (ioapics == 1) 3894 mp_ioapic_addr = m->mpc_apicaddr; 3895 } 3896 mpt+=sizeof(*m); 3897 count+=sizeof(*m); 3898 break; 3899 } 3900 case MP_INTSRC: 3901 { 3902 struct mpc_config_intsrc *m= 3903 (struct mpc_config_intsrc *)mpt; 3904 3905 mp_irqs [mp_irq_entries] = *m; 3906 if (++mp_irq_entries == MAX_IRQ_SOURCES) { 3907 printk("Max irq sources exceeded!!\n"); 3908 printk("Skipping remaining sources.\n"); 3909 --mp_irq_entries; 3910 } 3911 3912 mpt+=sizeof(*m); 3913 count+=sizeof(*m); 3914 break; 3915 } 3916 case MP_LINTSRC: 3917 { 3918 struct mpc_config_intlocal *m= 3919 (struct mpc_config_intlocal *)mpt; 3920 mpt+=sizeof(*m); 3921 count+=sizeof(*m); 3922 break; 3923 } 3924 } 3925 } 3926 if (ioapics > 1) 3927 { 3928 printk("Warning: " 3929 "Multiple IO-APICs not yet supported.\n"); 3930 printk("Warning: switching to non APIC 
mode.\n"); 3931 skip_ioapic_setup=1; 3932 } 3933 return num_processors; 3934 } 3935 3936 /* Scan the memory blocks for an SMP configuration block. 3937 */ 3938 3939 static int __init smp_scan_config(unsigned long base, 3940 unsigned long length) 3941 { 3942 unsigned long *bp=phys_to_virt(base); 3943 struct intel_mp_floating *mpf; 3944 3945 SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n", 3946 bp,length)); 3947 if (sizeof(*mpf)!=16) 3948 printk("Error: MPF size\n"); 3949 3950 while (length>0) 3951 { 3952 if (*bp==SMP_MAGIC_IDENT) 3953 { 3954 mpf=(struct intel_mp_floating *)bp; 3955 if (mpf->mpf_length==1 && 3956 !mpf_checksum((unsigned char *)bp,16) && 3957 (mpf->mpf_specification == 1 3958 || mpf->mpf_specification == 4) ) 3959 { 3960 printk("Intel MultiProcessor Specification " 3961 "v1.%d\n", mpf->mpf_specification); 3962 if (mpf->mpf_feature2&(1<<7)) 3963 printk(" IMCR and PIC " 3964 "compatibility mode.\n"); 3965 else 3966 printk(" Virtual Wire " 3967 "compatibility mode.\n"); 3968 smp_found_config=1; 3969 /* Now see if we need to read further. */ 3970 if (mpf->mpf_feature1!=0) 3971 { 3972 unsigned long cfg; 3973 3974 /* local APIC has default address */ 3975 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; 3976 /* We need to know what the local APIC id of 3977 * the boot CPU is! */ 3978 3979 /* HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK 3980 * It's not just a crazy hack. ;-) */ 3981 3982 /* Standard page mapping functions don't work 3983 * yet. We know that page 0 is not used. 3984 * Steal it for now! */ 3985 3986 cfg=pg0[0]; 3987 pg0[0] = (mp_lapic_addr | 3988 _PAGE_RW | _PAGE_PRESENT); 3989 local_flush_tlb(); 3990 3991 boot_cpu_id = 3992 GET_APIC_ID(*((volatile unsigned long *) 3993 APIC_ID)); 3994 3995 /* Give it back */ 3996 pg0[0]= cfg; 3997 local_flush_tlb(); 3998 3999 /* 4000 * END OF HACK END OF HACK END OF HACK END OF HACK 4001 */ 4002 /* 2 CPUs, numbered 0 & 1. 
*/ 4003 cpu_present_map=3; 4004 num_processors=2; 4005 printk("I/O APIC at 0xFEC00000.\n"); 4006 4007 /* Save the default type number, we need it 4008 * later to set the IO-APIC up properly: */ 4009 mpc_default_type = mpf->mpf_feature1; 4010 4011 printk("Bus #0 is "); 4012 } 4013 switch(mpf->mpf_feature1) 4014 { 4015 case 1: 4016 case 5: 4017 printk("ISA\n"); 4018 break; 4019 case 2: 4020 printk("EISA with no IRQ8 chaining\n"); 4021 break; 4022 case 6: 4023 case 3: 4024 printk("EISA\n"); 4025 break; 4026 case 4: 4027 case 7: 4028 printk("MCA\n"); 4029 break; 4030 case 0: 4031 break; 4032 default: 4033 printk("???\nUnknown standard configuration " 4034 "%d\n", mpf->mpf_feature1); 4035 return 1; 4036 } 4037 if (mpf->mpf_feature1>4) 4038 { 4039 printk("Bus #1 is PCI\n"); 4040 4041 /* Set local APIC version to the integrated 4042 * form. It's initialized to zero otherwise, 4043 * representing a discrete 82489DX. */ 4044 apic_version[0] = 0x10; 4045 apic_version[1] = 0x10; 4046 } 4047 /* Read the physical hardware table. Anything 4048 * here will override the defaults. */ 4049 if (mpf->mpf_physptr) 4050 smp_read_mpc((void *)mpf->mpf_physptr); 4051 4052 __cpu_logical_map[0] = boot_cpu_id; 4053 global_irq_holder = boot_cpu_id; 4054 current->processor = boot_cpu_id; 4055 4056 printk("Processors: %d\n", num_processors); 4057 /* Only use the first configuration found. */ 4058 return 1; 4059 } 4060 } 4061 bp+=4; 4062 length-=16; 4063 } 4064 4065 return 0; 4066 } 4067 4068 void __init init_intel_smp (void) 4069 { 4070 /* FIXME: Linux assumes you have 640K of base ram.. 4071 * this continues the error... 
4072 * 4073 * 1) Scan the bottom 1K for a signature 4074 * 2) Scan the top 1K of base RAM 4075 * 3) Scan the 64K of bios */ 4076 if (!smp_scan_config(0x0,0x400) && 4077 !smp_scan_config(639*0x400,0x400) && 4078 !smp_scan_config(0xF0000,0x10000)) { 4079 /* If it is an SMP machine we should know now, unless 4080 * the configuration is in an EISA/MCA bus machine 4081 * with an extended bios data area. 4082 * 4083 * there is a real-mode segmented pointer pointing to 4084 * the 4K EBDA area at 0x40E, calculate and scan it 4085 * here. 4086 * 4087 * NOTE! There are Linux loaders that will corrupt 4088 * the EBDA area, and as such this kind of SMP config 4089 * may be less trustworthy, simply because the SMP 4090 * table may have been stomped on during early 4091 * boot. These loaders are buggy and should be fixed. 4092 */ 4093 unsigned int address; 4094 4095 address = *(unsigned short *)phys_to_virt(0x40E); 4096 address<<=4; 4097 smp_scan_config(address, 0x1000); 4098 if (smp_found_config) 4099 printk(KERN_WARNING "WARNING: MP table in the EBDA" 4100 " can be UNSAFE, contact linux-smp@vger.rutgers." 4101 "edu if you experience SMP problems!\n"); 4102 } 4103 } 4104 4105 #else 4106 4107 /* The Visual Workstation is Intel MP compliant in the 4108 * hardware sense, but it doesnt have a 4109 * BIOS(-configuration table). No problem for Linux. */ 4110 void __init init_visws_smp(void) 4111 { 4112 smp_found_config = 1; 4113 4114 cpu_present_map |= 2; /* or in id 1 */ 4115 apic_version[1] |= 0x10; /* integrated APIC */ 4116 apic_version[0] |= 0x10; 4117 4118 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; 4119 } 4120 4121 #endif 4122 4123 /* - Intel MP Configuration Table 4124 * - or SGI Visual Workstation configuration */ 4125 void __init init_smp_config (void) 4126 { 4127 #ifndef CONFIG_VISWS 4128 init_intel_smp(); 4129 #else 4130 init_visws_smp(); 4131 #endif 4132 } 4133 4134 /* Trampoline 80x86 program as an array. 
*/ 4135 4136 extern unsigned char trampoline_data []; 4137 extern unsigned char trampoline_end []; 4138 static unsigned char *trampoline_base; 4139 4140 /* Currently trivial. Write the real->protected mode 4141 * bootstrap into the page concerned. The caller has made 4142 * sure it's suitably aligned. */ 4143 4144 static unsigned long __init setup_trampoline(void) 4145 { 4146 memcpy(trampoline_base, trampoline_data, 4147 trampoline_end - trampoline_data); 4148 return virt_to_phys(trampoline_base); 4149 } 4150 4151 /* We are called very early to get the low memory for the 4152 * SMP bootup trampoline page. */ 4153 unsigned long __init 4154 smp_alloc_memory(unsigned long mem_base) 4155 { 4156 if (virt_to_phys((void *)mem_base) >= 0x9F000) 4157 panic("smp_alloc_memory: Insufficient low memory for" 4158 " kernel trampoline 0x%lx.", mem_base); 4159 trampoline_base = (void *)mem_base; 4160 return mem_base + PAGE_SIZE; 4161 } 4162 4163 /* The bootstrap kernel entry code has set these up. Save 4164 * them for a given CPU */ 4165 void __init smp_store_cpu_info(int id) 4166 { 4167 struct cpuinfo_x86 *c=&cpu_data[id]; 4168 4169 *c = boot_cpu_data; 4170 c->pte_quick = 0; 4171 c->pgd_quick = 0; 4172 c->pgtable_cache_sz = 0; 4173 identify_cpu(c); 4174 /* Mask B, Pentium, but not Pentium MMX */ 4175 if (c->x86_vendor == X86_VENDOR_INTEL && 4176 c->x86 == 5 && 4177 c->x86_mask >= 1 && c->x86_mask <= 4 && 4178 c->x86_model <= 3) 4179 /* Remember we have B step Pentia with bugs */ 4180 smp_b_stepping=1; 4181 } 4182 4183 /* Architecture specific routine called by the kernel 4184 * just before init is fired off. This allows the BP to 4185 * have everything in order [we hope]. At the end of 4186 * this all the APs will hit the system scheduling and 4187 * off we go. Each AP will load the system gdt's and jump 4188 * through the kernel init into idle(). At this point the 4189 * scheduler will one day take over and give them jobs to 4190 * do. 
smp_callin is a standard routine we use to track 4191 * CPUs as they power up. */ 4192 4193 static atomic_t smp_commenced = ATOMIC_INIT(0); 4194 4195 void __init smp_commence(void) 4196 { 4197 /* Lets the callins below out of their loop. */ 4198 SMP_PRINTK(("Setting commenced=1, go go go\n")); 4199 4200 wmb(); 4201 atomic_set(&smp_commenced,1); 4202 } 4203 4204 void __init enable_local_APIC(void) 4205 { 4206 unsigned long value; 4207 4208 value = apic_read(APIC_SPIV); 4209 value |= (1<<8); /* Enable APIC (bit==1) */ 4210 value &= ~(1<<9); /* Enable focus processor (bit==0) */ 4211 value |= 0xff; /* Set spurious IRQ vector to 0xff */ 4212 apic_write(APIC_SPIV,value); 4213 4214 /* Set Task Priority to 'accept all' */ 4215 value = apic_read(APIC_TASKPRI); 4216 value &= ~APIC_TPRI_MASK; 4217 apic_write(APIC_TASKPRI,value); 4218 4219 /* Clear the logical destination ID, just to be safe. 4220 * also, put the APIC into flat delivery mode. */ 4221 value = apic_read(APIC_LDR); 4222 value &= ~APIC_LDR_MASK; 4223 apic_write(APIC_LDR,value); 4224 4225 value = apic_read(APIC_DFR); 4226 value |= SET_APIC_DFR(0xf); 4227 apic_write(APIC_DFR, value); 4228 4229 udelay(100); /* B safe */ 4230 } 4231 4232 unsigned long __init 4233 init_smp_mappings(unsigned long memory_start) 4234 { 4235 unsigned long apic_phys; 4236 4237 memory_start = PAGE_ALIGN(memory_start); 4238 if (smp_found_config) { 4239 apic_phys = mp_lapic_addr; 4240 } else { 4241 /* set up a fake all zeroes page to simulate the 4242 * local APIC and another one for the IO-APIC. We 4243 * could use the real zero-page, but it's safer this 4244 * way if some buggy code writes to this page ... 
*/ 4245 apic_phys = __pa(memory_start); 4246 memset((void *)memory_start, 0, PAGE_SIZE); 4247 memory_start += PAGE_SIZE; 4248 } 4249 set_fixmap(FIX_APIC_BASE,apic_phys); 4250 printk("mapped APIC to %08lx (%08lx)\n", 4251 APIC_BASE, apic_phys); 4252 4253 #ifdef CONFIG_X86_IO_APIC 4254 { 4255 unsigned long ioapic_phys; 4256 4257 if (smp_found_config) { 4258 ioapic_phys = mp_ioapic_addr; 4259 } else { 4260 ioapic_phys = __pa(memory_start); 4261 memset((void *)memory_start, 0, PAGE_SIZE); 4262 memory_start += PAGE_SIZE; 4263 } 4264 set_fixmap(FIX_IO_APIC_BASE,ioapic_phys); 4265 printk("mapped IOAPIC to %08lx (%08lx)\n", 4266 fix_to_virt(FIX_IO_APIC_BASE), ioapic_phys); 4267 } 4268 #endif 4269 4270 return memory_start; 4271 } 4272 4273 extern void calibrate_delay(void); 4274 4275 void __init smp_callin(void) 4276 { 4277 int cpuid; 4278 unsigned long timeout; 4279 4280 /* (This works even if the APIC is not enabled.) */ 4281 cpuid = GET_APIC_ID(apic_read(APIC_ID)); 4282 4283 SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid)); 4284 4285 /* STARTUP IPIs are fragile beasts as they might 4286 * sometimes trigger some glue motherboard 4287 * logic. Complete APIC bus silence for 1 second, this 4288 * overestimates the time the boot CPU is spending to 4289 * send the up to 2 STARTUP IPIs by a factor of 4290 * two. This should be enough. */ 4291 4292 /* Waiting 2s total for startup (udelay is not yet 4293 * working) */ 4294 timeout = jiffies + 2*HZ; 4295 while (time_before(jiffies,timeout)) 4296 { 4297 /* Has the boot CPU finished its STARTUP sequence? */ 4298 if (test_bit(cpuid, 4299 (unsigned long *)&cpu_callout_map[0])) 4300 break; 4301 } 4302 4303 while (!time_before(jiffies,timeout)) { 4304 printk("BUG: CPU%d started up but did not get a " 4305 "callout!\n", cpuid); 4306 stop_this_cpu(); 4307 } 4308 4309 /* the boot CPU has finished the init stage and is 4310 * spinning on callin_map until we finish. We are free 4311 * to set up this CPU, first the APIC. 
(this is 4312 * probably redundant on most boards) */ 4313 SMP_PRINTK(("CALLIN, before enable_local_APIC().\n")); 4314 enable_local_APIC(); 4315 4316 /* Set up our APIC timer. */ 4317 setup_APIC_clock(); 4318 4319 __sti(); 4320 4321 #ifdef CONFIG_MTRR 4322 /* Must be done before calibration delay is computed */ 4323 mtrr_init_secondary_cpu (); 4324 #endif 4325 /* Get our bogomips. */ 4326 calibrate_delay(); 4327 SMP_PRINTK(("Stack at about %p\n",&cpuid)); 4328 4329 /* Save our processor parameters */ 4330 smp_store_cpu_info(cpuid); 4331 4332 /* Allow the master to continue. */ 4333 set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]); 4334 } 4335 4336 int cpucount = 0; 4337 4338 extern int cpu_idle(void * unused); 4339 4340 /* Activate a secondary processor. */ 4341 int __init start_secondary(void *unused) 4342 { 4343 /* Don't put anything before smp_callin(), SMP booting 4344 * is too fragile that we want to limit the things done 4345 * here to the most necessary things. */ 4346 smp_callin(); 4347 while (!atomic_read(&smp_commenced)) 4348 /* nothing */ ; 4349 return cpu_idle(NULL); 4350 } 4351 4352 /* Everything has been set up for the secondary CPUs - 4353 * they just need to reload everything from the task 4354 * structure */ 4355 void __init initialize_secondary(void) 4356 { 4357 struct thread_struct * p = &current->tss; 4358 4359 /* Load up the LDT and the task register. */ 4360 asm volatile("lldt %%ax": :"a" (p->ldt)); 4361 asm volatile("ltr %%ax": :"a" (p->tr)); 4362 stts(); 4363 4364 /* We don't actually need to load the full TSS, 4365 * basically just the stack pointer and the eip. 
*/ 4366 4367 asm volatile( 4368 "movl %0,%%esp\n\t" 4369 "jmp *%1" 4370 : 4371 :"r" (p->esp),"r" (p->eip)); 4372 } 4373 4374 extern struct { 4375 void * esp; 4376 unsigned short ss; 4377 } stack_start; 4378 4379 static void __init do_boot_cpu(int i) 4380 { 4381 unsigned long cfg; 4382 pgd_t maincfg; 4383 struct task_struct *idle; 4384 unsigned long send_status, accept_status; 4385 int timeout, num_starts, j; 4386 unsigned long start_eip; 4387 4388 /* We need an idle process for each processor. */ 4389 4390 kernel_thread(start_secondary, NULL, CLONE_PID); 4391 cpucount++; 4392 4393 idle = task[cpucount]; 4394 if (!idle) 4395 panic("No idle process for CPU %d", i); 4396 4397 idle->processor = i; 4398 __cpu_logical_map[cpucount] = i; 4399 cpu_number_map[i] = cpucount; 4400 4401 /* start_eip had better be page-aligned! */ 4402 start_eip = setup_trampoline(); 4403 4404 /* So we see what's up */ 4405 printk("Booting processor %d eip %lx\n", i, start_eip); 4406 stack_start.esp = (void *) (1024 + PAGE_SIZE + 4407 (char *)idle); 4408 4409 /* This grunge runs the startup process for the 4410 * targeted processor. */ 4411 4412 SMP_PRINTK(("Setting warm reset code and vector.\n")); 4413 4414 CMOS_WRITE(0xa, 0xf); 4415 local_flush_tlb(); 4416 SMP_PRINTK(("1.\n")); 4417 *((volatile unsigned short *) phys_to_virt(0x469)) = 4418 start_eip >> 4; 4419 SMP_PRINTK(("2.\n")); 4420 *((volatile unsigned short *) phys_to_virt(0x467)) = 4421 start_eip & 0xf; 4422 SMP_PRINTK(("3.\n")); 4423 4424 maincfg=swapper_pg_dir[0]; 4425 ((unsigned long *)swapper_pg_dir)[0]=0x102007; 4426 4427 /* Be paranoid about clearing APIC errors. */ 4428 4429 if ( apic_version[i] & 0xF0 ) 4430 { 4431 apic_write(APIC_ESR, 0); 4432 accept_status = (apic_read(APIC_ESR) & 0xEF); 4433 } 4434 4435 /* Status is now clean */ 4436 4437 send_status = 0; 4438 accept_status = 0; 4439 4440 /* Starting actual IPI sequence... 
*/ 4441 4442 SMP_PRINTK(("Asserting INIT.\n")); 4443 4444 /* Turn INIT on */ 4445 4446 cfg=apic_read(APIC_ICR2); 4447 cfg&=0x00FFFFFF; 4448 /* Target chip */ 4449 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); 4450 cfg=apic_read(APIC_ICR); 4451 /* Clear bits */ 4452 cfg&=~0xCDFFF; 4453 cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT | 4454 APIC_DEST_DM_INIT); 4455 /* Send IPI */ 4456 apic_write(APIC_ICR, cfg); 4457 4458 udelay(200); 4459 SMP_PRINTK(("Deasserting INIT.\n")); 4460 4461 cfg=apic_read(APIC_ICR2); 4462 cfg&=0x00FFFFFF; 4463 /* Target chip */ 4464 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); 4465 cfg=apic_read(APIC_ICR); 4466 /* Clear bits */ 4467 cfg&=~0xCDFFF; 4468 cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT); 4469 /* Send IPI */ 4470 apic_write(APIC_ICR, cfg); 4471 4472 /* Should we send STARTUP IPIs? 4473 * 4474 * Determine this based on the APIC version. If we 4475 * don't have an integrated APIC, don't send the 4476 * STARTUP IPIs. */ 4477 4478 if ( apic_version[i] & 0xF0 ) 4479 num_starts = 2; 4480 else 4481 num_starts = 0; 4482 4483 /* Run STARTUP IPI loop. 
*/ 4484 4485 for (j = 1; !(send_status || accept_status) 4486 && (j <= num_starts) ; j++) 4487 { 4488 SMP_PRINTK(("Sending STARTUP #%d.\n",j)); 4489 apic_write(APIC_ESR, 0); 4490 SMP_PRINTK(("After apic_write.\n")); 4491 4492 /* STARTUP IPI */ 4493 4494 cfg=apic_read(APIC_ICR2); 4495 cfg&=0x00FFFFFF; 4496 /* Target chip */ 4497 apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); 4498 cfg=apic_read(APIC_ICR); 4499 /* Clear bits */ 4500 cfg&=~0xCDFFF; 4501 /* Boot on the stack */ 4502 cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12)); 4503 SMP_PRINTK(("Before start apic_write.\n")); 4504 /* Kick the second */ 4505 apic_write(APIC_ICR, cfg); 4506 4507 SMP_PRINTK(("Startup point 1.\n")); 4508 4509 timeout = 0; 4510 SMP_PRINTK(("Waiting for send to finish...\n")); 4511 do { 4512 SMP_PRINTK(("+")); 4513 udelay(100); 4514 send_status = apic_read(APIC_ICR) & 0x1000; 4515 } while (send_status && (timeout++ < 1000)); 4516 4517 /* Give the other CPU some time to accept the IPI. */ 4518 udelay(200); 4519 accept_status = (apic_read(APIC_ESR) & 0xEF); 4520 } 4521 SMP_PRINTK(("After Startup.\n")); 4522 4523 if (send_status) /* APIC never delivered?? */ 4524 printk("APIC never delivered???\n"); 4525 if (accept_status) /* Send accept error */ 4526 printk("APIC delivery error (%lx).\n",accept_status); 4527 4528 if ( !(send_status || accept_status) ) 4529 { 4530 /* allow APs to start initializing. 
*/ 4531 SMP_PRINTK(("Before Callout %d.\n", i)); 4532 set_bit(i, (unsigned long *)&cpu_callout_map[0]); 4533 SMP_PRINTK(("After Callout %d.\n", i)); 4534 4535 for(timeout=0;timeout<50000;timeout++) 4536 { 4537 if (cpu_callin_map[0]&(1<<i)) 4538 break; /* It has booted */ 4539 udelay(100); /* Wait 5s total for a response */ 4540 } 4541 if (cpu_callin_map[0]&(1<<i)) 4542 { 4543 /* # CPUs logically, starting from 1 (BSP is 0) */ 4544 #if 0 4545 cpu_number_map[i] = cpucount; 4546 __cpu_logical_map[cpucount] = i; 4547 #endif 4548 printk("OK.\n"); 4549 printk("CPU%d: ", i); 4550 print_cpu_info(&cpu_data[i]); 4551 } 4552 else 4553 { 4554 if (*((volatile unsigned char *)phys_to_virt(8192)) 4555 == 0xA5) 4556 printk("Stuck ??\n"); 4557 else 4558 printk("Not responding.\n"); 4559 } 4560 SMP_PRINTK(("CPU has booted.\n")); 4561 } 4562 else 4563 { 4564 __cpu_logical_map[cpucount] = -1; 4565 cpu_number_map[i] = -1; 4566 cpucount--; 4567 } 4568 4569 swapper_pg_dir[0]=maincfg; 4570 local_flush_tlb(); 4571 4572 /* mark "stuck" area as not stuck */ 4573 *((volatile unsigned long *)phys_to_virt(8192)) = 0; 4574 } 4575 4576 cycles_t cacheflush_time; 4577 extern unsigned long cpu_hz; 4578 4579 static void smp_tune_scheduling (void) 4580 { 4581 unsigned long cachesize; 4582 /* Rough estimation for SMP scheduling, this is the 4583 * number of cycles it takes for a fully memory-limited 4584 * process to flush the SMP-local cache. 4585 * 4586 * (For a P5 this pretty much means we will choose 4587 * another idle CPU almost always at wakeup time (this 4588 * is due to the small L1 cache), on PIIs it's around 4589 * 50-100 usecs, depending on the cache size) */ 4590 4591 if (!cpu_hz) { 4592 /* this basically disables processor-affinity 4593 * scheduling on SMP without a TSC. 
*/ 4594 cacheflush_time = 0; 4595 return; 4596 } else { 4597 cachesize = boot_cpu_data.x86_cache_size; 4598 if (cachesize == -1) 4599 cachesize = 8; /* Pentiums */ 4600 4601 cacheflush_time = cpu_hz/1024*cachesize/5000; 4602 } 4603 4604 printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", 4605 (long)cacheflush_time/(cpu_hz/1000000), 4606 ((long)cacheflush_time*100/(cpu_hz/1000000)) % 100); 4607 } 4608 4609 unsigned int prof_multiplier[NR_CPUS]; 4610 unsigned int prof_counter[NR_CPUS]; 4611 4612 /* Cycle through the processors, sending APIC IPIs to 4613 * boot each. */ 4614 void __init smp_boot_cpus(void) 4615 { 4616 int i; 4617 4618 #ifdef CONFIG_MTRR 4619 /* Must be done before other processors booted */ 4620 mtrr_init_boot_cpu (); 4621 #endif 4622 /* Initialize the logical to physical CPU number 4623 * mapping and the per-CPU profiling counter/multiplier 4624 */ 4625 4626 for (i = 0; i < NR_CPUS; i++) { 4627 cpu_number_map[i] = -1; 4628 prof_counter[i] = 1; 4629 prof_multiplier[i] = 1; 4630 } 4631 4632 /* Setup boot CPU information */ 4633 4634 /* Final full version of the data */ 4635 smp_store_cpu_info(boot_cpu_id); 4636 smp_tune_scheduling(); 4637 printk("CPU%d: ", boot_cpu_id); 4638 print_cpu_info(&cpu_data[boot_cpu_id]); 4639 4640 /* not necessary because the MP table should list the 4641 * boot CPU too, but we do it for the sake of 4642 * robustness anyway. (and for the case when a non-SMP 4643 * board boots an SMP kernel) */ 4644 cpu_present_map |= (1 << hard_smp_processor_id()); 4645 4646 cpu_number_map[boot_cpu_id] = 0; 4647 4648 /* If we couldnt find an SMP configuration at boot 4649 * time, get out of here now! */ 4650 if (!smp_found_config) 4651 { 4652 printk(KERN_NOTICE "SMP motherboard not detected. " 4653 "Using dummy APIC emulation.\n"); 4654 #ifndef CONFIG_VISWS 4655 io_apic_irqs = 0; 4656 #endif 4657 cpu_online_map = cpu_present_map; 4658 goto smp_done; 4659 } 4660 4661 /* If SMP should be disabled, really disable it! 
*/ 4662 4663 if (!max_cpus) 4664 { 4665 smp_found_config = 0; 4666 printk(KERN_INFO "SMP mode deactivated, forcing use " 4667 "of dummy APIC emulation.\n"); 4668 } 4669 4670 #ifdef SMP_DEBUG 4671 { 4672 int reg; 4673 4674 /* This is to verify that we're looking at a real 4675 * local APIC. Check these against your board if the 4676 * CPUs aren't getting started for no apparent 4677 * reason. */ 4678 reg = apic_read(APIC_VERSION); 4679 SMP_PRINTK(("Getting VERSION: %x\n", reg)); 4680 4681 apic_write(APIC_VERSION, 0); 4682 reg = apic_read(APIC_VERSION); 4683 SMP_PRINTK(("Getting VERSION: %x\n", reg)); 4684 4685 /* The two version reads above should print the same 4686 * NON-ZERO!!! numbers. If the second one is zero, 4687 * there is a problem with the APIC write/read 4688 * definitions. 4689 * 4690 * The next two are just to see if we have sane 4691 * values. They're only really relevant if we're in 4692 * Virtual Wire compatibility mode, but most boxes 4693 * are anymore. */ 4694 reg = apic_read(APIC_LVT0); 4695 SMP_PRINTK(("Getting LVT0: %x\n", reg)); 4696 4697 reg = apic_read(APIC_LVT1); 4698 SMP_PRINTK(("Getting LVT1: %x\n", reg)); 4699 } 4700 #endif 4701 4702 enable_local_APIC(); 4703 4704 /* Set up our local APIC timer: */ 4705 setup_APIC_clock (); 4706 4707 /* Now scan the CPU present map and fire up the other 4708 * CPUs. */ 4709 4710 /* Add all detected CPUs. (later on we can down 4711 * individual CPUs which will change cpu_online_map but 4712 * not necessarily cpu_present_map. We are pretty much 4713 * ready for hot-swap CPUs.) */ 4714 cpu_online_map = cpu_present_map; 4715 mb(); 4716 4717 SMP_PRINTK(("CPU map: %lx\n", cpu_present_map)); 4718 4719 for(i=0;i<NR_CPUS;i++) 4720 { 4721 /* Don't even attempt to start the boot CPU! 
*/ 4722 if (i == boot_cpu_id) 4723 continue; 4724 4725 if ((cpu_online_map & (1 << i)) 4726 && (max_cpus < 0 || max_cpus > cpucount+1)) 4727 { 4728 do_boot_cpu(i); 4729 } 4730 4731 /* Make sure we unmap all failed CPUs */ 4732 4733 if (cpu_number_map[i] == -1 && 4734 (cpu_online_map & (1 << i))) { 4735 printk("CPU #%d not responding. " 4736 "Removing from cpu_online_map.\n", i); 4737 cpu_online_map &= ~(1 << i); 4738 } 4739 } 4740 4741 /* Cleanup possible dangling ends... */ 4742 #ifndef CONFIG_VISWS 4743 { 4744 unsigned long cfg; 4745 4746 /* Install writable page 0 entry. */ 4747 cfg = pg0[0]; 4748 /* writeable, present, addr 0 */ 4749 pg0[0] = _PAGE_RW | _PAGE_PRESENT; 4750 local_flush_tlb(); 4751 4752 /* Paranoid: Set warm reset code and vector here back 4753 * to default values. */ 4754 CMOS_WRITE(0, 0xf); 4755 4756 *((volatile long *) phys_to_virt(0x467)) = 0; 4757 4758 /* Restore old page 0 entry. */ 4759 pg0[0] = cfg; 4760 local_flush_tlb(); 4761 } 4762 #endif 4763 4764 /* Allow the user to impress friends. */ 4765 SMP_PRINTK(("Before bogomips.\n")); 4766 if (cpucount==0) 4767 { 4768 printk(KERN_ERR 4769 "Error: only one processor found.\n"); 4770 cpu_online_map = (1<<hard_smp_processor_id()); 4771 } 4772 else 4773 { 4774 unsigned long bogosum=0; 4775 for(i=0;i<32;i++) 4776 { 4777 if (cpu_online_map&(1<<i)) 4778 bogosum+=cpu_data[i].loops_per_sec; 4779 } 4780 printk(KERN_INFO "Total of %d processors activated " 4781 "(%lu.%02lu BogoMIPS).\n", 4782 cpucount+1, 4783 (bogosum+2500)/500000, 4784 ((bogosum+2500)/5000)%100); 4785 SMP_PRINTK(("Before bogocount - " 4786 "setting activated=1.\n")); 4787 smp_activated=1; 4788 smp_num_cpus=cpucount+1; 4789 } 4790 if (smp_b_stepping) 4791 printk(KERN_WARNING "WARNING: SMP operation may be " 4792 "unreliable with B stepping processors.\n"); 4793 SMP_PRINTK(("Boot done.\n")); 4794 4795 cache_APIC_registers(); 4796 #ifndef CONFIG_VISWS 4797 /* Here we can be sure that there is an IO-APIC in the 4798 * system.
Let's go and set it up: */ 4799 if (!skip_ioapic_setup) 4800 setup_IO_APIC(); 4801 #endif 4802 4803 smp_done: 4804 } 4805 4806 4807 /* the following functions deal with sending IPIs between 4808 * CPUs. 4809 * 4810 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.*/ 4811 4812 4813 /* Silly serialization to work around CPU bug in P5s. We 4814 * can safely turn it off on a 686. */ 4815 #ifdef CONFIG_X86_GOOD_APIC 4816 # define FORCE_APIC_SERIALIZATION 0 4817 #else 4818 # define FORCE_APIC_SERIALIZATION 1 4819 #endif 4820 4821 static unsigned int cached_APIC_ICR; 4822 static unsigned int cached_APIC_ICR2; 4823 4824 /* Caches reserved bits, APIC reads are (mildly) 4825 * expensive and force otherwise unnecessary CPU 4826 * synchronization. (We could cache other APIC registers 4827 * too, but these are the main ones used in RL.) */ 4828 #define slow_ICR (apic_read(APIC_ICR) & ~0xFDFFF) 4829 #define slow_ICR2 (apic_read(APIC_ICR2) & 0x00FFFFFF) 4830 4831 void cache_APIC_registers (void) 4832 { 4833 cached_APIC_ICR = slow_ICR; 4834 cached_APIC_ICR2 = slow_ICR2; 4835 mb(); 4836 } 4837 4838 static inline unsigned int __get_ICR (void) 4839 { 4840 #if FORCE_APIC_SERIALIZATION 4841 /* Wait for the APIC to become ready - this should 4842 * never occur. It's a debugging check really. 
*/ 4843 int count = 0; 4844 unsigned int cfg; 4845 4846 while (count < 1000) 4847 { 4848 cfg = slow_ICR; 4849 if (!(cfg&(1<<12))) { 4850 if (count) 4851 atomic_add(count, (atomic_t*)&ipi_count); 4852 return cfg; 4853 } 4854 count++; 4855 udelay(10); 4856 } 4857 printk("CPU #%d: previous IPI still not cleared " 4858 "after 10mS\n", smp_processor_id()); 4859 return cfg; 4860 #else 4861 return cached_APIC_ICR; 4862 #endif 4863 } 4864 4865 static inline unsigned int __get_ICR2 (void) 4866 { 4867 #if FORCE_APIC_SERIALIZATION 4868 return slow_ICR2; 4869 #else 4870 return cached_APIC_ICR2; 4871 #endif 4872 } 4873 4874 static inline int __prepare_ICR (unsigned int shortcut, 4875 int vector) 4876 { 4877 unsigned int cfg; 4878 4879 cfg = __get_ICR(); 4880 cfg |= APIC_DEST_DM_FIXED|shortcut|vector; 4881 4882 return cfg; 4883 } 4884 4885 static inline int __prepare_ICR2 (unsigned int dest) 4886 { 4887 unsigned int cfg; 4888 4889 cfg = __get_ICR2(); 4890 cfg |= SET_APIC_DEST_FIELD(dest); 4891 4892 return cfg; 4893 } 4894 4895 static inline void 4896 __send_IPI_shortcut(unsigned int shortcut, int vector) 4897 { 4898 unsigned int cfg; 4899 /* Subtle. In the case of the 'never do double writes' 4900 * workaround we have to lock out interrupts to be 4901 * safe. Otherwise it's just one single atomic write to 4902 * the APIC, no need for cli/sti. */ 4903 #if FORCE_APIC_SERIALIZATION 4904 unsigned long flags; 4905 4906 __save_flags(flags); 4907 __cli(); 4908 #endif 4909 4910 /* No need to touch the target chip field */ 4911 4912 cfg = __prepare_ICR(shortcut, vector); 4913 4914 /* Send the IPI. The write to APIC_ICR 4915 * fires this off. 
*/ 4916 apic_write(APIC_ICR, cfg); 4917 #if FORCE_APIC_SERIALIZATION 4918 __restore_flags(flags); 4919 #endif 4920 } 4921 4922 static inline void send_IPI_allbutself(int vector) 4923 { 4924 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); 4925 } 4926 4927 static inline void send_IPI_all(int vector) 4928 { 4929 __send_IPI_shortcut(APIC_DEST_ALLINC, vector); 4930 } 4931 4932 void send_IPI_self(int vector) 4933 { 4934 __send_IPI_shortcut(APIC_DEST_SELF, vector); 4935 } 4936


4937 static inline void send_IPI_single( int dest, int vector) 4938 { 4939 unsigned long cfg; 4940 #if FORCE_APIC_SERIALIZATION 4941 unsigned long flags; 4942 4943 __save_flags(flags); 4944 __cli(); 4945 #endif 4946 4947 /* prepare target chip field */ 4948 4949 cfg = __prepare_ICR2(dest); 4950 apic_write(APIC_ICR2, cfg); 4951 4952 /* program the ICR*/ 4953 cfg = __prepare_ICR(0, vector); 4954 4955 /* Send the IPI. The write to APIC_ICR fires this off. 4956 */ 4957 apic_write(APIC_ICR, cfg); 4958 #if FORCE_APIC_SERIALIZATION 4959 __restore_flags(flags); 4960 #endif 4961 } 4962 4963 /* This is fraught with deadlocks. Probably the situation 4964 * is not that bad as in the early days of SMP, so we 4965 * might ease some of the paranoia here. */ 4966 4967 void smp_flush_tlb(void) 4968 { 4969 int cpu = smp_processor_id(); 4970 int stuck; 4971 unsigned long flags; 4972 4973 /* it's important that we do not generate any APIC 4974 * traffic until the AP CPUs have booted up! */ 4975 if (cpu_online_map) { 4976 /* The assignment is safe because it's volatile so 4977 * the compiler cannot reorder it, because the i586 4978 * has strict memory ordering and because only the 4979 * kernel lock holder may issue a tlb flush. If you 4980 * break any one of those three change this to an 4981 * atomic bus locked or. */ 4982 4983 smp_invalidate_needed = cpu_online_map; 4984 4985 /* Processors spinning on some lock with IRQs 4986 * disabled will see this IRQ late. The 4987 * smp_invalidate_needed map will ensure they don't 4988 * do a spurious flush tlb or miss one. 
*/ 4989 4990 __save_flags(flags); 4991 __cli(); 4992 4993 send_IPI_allbutself(INVALIDATE_TLB_VECTOR); 4994 4995 /* Spin waiting for completion */ 4996 stuck = 50000000; 4997 while (smp_invalidate_needed) { 4998 /* Take care of "crossing" invalidates */ 4999 if (test_bit(cpu, &smp_invalidate_needed)) 5000 clear_bit(cpu, &smp_invalidate_needed); 5001 --stuck; 5002 if (!stuck) { 5003 printk("stuck on TLB IPI wait (CPU#%d)\n",cpu); 5004 break; 5005 } 5006 } 5007 __restore_flags(flags); 5008 } 5009 5010 /* Flush the local TLB */ 5011 local_flush_tlb(); 5012 } 5013 5014 5015 /* this function sends a 'reschedule' IPI to another CPU. 5016 * it goes straight through and wastes no time 5017 * serializing anything. Worst case is that we lose a 5018 * reschedule ... */



5019 void smp_send_reschedule(int cpu) 5020 { 5021 send_IPI_single(cpu, RESCHEDULE_VECTOR); 5022 } 5023 5024 /* this function sends a 'stop' IPI to all other CPUs in 5025 * the system. it goes straight through. */ 5026 void smp_send_stop(void) 5027 { 5028 send_IPI_allbutself(STOP_CPU_VECTOR); 5029 } 5030 5031 /* this function sends an 'reload MTRR state' IPI to all 5032 * other CPUs in the system. it goes straight through, 5033 * completion processing is done on the mttr.c level. */ 5034 void smp_send_mtrr(void) 5035 { 5036 send_IPI_allbutself(MTRR_CHANGE_VECTOR); 5037 } 5038 5039 /* Local timer interrupt handler. It does both profiling 5040 * and process statistics/rescheduling. 5041 * 5042 * We do profiling in every local tick, 5043 * statistics/rescheduling happen only every 'profiling 5044 * multiplier' ticks. The default multiplier is 1 and it 5045 * can be changed by writing the new multiplier value 5046 * into /proc/profile. */ 5047 void smp_local_timer_interrupt(struct pt_regs * regs) 5048 { 5049 int cpu = smp_processor_id(); 5050 5051 /* The profiling function is SMP safe. (nothing can 5052 * mess around with "current", and the profiling 5053 * counters are updated with atomic operations). This 5054 * is especially useful with a profiling 5055 * multiplier != 1 */ 5056 if (!user_mode(regs)) 5057 x86_do_profile(regs->eip); 5058

5059 if (!--prof_counter[cpu]) { 5060 int user=0,system=0; 5061 struct task_struct * p = current; 5062 5063 /* After doing the above, we need to make like a 5064 * normal interrupt - otherwise timer interrupts 5065 * ignore the global interrupt lock, which is the 5066 * WrongThing (tm) to do. */ 5067 5068 if (user_mode(regs)) 5069 user=1; 5070 else 5071 system=1; 5072 5073 irq_enter(cpu, 0); 5074 if (p->pid) { 5075 update_one_process(p, 1, user, system, cpu); 5076 5077 p->counter -= 1; 5078 if (p->counter < 0) { 5079 p->counter = 0; 5080 p->need_resched = 1; 5081 } 5082 if (p->priority < DEF_PRIORITY) { 5083 kstat.cpu_nice += user; 5084 kstat.per_cpu_nice[cpu] += user; 5085 } else { 5086 kstat.cpu_user += user; 5087 kstat.per_cpu_user[cpu] += user; 5088 } 5089 5090 kstat.cpu_system += system; 5091 kstat.per_cpu_system[cpu] += system; 5092 5093 } 5094 prof_counter[cpu]=prof_multiplier[cpu]; 5095 irq_exit(cpu, 0); 5096 } 5097 5098 /* We take the 'long' return path, and there every 5099 * subsystem grabs the apropriate locks (kernel lock/ 5100 * irq lock). 5101 * 5102 * we might want to decouple profiling from the 'long 5103 * path', and do the profiling totally in assembly. 5104 * 5105 * Currently this isn't too much of an issue 5106 * (performance wise), we can take more than 100K local 5107 * irqs per second on a 100 MHz P5. */ 5108 } 5109 5110 /* Local APIC timer interrupt. This is the most natural 5111 * way for doing local interrupts, but local timer 5112 * interrupts can be emulated by broadcast interrupts 5113 * too. [in case the hw doesnt support APIC timers] 5114 * 5115 * [ if a single-CPU system runs an SMP kernel then we 5116 * call the local interrupt as well. Thus we cannot 5117 * inline the local irq ... ] */ 5118 void smp_apic_timer_interrupt(struct pt_regs * regs) 5119 { 5120 /* NOTE! We'd better ACK the irq immediately, because 5121 * timer handling can be slow, and we want to be able 5122 * to accept NMI tlb invalidates during this time. 
*/ 5123 ack_APIC_irq(); 5124 smp_local_timer_interrupt(regs); 5125 } 5126 5127 /* Reschedule call back. Nothing to do, all the work is 5128 * done automatically when we return from the interrupt. 5129 */ 5130 asmlinkage void smp_reschedule_interrupt(void) 5131 { 5132 ack_APIC_irq(); 5133 } 5134 5135 /* Invalidate call-back */ 5136 asmlinkage void smp_invalidate_interrupt(void) 5137 { 5138 if (test_and_clear_bit(smp_processor_id(), 5139 &smp_invalidate_needed)) 5140 local_flush_tlb(); 5141 5142 ack_APIC_irq(); 5143 } 5144 5145 static void stop_this_cpu (void) 5146 { 5147 /* Remove this CPU: */ 5148 clear_bit(smp_processor_id(), &cpu_online_map); 5149 5150 if (cpu_data[smp_processor_id()].hlt_works_ok) 5151 for(;;) __asm__("hlt"); 5152 for (;;); 5153 } 5154 5155 /* CPU halt call-back */ 5156 asmlinkage void smp_stop_cpu_interrupt(void) 5157 { 5158 stop_this_cpu(); 5159 } 5160 5161 void (*mtrr_hook) (void) = NULL; 5162 5163 asmlinkage void smp_mtrr_interrupt(void) 5164 { 5165 ack_APIC_irq(); 5166 if (mtrr_hook) (*mtrr_hook)(); 5167 } 5168 5169 /* This interrupt should _never_ happen with our APIC/SMP 5170 * architecture */ 5171 asmlinkage void smp_spurious_interrupt(void) 5172 { 5173 ack_APIC_irq(); 5174 /* see sw-dev-man vol 3, chapter 7.4.13.5 */ 5175 printk("spurious APIC interrupt on CPU#%d, " 5176 "should never happen.\n", smp_processor_id()); 5177 } 5178 5179 /* This part sets up the APIC 32 bit clock in LVTT1, with 5180 * HZ interrupts per second. We assume that the caller 5181 * has already set up the local APIC. 5182 * 5183 * The APIC timer is not exactly sync with the external 5184 * timer chip, it closely follows bus clocks. */ 5185 5186 /* The timer chip is already set up at HZ interrupts per 5187 * second here, but we do not accept timer interrupts 5188 * yet. We only allow the BP to calibrate. 
*/ 5189 static unsigned int __init get_8254_timer_count(void) 5190 { 5191 unsigned int count; 5192 5193 outb_p(0x00, 0x43); 5194 count = inb_p(0x40); 5195 count |= inb_p(0x40) << 8; 5196 5197 return count; 5198 } 5199 5200 /* This function sets up the local APIC timer, with a 5201 * timeout of 'clocks' APIC bus clock. During calibration 5202 * we actually call this function twice, once with a 5203 * bogus timeout value, second time for real. The other 5204 * (noncalibrating) CPUs call this function only once, 5205 * with the real value. 5206 * 5207 * We are strictly in irqs off mode here, as we do not 5208 * want to get an APIC interrupt go off accidentally. 5209 * 5210 * We do reads before writes even if unnecessary, to get 5211 * around the APIC double write bug. */ 5212 #define APIC_DIVISOR 16 5213 5214 void setup_APIC_timer(unsigned int clocks) 5215 { 5216 unsigned long lvtt1_value; 5217 unsigned int tmp_value; 5218 5219 /* Unfortunately the local APIC timer cannot be set up 5220 * into NMI mode. With the IO APIC we can re-route the 5221 * external timer interrupt and broadcast it as an NMI 5222 * to all CPUs, so no pain. 
*/ 5223 tmp_value = apic_read(APIC_LVTT); 5224 lvtt1_value = APIC_LVT_TIMER_PERIODIC | 5225 LOCAL_TIMER_VECTOR; 5226 apic_write(APIC_LVTT , lvtt1_value); 5227 5228 /* Divide PICLK by 16 */ 5229 tmp_value = apic_read(APIC_TDCR); 5230 apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 ) 5231 | APIC_TDR_DIV_16); 5232 5233 tmp_value = apic_read(APIC_TMICT); 5234 apic_write(APIC_TMICT, clocks/APIC_DIVISOR); 5235 } 5236 5237 void __init wait_8254_wraparound(void) 5238 { 5239 unsigned int curr_count, prev_count=~0; 5240 int delta; 5241 5242 curr_count = get_8254_timer_count(); 5243 5244 do { 5245 prev_count = curr_count; 5246 curr_count = get_8254_timer_count(); 5247 delta = curr_count-prev_count; 5248 5249 /* This limit for delta seems arbitrary, but it 5250 * isn't, it's slightly above the level of error a 5251 * buggy Mercury/Neptune chipset timer can cause. */ 5252 } while (delta<300); 5253 } 5254 5255 /* In this function we calibrate APIC bus clocks to the 5256 * external timer. Unfortunately we cannot use jiffies 5257 * and the timer irq to calibrate, since some later 5258 * bootup code depends on getting the first irq? Ugh. 5259 * 5260 * We want to do the calibration only once since we want 5261 * to have local timer irqs syncron. CPUs connected by 5262 * the same APIC bus have the very same bus frequency. 5263 * And we want to have irqs off anyways, no accidental 5264 * APIC irq that way. */ 5265 5266 int __init calibrate_APIC_clock(void) 5267 { 5268 unsigned long long t1,t2; 5269 long tt1,tt2; 5270 long calibration_result; 5271 int i; 5272 5273 printk("calibrating APIC timer ... "); 5274 5275 /* Put whatever arbitrary (but long enough) timeout 5276 * value into the APIC clock, we just want to get the 5277 * counter running for calibration. */ 5278 setup_APIC_timer(1000000000); 5279 5280 /* The timer chip counts down to zero. 
Let's wait for a 5281 * wraparound to start exact measurement: (the current 5282 * tick might have been already half done) */ 5283 5284 wait_8254_wraparound (); 5285 5286 /* We wrapped around just now. Let's start: */ 5287 READ_TSC(t1); 5288 tt1=apic_read(APIC_TMCCT); 5289 5290 #define LOOPS (HZ/10) 5291 /* Let's wait LOOPS wraprounds: */ 5292 for (i=0; i<LOOPS; i++) 5293 wait_8254_wraparound (); 5294 5295 tt2=apic_read(APIC_TMCCT); 5296 READ_TSC(t2); 5297 5298 /* The APIC bus clock counter is 32 bits only, it might 5299 * have overflown, but note that we use signed longs, 5300 * thus no extra care needed. 5301 * 5302 * underflown to be exact, as the timer counts down ;) 5303 */ 5304 5305 calibration_result = (tt1-tt2)*APIC_DIVISOR/LOOPS; 5306 5307 SMP_PRINTK(("\n..... %ld CPU clocks in 1 timer chip " 5308 "tick.", (unsigned long)(t2-t1)/LOOPS)); 5309 5310 SMP_PRINTK(("\n..... %ld APIC bus clocks in 1 timer " 5311 "chip tick.", calibration_result)); 5312 5313 printk("\n..... CPU clock speed is %ld.%04ld MHz.\n", 5314 ((long)(t2-t1)/LOOPS)/(1000000/HZ), 5315 ((long)(t2-t1)/LOOPS)%(1000000/HZ)); 5316 5317 printk("..... system bus clock speed is %ld.%04ld " 5318 "MHz.\n", 5319 calibration_result/(1000000/HZ), 5320 calibration_result%(1000000/HZ) ); 5321 #undef LOOPS 5322 5323 return calibration_result; 5324 } 5325 5326 static unsigned int calibration_result; 5327 5328 void __init setup_APIC_clock(void) 5329 { 5330 unsigned long flags; 5331 5332 static volatile int calibration_lock; 5333 5334 __save_flags(flags); 5335 __cli(); 5336 5337 SMP_PRINTK(("setup_APIC_clock() called.\n")); 5338 5339 /* [ setup_APIC_clock() is called from all CPUs, but we 5340 * want to do this part of the setup only once ... 
and 5341 * it fits here best ] */ 5342 if (!test_and_set_bit(0,&calibration_lock)) { 5343 5344 calibration_result=calibrate_APIC_clock(); 5345 /* Signal completion to the other CPU[s]: */ 5346 calibration_lock = 3; 5347 5348 } else { 5349 /* Other CPU is calibrating, wait for finish: */ 5350 SMP_PRINTK(("waiting for other CPU " 5351 "calibrating APIC ... ")); 5352 while (calibration_lock == 1); 5353 SMP_PRINTK(("done, continuing.\n")); 5354 } 5355 5356 /* Now set up the timer for real. */ 5357 setup_APIC_timer (calibration_result); 5358 5359 /* We ACK the APIC, just in case there is something 5360 * pending. */ 5361 ack_APIC_irq (); 5362 5363 __restore_flags(flags); 5364 } 5365 5366 /* the frequency of the profiling timer can be changed by 5367 * writing a multiplier value into /proc/profile. 5368 * 5369 * usually you want to run this on all CPUs ;) */ 5370 int setup_profiling_timer(unsigned int multiplier) 5371 { 5372 int cpu = smp_processor_id(); 5373 unsigned long flags; 5374 5375 /* Sanity check. [at least 500 APIC cycles should be 5376 * between APIC interrupts as a rule of thumb, to avoid 5377 * irqs flooding us] */ 5378 if ( (!multiplier) 5379 (calibration_result/multiplier < 500)) 5380 return -EINVAL; 5381 5382 save_flags(flags); 5383 cli(); 5384 setup_APIC_timer(calibration_result/multiplier); 5385 prof_multiplier[cpu]=multiplier; 5386 restore_flags(flags); 5387 5388 return 0; 5389 } 5390 5391 #undef APIC_DIVISOR 5392


Содержание раздела