#module LAT$RATING_CALC "V1.0-003" /* ** // ************************************************************************* // * * // * © Copyright 2003 Hewlett-Packard Development Company, L.P. * // * * // * Confidential computer software. Valid license from HP and/or * // * its subsidiaries required for possession, use, or copying. * // * * // * Consistent with FAR 12.211 and 12.212, Commercial Computer Software, * // * Computer Software Documentation, and Technical Data for Commercial * // * Items are licensed to the U.S. Government under vendor's standard * // * commercial license. * // * * // * Neither HP nor any of its subsidiaries shall be liable for technical * // * or editorial errors or omissions contained herein. The information * // * in this document is provided "as is" without warranty of any kind and * // * is subject to change without notice. The warranties for HP products * // * are set forth in the express limited warranty statements accompanying * // * such products. Nothing herein should be construed as constituting an * // * additional warranty. * // * * // ************************************************************************* */ /* **++ ** FACILITY: ** ** LAT Driver - rating calculation image extension. ** ** MODULE DESCRIPTION: ** ** This module contains the guts of the LAT rating calculation. ** It must contain code only - all global storage should be placed ** in LAT$RATING_DPT.MAR and referred to from here. The code ** contained in this module has the default LAT rating calculation ** that has been used by LTDRIVER since OpenVMS VAX V5.3 and ** OpenVMS AXP V1.0. It is designed to allow site specific ** modifications in the event that the default rating algorithm ** is insufficient. ** ** System programmers can tailor the calculations of the LAT ** rating as they see fit for their environment. This module ** allows for easier customization of the LAT rating. ** ** AUTHORS: ** ** Michael D. 
Raspuzzi ** ** CREATION DATE: 13-May-1994 ** ** DESIGN ISSUES: ** ** This image uses an OpenVMS driver layout. Because of the ** lack of PSECT control with the C compiler, no global storage ** should be declared and referenced in this module. All global ** storage should be maintained in the MACRO module. There is ** one global variable that has special meaning. It defines the ** end of the rating image that is loaded by SYSGEN. Do not ** modify the use of that single global variable. ** ** The created LAT rating image must be present in SYS$LOADABLE_IMAGES ** and it is loaded by SYSGEN during LAT configuration on the ** system (typically done in LAT$CONFIG.COM). This image can be ** reloaded provided LAT is first stopped on the local node. ** ** This module requires DEC C V4.0 or higher for compilation. ** However, it is also possible to compile this module with ** VAX C (V3.2). ** ** Since both DEC C and VAX C have no provisions for accessing ** OpenVMS locking (like spinlocks) and operating system ** synchronization, the routines that require these are called ** from initial entry points written in MACRO. ** ** WARNING! WARNING! WARNING! ** ** DO NOT CALL ANY C RTL FUNCTIONS FROM THIS MODULE!!! The ** C run time library functions are contained in a separate ** (shareable) image. Since this module is used to produce ** an OpenVMS device driver, it is not possible (and very ** dangerous) to call code in a shareable image (like the C ** run time library) from driver context. If you must use ** C RTL functions, you will have to create your own version ** and reference it. ** ** MODIFICATION HISTORY: ** ** V1.0-003 Guy Peleg 24-Mar-2004 ** 50-BIT PA project: Change SCH$GL_FREECNT to ** SCH$GI_FREECNT. Promote penalty to 64 bit. ** Update copyright. ** ** V1.0-002 Guy Peleg 04-Aug-2002 ** Fix conditionalization for IPF port. ** ** V1.0-001 Michael D. Raspuzzi 13-May-1994 ** Module creation and routine additions. 
**--
*/

/*
**
**  INCLUDE FILES
**
*/

#ifdef VAX /* Verified for IA64 port - Guy Peleg */
/*
**  On VAX, there is currently no support for system data
**  structure definitions (like what is found in LIB).
**  Therefore, we must manually define any data structure
**  we need to reference.
*/

/*+ */
/* Wait queue header definitions */
/*- */
#define WQH$K_LENGTH 12 /* Length of wait queue header */
#define WQH$C_LENGTH 12 /* Length of wait queue header */
#define WQH$S_WQHDEF 12 /* Same value as the two length constants above */
typedef struct _wqh {
    struct _wqh *wqh$l_wqfl; /* Head or forward link */
    struct _wqh *wqh$l_wqbl; /* Tail or backward link */
    unsigned short int wqh$w_wqcnt; /* Wait queue count */
    unsigned short int wqh$w_wqstate; /* State number for wait */
    } WQH;
#endif

/*
**
**  MACRO DEFINITIONS
**
**  MIN and MAX expand each argument more than once, so an argument
**  may be evaluated twice.  Do not pass expressions with side effects.
**
*/

#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))

/*
**  External data storage references.
**
**  LAT$LOAD_AVERAGE is written by LAT$NEW_LOAD_CALC and read by
**  LAT$CALCULATE_RATING; LAT$RATING_VALUE is written by
**  LAT$CALCULATE_RATING.  The remaining cells are only read by the
**  routines in this module.
*/

globalref unsigned long int LAT$LOAD_AVERAGE;
globalref long int LAT$RATING_VALUE;
globalref unsigned short int SYS$GW_IJOBLIM;
globalref unsigned long int SGN$GL_FREEGOAL;
/*
**  NOTE(review): declared unconditionally as unsigned __int64; confirm
**  this declaration is accepted when the module is built with VAX defined.
*/
globalref unsigned __int64 SCH$GI_FREECNT;

#ifndef VAX /* Verified for IA64 port - Guy Peleg */
/*
**  Non-VAX builds: longword job count.  SCH$CALC_CPU_LOAD is declared
**  here but is not called from this module.
*/
globalref unsigned long int SYS$GL_IJOBCNT;
unsigned long int SCH$CALC_CPU_LOAD ();
#else
/*
**  VAX builds: word job count, the four wait queue headers summed by
**  LAT$RATING_CALC_LOAD, and the two counting routines it calls.
*/
globalref unsigned short int SYS$GW_IJOBCNT;
globalref WQH SCH$GQ_COLPGWQ;
globalref WQH SCH$GQ_MWAIT;
globalref WQH SCH$GQ_PFWQ;
globalref WQH SCH$GQ_FPGWQ;
unsigned long int LAT$COUNT_COM_PCBS ();
unsigned long int LAT$COUNT_COMO_PCBS ();
#endif

/*
**  Global data storage.  NOTE: Only 1 variable exists here to
**  indicate the image end point.  Do not change this and do not
**  define any global variables in this module!
*/

/*
**  Forces LAT$RATING_END into the $DATA psect
*/
globaldef unsigned long int LAT$RATING_END;

/*
**++
**  FUNCTIONAL DESCRIPTION:
**
**      This routine is called to calculate the CPU load.  That is
**      the number of processes in compute bound wait states.
This routine ** assumes the caller has obtained the SCHED spinlock. This routine ** is not called on Alpha systems. Routine SCH$CALC_CPU_LOAD is ** used. ** ** FORMAL PARAMETERS: ** ** None ** ** RETURN VALUE: ** ** Number of processes in computable wait states ** ** SIDE EFFECTS: ** ** None ** **-- */ #ifdef VAX /* Verified for IA64 port - Guy Peleg */ unsigned int LAT$RATING_CALC_LOAD () { unsigned long int com_processes; /* ** First add up all the jobs in the collided page wait queue, ** the mutex wait queue, the page fault wait queue and the ** free page wait queue. */ #ifndef VAX /* Verified for IA64 port - Guy Peleg */ com_processes = SCH$GQ_COLPGWQ->wqh$l_wqcnt; com_processes += SCH$GQ_MWAIT->wqh$l_wqcnt; com_processes += SCH$GQ_PFWQ->wqh$l_wqcnt; com_processes += SCH$GQ_FPGWQ->wqh$l_wqcnt; #else com_processes = SCH$GQ_COLPGWQ.wqh$w_wqcnt; com_processes += SCH$GQ_MWAIT.wqh$w_wqcnt; com_processes += SCH$GQ_PFWQ.wqh$w_wqcnt; com_processes += SCH$GQ_FPGWQ.wqh$w_wqcnt; #endif /* ** Next, call a macro routine to calculate the number of ** processes in the COM state that are higher than DEFPRI. */ com_processes += LAT$COUNT_COM_PCBS (); /* ** Last, call another macro routine to figure in the ** number of processes in the COMO state that are ** higher than DEFPRI. */ com_processes += LAT$COUNT_COMO_PCBS (); /* ** Return the number of computable processes to the ** caller so a new load average can be calculated. */ return (com_processes); } #endif /* **++ ** FUNCTIONAL DESCRIPTION: ** ** This routine is called to calculate the new CPU load. That is ** the number of processes in compute bound wait states (but it takes ** in account the previous load average value and feeds the new value ** in slowly to smooth out compute spikes) This routine assumes ** the caller has obtained the SCHED spinlock. This routine is called ** once per second. 
**
**  FORMAL PARAMETERS:
**
**      current_cpu_load - unsigned longword holding the CPU load just
**                         sampled.  It is multiplied by 100 before it is
**                         blended into the running load average.
**
**  RETURN VALUE:
**
**      None
**
**  SIDE EFFECTS:
**
**      New load average computed and stored in LAT$LOAD_AVERAGE
**
**--
*/
void LAT$NEW_LOAD_CALC (current_cpu_load)
unsigned long int current_cpu_load;
{
    unsigned long int carried_over;     /* Weighted share of the old average */
    unsigned long int fresh_sample;     /* Weighted share of the new sample  */

    /*
    **  Background on the weights:
    **
    **  LTDRIVER originally recomputed the load average only once every
    **  5 seconds, taking 90% of the previous average plus 10% of the
    **  freshly calculated CPU load.  That gave an idle system roughly
    **  60 to 70 seconds to decay from a load average of 100 down to 0,
    **  and it smoothed out momentary spikes of compute bound processes.
    **
    **  The average is now recomputed every second.  To keep the same
    **  decay rate the percentages were rescaled to about 97.9%
    **  (1003 / 1024) of the previous average and about 2.1%
    **  (21 / 1024) of the new sample.
    **
    **  The weights come from:
    **
    **      OLD_AVG_PCT = exp ((12/n) * ln(.9))
    **      NEW_AVG_PCT = 1 - OLD_AVG_PCT
    **
    **  where "exp" is the standard FORTRAN exp() function, "ln" is the
    **  natural log function and "n" is the number of samples taken per
    **  minute.
    **
    **  All arithmetic is done in unsigned longwords and the final
    **  division truncates.
    */
    carried_over = 1003 * LAT$LOAD_AVERAGE;
    fresh_sample = 21 * (100 * current_cpu_load);

    LAT$LOAD_AVERAGE = (carried_over + fresh_sample) / 1024;
}

/*
**++
**  FUNCTIONAL DESCRIPTION:
**
**      This routine is called directly by LTDRIVER any time it needs
**      to know the current LAT rating.  Typically, this is done when it
**      is time to transmit a LAT service announcement message.  Normally,
**      this is done when the LAT multicast timer expires but LTDRIVER
**      may decide to transmit a service announcement shortly after a user
**      logs in or logs out.
This is done when users log in/out to ** expedite the new rating to other LAT nodes. ** ** The logic contained in this routine used to be in LTDRIVER. It ** is placed here (since it is the crux of the LAT rating algorithm) ** in the event that the default rating algorithm is not sufficient ** for a specific system. ** ** FORMAL PARAMETERS: ** ** cpu_rating - unsigned longword between 0 and 100. A non-0 value ** indicates the system manager has set a /CPU_RATING ** with LATCP and this is used in the rating calculation ** accordingly. ** ** RETURN VALUE: ** ** LAT rating ** ** SIDE EFFECTS: ** ** New load average computed and stored in LAT$RATING_VALUE ** **-- */ unsigned int LAT$CALCULATE_RATING (cpu_rating) unsigned long cpu_rating; { register long int rating, load; register __int64 penalty; /* ** The LAT rating is calculated using system load average and number of ** free job slots: ** ** 20 * (IJOBLIM - IJOBCNT) min(235,IJOBLIM) * 100 ** RATING = ------------------------ + ---------------------- ** IJOBLIM (100 + LOAD_AVERAGE) ** ** LOAD_AVERAGE is a quantity that equals 100 if there is an average of ** 1 job waiting in a computable queue (200 if 2 jobs, etc.) and is ** a moving average taken every second. ** ** The first term in the above formula is dubbed the "AVAILABILITY" term, ** and represents the fraction of free job slots available. The second ** term is known as the "LOAD" term, and varies according to system load. ** The "LOAD" term can contribute up to 235 points to the LAT rating ** and the "AVAILABILITY" can contribute up to 20 points. ** ** The factor "min(235,IJOBLIM)" may be changed by the LATCP command ** "SET NODE/CPU_RATING=nnn". The quantity "nnn" represents the system ** manager's estimate of relative CPU power. Its value can range ** from 1 (for a low power CPU) to 100 (for the highest power CPU which ** offers the given LAT service). 
This range is scaled up to produce a ** factor which ranges from 1 to 235 in order to fit in the above formula. ** This routine does that scaling. Note, a CPU rating of 0 indicates that ** the default factor of "min(235,IJOBLIM)" is to be used. ** ** In addition to the above terms, there may be a term which represents ** a penalty of up to 40 rating points if the system is low on free ** memory. This term is: ** ** (FREEGOAL + 2048 - FREECNT) ** PENALTY = 40 * --------------------------- ** (FREEGOAL + 2048) ** ** This term is SUBTRACTED from the rating ONLY IF it is positive. If ** FREECNT is high enough, this term is not used. ** */ rating = SYS$GW_IJOBLIM; if (rating != 0) { /* ** Have a job limit set. Subtract out ** current job count. If system has all ** jobs consumed, set rating to 0. */ #ifndef VAX /* Verified for IA64 port - Guy Peleg */ rating -= SYS$GL_IJOBCNT; #else rating -= SYS$GW_IJOBCNT; #endif if (rating <= 0) rating = 0; else { /* ** Not all jobs in use, calculate the rating using the ** formula. Up to 235 points for load and 20 points for ** availability. If the system manager has set a CPU ** rating with LATCP, LTDRIVER will pass in the set value ** to this routine as a non-0 value. The /CPU_RATING is ** then scaled to 235. */ if (cpu_rating != 0) load = (cpu_rating * 235) / 100; else load = MIN (235, SYS$GW_IJOBLIM); rating = ((20 * rating) / SYS$GW_IJOBLIM) + ((100 * load) / (100 + LAT$LOAD_AVERAGE)); /* ** Calculate memory penalty and apply it if necessary. */ penalty = (2048 + SGN$GL_FREEGOAL - SCH$GI_FREECNT); if (penalty > 0) { /* ** To prevent the rating from going negative, we ** test if the memory penalty drops it to below 0. ** If so, the LAT rating is set to 1. Invoking ** the memory penalty and getting a negative rating ** is bad because the value is treated as an unsigned ** byte (which could be a very high - mistaken - rating). 
*/ rating -= ((40 * penalty) / (SGN$GL_FREEGOAL + 2048)); rating = MAX (1, rating); } } } /* ** LAT rating is calculated. Store it internally and ** return the value to LTDRIVER. */ LAT$RATING_VALUE = rating; return (rating); }