5th set of fixes for apache-fastcgi-2.0b1

Sonya Rikhtverchik (rikhtver@OpenMarket.com)
Thu, 04 Sep 1997 10:01:41 -0400

Message-Id: <199709041401.KAA02189@u4-138.openmarket.com>
To: fastcgi-developers@OpenMarket.com
Subject: 5th set of fixes for apache-fastcgi-2.0b1
Date: Thu, 04 Sep 1997 10:01:41 -0400
From: Sonya Rikhtverchik <rikhtver@OpenMarket.com>


Date: Tue, 2 Sep 1997 15:25:20 -0400 (EDT)
Message-Id: <PAA23653.199709021925@bastille.va.pubnix.com>
From: "David J. MacKenzie" <djm@va.pubnix.com>
To: fastcgi-developers@openmarket.com
cc: djm@va.pubnix.com, Jonathan Roy <roy@atlantic.net>
Subject: 5th set of fixes for apache-fastcgi-2.0b1

This patch fixes the problems noted below.

Problems fixed by this patch, which should be applied after the others
I've sent:

1. Fix gcc -Wall warnings.

2. Better document the semantics of some variables and constants.

3. Fix some memory and file descriptor leaks.

4. Make the blocking kill of a server closer to working.

5. Fixes to calculations in dynamic application management.

After this patch, my version of mod_fastcgi mostly works for dynamic
applications.  There is a race condition I haven't figured out how to
fix yet.  The process manager sometimes kills an application process
that either apache is currently using to handle a request, or apache
has just chosen to use (based on a lock file existing) but hasn't yet
tried to contact (with connect()).  This results in a "500 Server
Error"; I see it about 1 in every 20 FastCGI requests when using
FCGIConfig parameters that kill processes rather aggressively.  The
other bug I haven't fixed yet is that if the server is not extremely
busy, and the FastCGI applications don't take a long time to process
requests, almost any set of FCGIConfig parameters results in short
application lifetimes (less than 3 minutes), because the calculated
load is almost always less than 1, on a scale of 0-100.  I'm
considering adding a logarithmic weighting to the load, but haven't
figured out how yet.

- --- mod_fastcgi.c	1997/08/27 22:26:28	1.5
+++ mod_fastcgi.c	1997/09/02 19:06:46
@@ -117,6 +117,9 @@
 #include "http_conf_globals.h"
 #include "md5.h"
 
+/* Can't include "util_md5.h" here without compiler errors... */
+char *md5(pool *a, unsigned char *string);
+
 #define TRUE 1
 #define FALSE 0
 #define ASSERT assert
@@ -138,6 +141,7 @@
     }
 }
 
+#if 0
 static char *StringCopy(char *str)
 {
     int strLen = strlen(str);
@@ -146,6 +150,7 @@
     newString[strLen] = '\000';
     return newString;
 }
+#endif
 
 static char *StrError(int errorCode)
 {
@@ -1926,7 +1931,12 @@
 					    * totalConnTime variable */
 #define FCGI_DEFAULT_GAIN 0.5              /* value used as an exponent in the
 					    * calculation of the exponentially
- -					    * decayed connection times */
+					    * decayed connection times;
+					    * old values are scaled by
+					    * (1-gain), so making it
+					    * smaller weights them more heavily
+					    * compared to the current value,
+					    * which is scaled by gain */
 #define FCGI_DEFAULT_THRESHHOLD_1 10       /* if load falls below this value
 					    * and we have only one instance
 					    * running, it is killed off */
@@ -1985,7 +1995,9 @@
                                      * managed app.
                                      */
     int listenQueueDepth;           /* size of listen queue for IPC */
- -    int numProcesses;               /* max allowed processes of this class 
*/
+    int numProcesses;               /* max allowed processes of this class,
+				     * or for dynamic apps, the number of
+				     * processes actually running */
     time_t restartTime;             /* most recent time when the process
                                      * manager started a process in this
                                      * class. */
@@ -2029,15 +2041,19 @@
                                      * persistent connections.  Not used
                                      * by Apache. */
     /* Dynamic FastCGI apps configuration parameters */
- -    unsigned long totalConnTime;    /* time spent by the web server waiting
- -				     * while fastcgi app performs request
- -				     * processing */
+    unsigned long totalConnTime;    /* microseconds spent by the web server
+				     * waiting while fastcgi app performs
+				     * request processing since the last
+				     * updateInterval */
     unsigned long smoothConnTime;   /* exponentially decayed values of the
 				     * connection times. */
- -    unsigned long totalQueueTime;   /* time spent by the web server waiting
- -				     * to connect to the fastcgi app */
+    unsigned long totalQueueTime;   /* microseconds spent by the web server
+				     * waiting to connect to the fastcgi app
+				     * since the last updateInterval.
+				     * Not used by Apache. */
     unsigned long avgQueueTime;     /* exponentially decayed average of the
- -				     * time spent in queue */
+				     * time spent in queue.  Not used by
+				     * Apache. */
     struct _FastCgiServerInfo *next;
 } FastCgiServerInfo;
 
@@ -2099,9 +2115,9 @@
 static char *ipcDynamicDir = "/tmp/dynamic";     /* directory for the dynamic
 						  * fastcgi apps' sockets */
 static int globalNumInstances = 0;               /* number of running apps */
- -static unsigned long epoch = 0;                  /* last time kill_procs was
+static time_t epoch = 0;                         /* last time kill_procs was
 						  * invoked by process mgr */
- -static unsigned long lastAnalyze = 0;            /* last time calculation 
was
+static time_t lastAnalyze = 0;                   /* last time calculation was
 						  * made for the dynamic procs*/
 static char *mbox = "/tmp/mbox";                 /* file through which the 
fcgi
 						  * procs communicate with WS */
@@ -2521,20 +2537,22 @@
         OS_FreeIpcAddr(processInfoPtr->ipcAddr);
     }
 
- -    /*
- -     * Remove the dead lock file and socket.
- -     */
- -    lockFileName = MakeLockFileName(DStringValue(&serverInfoPtr->execPath));
- -    unlink(lockFileName);
- -    Free(lockFileName);
- -
- -    ipcAddrPtr = (OS_IpcAddr *)serverInfoPtr->ipcAddr;
- -    fname = MakeSocketName(DStringValue(&serverInfoPtr->execPath),
- -			   NULL, -1, &ipcAddrPtr->bindPath, 1);
- -    /* Remove extraneous digit from end.  XXX why is this necessary?  */
- -    fname[strlen(fname)-1] = '\0';
- -    unlink(fname);
- -    Free(fname);
+    if (serverInfoPtr->directive == APP_CLASS_DYNAMIC) {
+      /*
+       * Remove the dead lock file and socket.
+       */
+      lockFileName = MakeLockFileName(DStringValue(&serverInfoPtr->execPath));
+      unlink(lockFileName);
+      Free(lockFileName);
+      
+      ipcAddrPtr = (OS_IpcAddr *)serverInfoPtr->ipcAddr;
+      fname = MakeSocketName(DStringValue(&serverInfoPtr->execPath),
+			     NULL, -1, &ipcAddrPtr->bindPath, 1);
+      /* Remove extraneous digit from end.  XXX why is this necessary?  */
+      fname[strlen(fname)-1] = '\0';
+      unlink(fname);
+      Free(fname);
+    }
 
     /*
      * Clean up server info structure resources.
@@ -3229,10 +3247,12 @@
 MissingValueReturn:
     sprintf(errMsg, "ExternalAppClass: missing value for %s\n", argv[i]);
     goto ErrorReturn;
+#if 0
 BadValueReturn:
     sprintf(errMsg, "ExternalAppClass: bad value \"%s\" for %s\n", 
             argv[i], argv[i-1]);
     goto ErrorReturn;
+#endif
 ErrorReturn:
     if(serverInfoPtr != NULL) {
         FreeFcgiServerInfo(serverInfoPtr);
@@ -3599,7 +3619,9 @@
     char *lockFileName=NULL;
     char *ptr1=NULL, *ptr2=NULL;
     char execName[TMP_BUFSIZ];
- -    unsigned long qsec = 0, ctime = 0, now = time(NULL);
+    unsigned long qsec = 0, ctime = 0; /* microseconds spent waiting for the
+					* application, and spent using it */
+    time_t now = time(NULL);
     int listenFd;
     
     /* Obtain the data from the mbox file */
@@ -3651,10 +3673,11 @@
     if(lastAnalyze == 0) {
         lastAnalyze = now;
     }
- -    if((long)(now-lastAnalyze)>updateInterval) {
+    if((long)(now-lastAnalyze)>=updateInterval) {
         for(s=fastCgiServers;s!=NULL;s=s->next) {
+	    /* XXX what does this adjustment do? */
 	    lastAnalyze += (((long)(now-lastAnalyze)/updateInterval)*updateInterval);
- -	    s->smoothConnTime = (1-gain)*s->smoothConnTime+
+	    s->smoothConnTime = (1.0-gain)*s->smoothConnTime+
 	        gain*s->totalConnTime;
 	    s->totalConnTime = ctime;
 	    s->totalQueueTime = qsec;
@@ -3781,6 +3804,7 @@
 #undef TMP_BUFSIZ
 
 
+/* Information about a process we are doing a blocking kill of.  */
 struct FuncData {
     char *lockFileName;  /* name of the lock file to lock */
     pid_t pid;           /* process to issue SIGTERM to   */
@@ -3794,7 +3818,7 @@
  *      Block on the lock file until it is available, and then
  *      issue a kill signal to the corresponding application.
  *      Since this function is executed in the child process, 
- - *      exit() is called upon completion.
+ *      _exit() is called upon completion.
  *  
  * Inputs
  *      Pointer to the data structure containing a process id to 
@@ -3824,9 +3848,8 @@
 	    Unlock(lockFd);
 	}
     }
- -    Free(funcData->lockFileName);
- -    Free(funcData);
- -    exit(0);
+    /* exit() may flush stdio buffers inherited from the parent. */
+    _exit(0);
 }
 
 /*
@@ -3848,9 +3871,14 @@
     FastCgiServerInfo *s;
     struct FuncData *funcData = NULL;
     time_t now = time(NULL);
- -    unsigned long  totalTime;
- -    int len;
- -    float loadFactor;
+    float connTime;		/* server's smoothed running time, or
+				 * if that's 0, the current total */
+    float totalTime;		/* maximum number of microseconds that all
+				 * of a server's running processes together
+				 * could have spent running since the
+				 * last check */
+    float loadFactor;		/* percentage, 0-100, of totalTime that
+				 * the processes actually used */
     int i, victims = 0;
     char *lockFileName;
     int lockFd;
@@ -3864,8 +3892,12 @@
         if((globalNumInstances-victims)<=minProcs) {
 	    break;
 	}
+	connTime = s->smoothConnTime ? s->smoothConnTime : s->totalConnTime;
         totalTime = (s->numProcesses)*(now-epoch)*1000000;
- -	loadFactor = (float)(s->totalConnTime)/totalTime*100;
+	/* XXX producing a heavy load with three clients, I haven't been
+	   able to achieve a loadFactor greater than 0.5.  Perhaps this
+	   should be scaled up by another order of magnitude or two.  */
+	loadFactor = connTime/totalTime*100.0;
 	if(((s->numProcesses>1) && 
 	        (((s->numProcesses/(s->numProcesses-1))*loadFactor)
 		 <threshholdN)) ||
@@ -3898,6 +3930,7 @@
 		     * corresponding lock file does not exist, then
 		     * that means we are in big trouble here 
 		     */
+		    Free(lockFileName);
 		    continue;
 		}
 		if(WriteLock(lockFd)<0) {
@@ -3912,6 +3945,7 @@
 		     */
 		    funcData = Malloc(sizeof(struct FuncData));
 		    funcData->lockFileName = lockFileName;
+		    funcData->pid = s->procInfo[i].pid;
 		    /* 
 		     * We can not call onto spawn_child() here
 		     * since we are completely disassociated from
@@ -3919,6 +3953,7 @@
 		     * directly.
 		     */
 		    if((pid=fork())<0) {
+			close(lockFd);
 			Free(funcData->lockFileName);
 			Free(funcData);
 			return;
@@ -3927,12 +3962,15 @@
 		        BlockingKill(funcData);
 		    } else {
 		        /* parent */
+			close(lockFd);
 			Free(funcData->lockFileName);
 			Free(funcData);
 		    }
 		} else {
 		    kill(s->procInfo[i].pid, SIGTERM);
 		    Unlock(lockFd);
+		    close(lockFd);
+		    Free(lockFileName);
 		    break;
 	        }
 	    } 
@@ -4214,7 +4252,7 @@
 	    if(epoch == 0) {
 	        epoch = now;
 	    }
- -	    if(((long)(now-epoch)>killInterval) ||
+	    if(((long)(now-epoch)>=killInterval) ||
 	            ((globalNumInstances+processSlack)>=maxProcs)) {
 	        KillDynamicProcs();
 	        epoch = now;
@@ -5789,9 +5827,16 @@
 		if((qtime.tv_sec-start.tv_sec)>=appConnTimeout) {
 		    status = SERVER_ERROR;
 		    Unlock(lockFd);
+		    close(lockFd);
 		    goto CleanupReturn;
 		}
 	    }
+	} else {
+	  if(gettimeofday(&qtime,NULL)<0) {
+	    sprintf(infoPtr->errorMsg,
+		    "mod_fastcgi: Unable to get the time of day");
+	    goto ErrorReturn;
+	  }
 	}
     } else {
         if(connect(infoPtr->fd, (struct sockaddr *) ipcAddrPtr->serverAddr,
- --- mod_fastcgi.html	1997/08/20 05:26:15	1.2
+++ mod_fastcgi.html	1997/09/02 19:19:11
@@ -381,8 +381,10 @@
   <li>
     <B>gainValue:</B> a floating point value between 0 and 1 that is
     used as an exponent in the computation of the exponentially decayed
- -    load factor of the currently running dynamic FastCGI
- -    application.  The default
+    connection times load factor of the currently running dynamic FastCGI
+    application.  Old values are scaled by (1-<B>gainValue</B>), so making it
+    smaller weights them more heavily compared to the current value,
+    which is scaled by <B>gainValue</B>.  The default
     value for this attribute is 0.5.<p>
 
   <li>

------- End of Forwarded Message