5th set of fixes for apache-fastcgi-2.0b1
Sonya Rikhtverchik (rikhtver@OpenMarket.com)
Thu, 04 Sep 1997 10:01:41 -0400
Message-Id: <199709041401.KAA02189@u4-138.openmarket.com>
To: fastcgi-developers@OpenMarket.com
Subject: 5th set of fixes for apache-fastcgi-2.0b1
Date: Thu, 04 Sep 1997 10:01:41 -0400
From: Sonya Rikhtverchik <rikhtver@OpenMarket.com>
Date: Tue, 2 Sep 1997 15:25:20 -0400 (EDT)
Message-Id: <PAA23653.199709021925@bastille.va.pubnix.com>
From: "David J. MacKenzie" <djm@va.pubnix.com>
To: fastcgi-developers@openmarket.com
cc: djm@va.pubnix.com, Jonathan Roy <roy@atlantic.net>
Subject: 5th set of fixes for apache-fastcgi-2.0b1
This patch fixes the problems noted below.
Problems fixed by this patch, which should be applied after the others
I've sent:
1. Fix gcc -Wall warnings.
2. Better document the semantics of some variables and constants.
3. Fix some memory and file descriptor leaks.
4. Make the blocking kill of a server closer to working.
5. Fixes to calculations in dynamic application management.
After this patch, my version of mod_fastcgi mostly works for dynamic
applications. There is a race condition I haven't figured out how to
fix yet. The process manager sometimes kills an application process
that either apache is currently using to handle a request, or apache
has just chosen to use (based on a lock file existing) but hasn't yet
tried to contact (with connect()). This results in a "500 Server
Error"; I see it about 1 in every 20 FastCGI requests when using
FCGIConfig parameters that kill processes rather aggressively. The
other bug I haven't fixed yet is that if the server is not extremely
busy, and the FastCGI applications don't take a long time to process
requests, almost any set of FCGIConfig parameters results in short
application lifetimes (less than 3 minutes), because the calculated
load is almost always less than 1, on a scale of 0-100. I'm
considering adding a logarithmic weighting to the load, but haven't
figured out how yet.
- --- mod_fastcgi.c 1997/08/27 22:26:28 1.5
+++ mod_fastcgi.c 1997/09/02 19:06:46
@@ -117,6 +117,9 @@
#include "http_conf_globals.h"
#include "md5.h"
+/* Can't include "util_md5.h" here without compiler errors... */
+char *md5(pool *a, unsigned char *string);
+
#define TRUE 1
#define FALSE 0
#define ASSERT assert
@@ -138,6 +141,7 @@
}
}
+#if 0
static char *StringCopy(char *str)
{
int strLen = strlen(str);
@@ -146,6 +150,7 @@
newString[strLen] = '\000';
return newString;
}
+#endif
static char *StrError(int errorCode)
{
@@ -1926,7 +1931,12 @@
* totalConnTime variable */
#define FCGI_DEFAULT_GAIN 0.5 /* value used as an exponent in the
* calculation of the exponentially
- - * decayed connection times */
+ * decayed connection times;
+ * old values are scaled by
+ * (1-gain), so making it
+ * smaller weights them more heavily
+ * compared to the current value,
+ * which is scaled by gain */
#define FCGI_DEFAULT_THRESHHOLD_1 10 /* if load falls below this value
* and we have only one instance
* running, it is killed off */
@@ -1985,7 +1995,9 @@
* managed app.
*/
int listenQueueDepth; /* size of listen queue for IPC */
- - int numProcesses; /* max allowed processes of this class
*/
+ int numProcesses; /* max allowed processes of this class,
+ * or for dynamic apps, the number of
+ * processes actually running */
time_t restartTime; /* most recent time when the process
* manager started a process in this
* class. */
@@ -2029,15 +2041,19 @@
* persistent connections. Not used
* by Apache. */
/* Dynamic FastCGI apps configuration parameters */
- - unsigned long totalConnTime; /* time spent by the web server waiting
- - * while fastcgi app performs request
- - * processing */
+ unsigned long totalConnTime; /* microseconds spent by the web server
+ * waiting while fastcgi app performs
+ * request processing since the last
+ * updateInterval */
unsigned long smoothConnTime; /* exponentially decayed values of the
* connection times. */
- - unsigned long totalQueueTime; /* time spent by the web server waiting
- - * to connect to the fastcgi app */
+ unsigned long totalQueueTime; /* microseconds spent by the web server
+ * waiting to connect to the fastcgi app
+ * since the last updateInterval.
+ * Not used by Apache. */
unsigned long avgQueueTime; /* exponentially decayed average of the
- - * time spent in queue */
+ * time spent in queue. Not used by
+ * Apache. */
struct _FastCgiServerInfo *next;
} FastCgiServerInfo;
@@ -2099,9 +2115,9 @@
static char *ipcDynamicDir = "/tmp/dynamic"; /* directory for the dynamic
* fastcgi apps' sockets */
static int globalNumInstances = 0; /* number of running apps */
- -static unsigned long epoch = 0; /* last time kill_procs was
+static time_t epoch = 0; /* last time kill_procs was
* invoked by process mgr */
- -static unsigned long lastAnalyze = 0; /* last time calculation
was
+static time_t lastAnalyze = 0; /* last time calculation was
* made for the dynamic procs*/
static char *mbox = "/tmp/mbox"; /* file through which the
fcgi
* procs communicate with WS */
@@ -2521,20 +2537,22 @@
OS_FreeIpcAddr(processInfoPtr->ipcAddr);
}
- - /*
- - * Remove the dead lock file and socket.
- - */
- - lockFileName = MakeLockFileName(DStringValue(&serverInfoPtr->execPath));
- - unlink(lockFileName);
- - Free(lockFileName);
- -
- - ipcAddrPtr = (OS_IpcAddr *)serverInfoPtr->ipcAddr;
- - fname = MakeSocketName(DStringValue(&serverInfoPtr->execPath),
- - NULL, -1, &ipcAddrPtr->bindPath, 1);
- - /* Remove extraneous digit from end. XXX why is this necessary? */
- - fname[strlen(fname)-1] = '\0';
- - unlink(fname);
- - Free(fname);
+ if (serverInfoPtr->directive == APP_CLASS_DYNAMIC) {
+ /*
+ * Remove the dead lock file and socket.
+ */
+ lockFileName = MakeLockFileName(DStringValue(&serverInfoPtr->execPath));
+ unlink(lockFileName);
+ Free(lockFileName);
+
+ ipcAddrPtr = (OS_IpcAddr *)serverInfoPtr->ipcAddr;
+ fname = MakeSocketName(DStringValue(&serverInfoPtr->execPath),
+ NULL, -1, &ipcAddrPtr->bindPath, 1);
+ /* Remove extraneous digit from end. XXX why is this necessary? */
+ fname[strlen(fname)-1] = '\0';
+ unlink(fname);
+ Free(fname);
+ }
/*
* Clean up server info structure resources.
@@ -3229,10 +3247,12 @@
MissingValueReturn:
sprintf(errMsg, "ExternalAppClass: missing value for %s\n", argv[i]);
goto ErrorReturn;
+#if 0
BadValueReturn:
sprintf(errMsg, "ExternalAppClass: bad value \"%s\" for %s\n",
argv[i], argv[i-1]);
goto ErrorReturn;
+#endif
ErrorReturn:
if(serverInfoPtr != NULL) {
FreeFcgiServerInfo(serverInfoPtr);
@@ -3599,7 +3619,9 @@
char *lockFileName=NULL;
char *ptr1=NULL, *ptr2=NULL;
char execName[TMP_BUFSIZ];
- - unsigned long qsec = 0, ctime = 0, now = time(NULL);
+ unsigned long qsec = 0, ctime = 0; /* microseconds spent waiting for the
+ * application, and spent using it */
+ time_t now = time(NULL);
int listenFd;
/* Obtain the data from the mbox file */
@@ -3651,10 +3673,11 @@
if(lastAnalyze == 0) {
lastAnalyze = now;
}
- - if((long)(now-lastAnalyze)>updateInterval) {
+ if((long)(now-lastAnalyze)>=updateInterval) {
for(s=fastCgiServers;s!=NULL;s=s->next) {
+ /* XXX what does this adjustment do? */
lastAnalyze += (((long)(now-lastAnalyze)/updateInterval)*updateInterval);
- - s->smoothConnTime = (1-gain)*s->smoothConnTime+
+ s->smoothConnTime = (1.0-gain)*s->smoothConnTime+
gain*s->totalConnTime;
s->totalConnTime = ctime;
s->totalQueueTime = qsec;
@@ -3781,6 +3804,7 @@
#undef TMP_BUFSIZ
+/* Information about a process we are doing a blocking kill of. */
struct FuncData {
char *lockFileName; /* name of the lock file to lock */
pid_t pid; /* process to issue SIGTERM to */
@@ -3794,7 +3818,7 @@
* Block on the lock file until it is available, and then
* issue a kill signal to the corresponding application.
* Since this function is executed in the child process,
- - * exit() is called upon completion.
+ * _exit() is called upon completion.
*
* Inputs
* Pointer to the data structure containing a process id to
@@ -3824,9 +3848,8 @@
Unlock(lockFd);
}
}
- - Free(funcData->lockFileName);
- - Free(funcData);
- - exit(0);
+ /* exit() may flush stdio buffers inherited from the parent. */
+ _exit(0);
}
/*
@@ -3848,9 +3871,14 @@
FastCgiServerInfo *s;
struct FuncData *funcData = NULL;
time_t now = time(NULL);
- - unsigned long totalTime;
- - int len;
- - float loadFactor;
+ float connTime; /* server's smoothed running time, or
+ * if that's 0, the current total */
+ float totalTime; /* maximum number of microseconds that all
+ * of a server's running processes together
+ * could have spent running since the
+ * last check */
+ float loadFactor; /* percentage, 0-100, of totalTime that
+ * the processes actually used */
int i, victims = 0;
char *lockFileName;
int lockFd;
@@ -3864,8 +3892,12 @@
if((globalNumInstances-victims)<=minProcs) {
break;
}
+ connTime = s->smoothConnTime ? s->smoothConnTime : s->totalConnTime;
totalTime = (s->numProcesses)*(now-epoch)*1000000;
- - loadFactor = (float)(s->totalConnTime)/totalTime*100;
+ /* XXX producing a heavy load with three clients, I haven't been
+ able to achieve a loadFactor greater than 0.5. Perhaps this
+ should be scaled up by another order of magnitude or two. */
+ loadFactor = connTime/totalTime*100.0;
if(((s->numProcesses>1) &&
(((s->numProcesses/(s->numProcesses-1))*loadFactor)
<threshholdN)) ||
@@ -3898,6 +3930,7 @@
* corresponding lock file does not exist, then
* that means we are in big trouble here
*/
+ Free(lockFileName);
continue;
}
if(WriteLock(lockFd)<0) {
@@ -3912,6 +3945,7 @@
*/
funcData = Malloc(sizeof(struct FuncData));
funcData->lockFileName = lockFileName;
+ funcData->pid = s->procInfo[i].pid;
/*
* We can not call onto spawn_child() here
* since we are completely disassociated from
@@ -3919,6 +3953,7 @@
* directly.
*/
if((pid=fork())<0) {
+ close(lockFd);
Free(funcData->lockFileName);
Free(funcData);
return;
@@ -3927,12 +3962,15 @@
BlockingKill(funcData);
} else {
/* parent */
+ close(lockFd);
Free(funcData->lockFileName);
Free(funcData);
}
} else {
kill(s->procInfo[i].pid, SIGTERM);
Unlock(lockFd);
+ close(lockFd);
+ Free(lockFileName);
break;
}
}
@@ -4214,7 +4252,7 @@
if(epoch == 0) {
epoch = now;
}
- - if(((long)(now-epoch)>killInterval) ||
+ if(((long)(now-epoch)>=killInterval) ||
((globalNumInstances+processSlack)>=maxProcs)) {
KillDynamicProcs();
epoch = now;
@@ -5789,9 +5827,16 @@
if((qtime.tv_sec-start.tv_sec)>=appConnTimeout) {
status = SERVER_ERROR;
Unlock(lockFd);
+ close(lockFd);
goto CleanupReturn;
}
}
+ } else {
+ if(gettimeofday(&qtime,NULL)<0) {
+ sprintf(infoPtr->errorMsg,
+ "mod_fastcgi: Unable to get the time of day");
+ goto ErrorReturn;
+ }
}
} else {
if(connect(infoPtr->fd, (struct sockaddr *) ipcAddrPtr->serverAddr,
- --- mod_fastcgi.html 1997/08/20 05:26:15 1.2
+++ mod_fastcgi.html 1997/09/02 19:19:11
@@ -381,8 +381,10 @@
<li>
<B>gainValue:</B> a floating point value between 0 and 1 that is
used as an exponent in the computation of the exponentially decayed
- - load factor of the currently running dynamic FastCGI
- - application. The default
+ connection times load factor of the currently running dynamic FastCGI
+ application. Old values are scaled by (1-<B>gainValue</B>), so making it
+ smaller weights them more heavily compared to the current value,
+ which is scaled by <B>gainValue</B>. The default
value for this attribute is 0.5.<p>
<li>
------- End of Forwarded Message