#!/opt/imh-python/bin/python3
Aggregate worker information from Apache's Full Status output and determine
whether the processes need to be ended.
from rads import setup_logging
# Matches one worker row of Apache mod_status "fullstatus" output.
# Named groups: worker/generation ("NN-NN" slot id), pid, acc (accesses
# this conn/child/slot), status (single scoreboard letter, e.g. G =
# gracefully finishing), cpu, lastreqsecs (seconds since last request),
# req, dur, the three unnamed groups are throughput columns, then the
# client ipaddr, protocol, vhost domain, request method and path.
THREAD_RE = re.compile(r'^(?P<worker>\d{1,3})\-(?P<generation>\d{1,3})\s+(?P<pid>[0-9\-]+)\s+(?P<acc>\d+\/\d+\/\d+)\s+(?P<status>[A-Z\.])\s+(?P<cpu>\d+\.\d+)\s+(?P<lastreqsecs>\d+)\s+(?P<req>\d+)\s+(?P<dur>\d+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+(?P<ipaddr>(\d+\.){3}\d+)\s+(?P<protocol>http\/[0-9\.]{3})\s+(?P<domain>[a-z\:0-9\.\-]+)\s+(?P<method>[A-Z]+)\s+(?P<path>.*$)')
def parse_args(argv=None):
    """Parse command line arguments.

    Args:
        argv (list[str] | None): argument list to parse; defaults to
            sys.argv[1:] when None (normal CLI use). Added for testability,
            backward-compatible with zero-argument calls.

    Returns:
        argparse.Namespace: parsed options (.clean, .debug booleans)
    """
    parser = argparse.ArgumentParser(description="analyze apache workers")
    parser.add_argument(
        '--clean', action='store_true', default=False, dest='clean',
        help="send SIGABRT to workers deemed stuck (otherwise dry-run)",
    )
    parser.add_argument(
        '--debug', action='store_true', default=False, dest='debug',
        help="enable debug-level logging and echo log output to stdout",
    )
    return parser.parse_args(argv)
"""Run apachectl fullstatus and return its output in a list line by line.
list: apachectl fullstatus output in a list
['/usr/sbin/apachectl', 'fullstatus'],
stderr=subprocess.DEVNULL,
return proc.stdout.splitlines()
def check_apache(clean_procs=False):
    """Aggregate worker information from Apache's Full Status output.

    Scans the scoreboard for workers stuck in the 'G' (gracefully
    finishing) state, reports the domains/paths they are serving, and --
    when clean_procs is set -- signals workers whose request age is well
    past the graceful timeout.

    Args:
        clean_procs (bool): clean/kill processes deemed to be killed
    """
    logging.debug("Scanning Apache workers for stuck workers")
    domains = {}           # G-state hit count per vhost domain
    domain_paths = {}      # G-state hit count per domain+path
    graceful_workers = {}  # pid -> lowest last-request age (secs) seen
    reclaim_workers = set()
    for line in get_apache_status():
        if proc_match := THREAD_RE.match(line):
            thread_data = proc_match.groupdict()
            worker_status = thread_data.get('status')
            worker_id = thread_data.get('worker')
            worker_pid = thread_data.get('pid')
            last_req = int(thread_data.get('lastreqsecs'))
            # Strip any :port suffix from the vhost column.
            req_domain = thread_data.get('domain').split(':')[0]
            req_method = thread_data.get('method')
            # Drop the trailing "HTTP/1.x" token from the request column.
            req_path = thread_data.get('path').split('HTTP/1')[0]
            # NOTE(review): only 'G' (gracefully finishing) workers are
            # tallied -- confirm this filter against the original source.
            if worker_status != 'G':
                continue
            domains.setdefault(req_domain, 0)
            domains[req_domain] += 1
            domain_paths.setdefault(req_domain + req_path, 0)
            domain_paths[req_domain + req_path] += 1
            # A worker shows one row per thread; keep the smallest
            # last-request age per PID so we never over-estimate idleness.
            graceful_workers.setdefault(worker_pid, last_req)
            if last_req < graceful_workers[worker_pid]:
                graceful_workers[worker_pid] = last_req
            logging.debug(
                "G state process: %s %s",
                f"{worker_pid=} {worker_id=} {last_req=}",
                f"{req_domain=} {req_method=} {req_path=}"
            )
    if len(graceful_workers) == 0:
        logging.debug("No G state workers found")
        return
    for worker_pid, last_request in graceful_workers.items():
        # final sanity check, if well above timeout then we can kill
        # let Apache at least try to properly close workers
        # NOTE(review): 120s threshold inferred from the log message below.
        if last_request > 120:
            reclaim_workers.add(worker_pid)
        else:
            logging.debug(
                "G state worker with request under 120 seconds: %s -> %s",
                worker_pid, last_request
            )
    if len(reclaim_workers) == 0:
        return
    logging.info("Top six domains with G processes:")
    for domain in sorted(domains, key=domains.get, reverse=True)[:6]:
        logging.info("%s: %s", domain, domains[domain])
    logging.info("Top six domain paths with G processes:")
    for domain in sorted(domain_paths, key=domain_paths.get, reverse=True)[:6]:
        logging.info("%s: %s", domain, domain_paths[domain])
    if clean_procs:
        logging.info(
            "Sending kills to the following PIDs: %s",
            ', '.join(reclaim_workers)
        )
        for proc in reclaim_workers:
            logging.debug("Sending SIGABRT to %s", proc)
            # Nothing but SIGABRT or SIGKILL works here
            # SIGABRT seems more sane, since it can be caught and handled
            os.kill(int(proc), signal.SIGABRT)
    else:
        logging.info(
            "Would kill the following PIDs: %s",
            ', '.join(reclaim_workers)
        )
if __name__ == '__main__':
    # CLI entry point: parse flags, wire up file (and optionally stdout)
    # logging, then scan -- killing only when --clean was requested.
    args = parse_args()
    setup_logging(
        path="/var/log/check_apache.log",
        loglevel=logging.DEBUG if args.debug else logging.INFO,
        print_out=sys.stdout if args.debug else None
    )
    check_apache(clean_procs=args.clean)