Project

General

Profile

« Previous | Next » 

Revision 51f90897

Added by Alexis Mousset over 7 years ago

Fixes #7381: Process management issues on nodes hosting LXC containers

View differences:

techniques/system/common/1.0/promises.st
{
vars:
windows::
# process_term defines the maximum number of instances of this
# binary that may be running before attempting to SIGTERM them.
# process_kill is the same for SIGKILL.
!windows::
# On windows, cf-execd is a service, and there can be only one instance of it running (by design)
"process_term[execd]" string => "2";
"process_kill[execd]" string => "5";
any::
"process_term[agent]" string => "5";
"process_kill[agent]" string => "8";
......
"pass1" expression => "any";
methods:
pass3::
pass3.!windows::
"any" usebundle => rudder_common_report("Common", "result_na", "&TRACKINGKEY&", "Process checking", "None", "CFEngine proccesses check is done by the rudder-agent CRON job");
pass3.windows::
"any" usebundle => rudder_common_report("Common", "result_success", "&TRACKINGKEY&", "Process checking", "None", "There is an acceptable number of CFEngine processes running on the machine"),
# Here, I can not use the binaries variable as CFEngine will iterate and output two reports, breaking the reporting.
ifvarclass => "!agent_has_gone_wild.!agent_has_gone_really_wild.!execd_has_gone_wild.!execd_has_gone_really_wild";
......
ifvarclass => "${binaries}_has_gone_really_wild";
processes:
!windows::
"${sys.workdir}/bin/cf-serverd" restart_class => "start_server";
"${sys.workdir}/bin/cf-execd" restart_class => "start_executor";
# If there are more than 2 cf-execd's, it means cf-execd is starting to
# go crazy, so we politely ask these processes to shut down.
"${sys.workdir}/bin/cf-${binaries}"
process_count => check_range("${binaries}", "0","${process_term[${binaries}]}"),
signals => { "term" },
classes => if_repaired("${binaries}_has_gone_wild"),
comment => "Checking if cf-${binaries} has gone wild";
# If there are too many cf-execd/cf-agent processes running, it means that they are really
# going crazy. Let's be a bit less polite and more violent about killing them.
#
# These two promises overlap, because when you go past the 2/5-limit threshold,
# you still leave a chance for them to die with SIGTERM before the SIGKILL.
#
# Reason: The backend databases that store the classes and some runtime
# parameters really do not appreciate being killed violently and may prevent
# the agent from operating properly.
"${sys.workdir}/bin/cf-${binaries}"
process_count => check_range("${binaries}", "0","${process_kill[${binaries}]}"),
signals => { "kill" },
classes => if_repaired("${binaries}_has_gone_really_wild"),
comment => "Checking if cf-${binaries} has gone really wild";
!windows.should_disable_server::
"${sys.workdir}/bin/cf-serverd"
signals => { "term", "kill" },
action => immediate;
windows::
# Using the path on windows fails, as processes are not reported the same way
# And unfortunately, the cf-serverd is not a service.
......
action => u_ifwin_bg,
classes => outcome("server");
start_executor::
"${sys.cf_execd}"
action => u_ifwin_bg,
classes => outcome("executor");
}

Also available in: Unified diff