Difference between revisions of "Monitoring (Nagios)"

From RHS Wiki
Jump to navigation Jump to search
Line 176: Line 176:
 
         notification_period            24x7
 
         notification_period            24x7
 
  }
 
  }
 
  
 
  define service {
 
  define service {
Line 186: Line 185:
  
 
  sudo systemctl restart nagios
 
  sudo systemctl restart nagios
 
  
 
==== How to check nagios connection to a client ====
 
==== How to check nagios connection to a client ====

Revision as of 07:15, 4 April 2018

https://www.digitalocean.com/community/tutorials/how-to-install-nagios-4-and-monitor-your-servers-on-ubuntu-16-04

Nagios

Configure Nagios server

  • Add users and groups
sudo useradd nagios
sudo groupadd nagcmd
sudo usermod -a -G nagcmd nagios
  • Install system requirements
sudo apt-get update
sudo apt-get install build-essential libgd2-xpm-dev openssl libssl-dev unzip
  • Install Nagios
cd ~
curl -L -O http://repos.rra.lan/nagios/nagios-4.3.4.tar.gz
tar zxf nagios-*.tar.gz
cd nagios-*
./configure --with-nagios-group=nagios --with-command-group=nagcmd
make all
sudo make install
sudo make install-commandmode
sudo make install-init
sudo make install-config
  • Enable the nagios conf
sudo /usr/bin/install -c -m 644 sample-config/httpd.conf /etc/apache2/sites-available/nagios.conf
  • Add group nagcmd to www-data
sudo usermod -a -G nagcmd www-data
  • Installing nrpe plugin
cd ~
curl -L -O http://repos.rra.lan/nagios/nrpe-3.2.1.tar.gz
tar zxf nrpe-*.tar.gz
cd nrpe-*
./configure
make check_nrpe
sudo make install-plugin
  • Configure Nagios
sudo nano /usr/local/nagios/etc/nagios.cfg
Uncomment this line by deleting the # character from the front of the line:
cfg_dir=/usr/local/nagios/etc/servers


sudo mkdir /usr/local/nagios/etc/servers
sudo nano /usr/local/nagios/etc/objects/contacts.cfg
Find the email directive and replace its value with your own email address.


sudo nano /usr/local/nagios/etc/objects/commands.cfg
Add the following to the end of the file to define a new command called check_nrpe.
define command{
       command_name check_nrpe
       command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
sudo a2enmod rewrite
sudo a2enmod cgi


  • Create nagios admin
sudo htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
  • Enable nagios conf
sudo ln -s /etc/apache2/sites-available/nagios.conf /etc/apache2/sites-enabled/
  • Edit nagios conf
sudo nano /etc/apache2/sites-available/nagios.conf
Uncomment both occurrences by removing the # symbol.
#  SSLRequireSSL


  • Restart Apache
sudo systemctl restart apache2


  • Create service
sudo nano /etc/systemd/system/nagios.service


[Unit]
Description=Nagios
BindsTo=network.target

[Install]
WantedBy=multi-user.target

[Service]
Type=simple
User=nagios
Group=nagios
ExecStart=/usr/local/nagios/bin/nagios /usr/local/nagios/etc/nagios.cfg


sudo systemctl enable /etc/systemd/system/nagios.service
sudo systemctl start nagios

Connect server to Nagios

On the client

  • Add user nagios
sudo useradd nagios
  • Install system requirements
sudo apt-get update
sudo apt-get install build-essential libgd2-xpm-dev openssl libssl-dev unzip
  • Install nagios-plugins
cd ~
curl -L -O http://repos.rra.lan/nagios/nagios-plugins-2.2.1.tar.gz
tar zxf nagios-plugins-*.tar.gz
cd nagios-plugins-*
./configure --with-nagios-user=nagios --with-nagios-group=nagios --with-openssl
make
sudo make install
  • Install nrpe
cd ~
curl -L -O http://repos.rra.lan/nagios/nrpe-3.2.1.tar.gz
tar zxf nrpe-*.tar.gz
cd nrpe-*
./configure --enable-command-args --with-nagios-user=nagios --with-nagios-group=nagios --with-ssl=/usr/bin/openssl --with-ssl-lib=/usr/lib/x86_64-linux-gnu
make all
sudo make install
sudo make install-config
sudo make install-init
sudo nano /usr/local/nagios/etc/nrpe.cfg

allowed_hosts=127.0.0.1,::1,horus.rra.lan
sudo systemctl start nrpe.service
sudo systemctl status nrpe.service
sudo systemctl enable nrpe.service


  • To add a new service (this example shows how to add a check_disk service):
sudo nano /usr/local/nagios/etc/nrpe.cfg
server_address=[monitored_server_private_ip]
command[check_vda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/vda1
sudo systemctl restart nrpe.service

On Nagios

On the server


sudo nano /usr/local/nagios/etc/servers/your_monitored_server_host_name.cfg
define host {
       use                             linux-server  ; Host template to inherit from (e.g. linux-server)
       host_name                       your_monitored_server_host_name ; Host name from DNS
       alias                           My client server  ; Description
       address                         your_monitored_server_private_ip ; Host DNS or IP
       max_check_attempts              5
       check_period                    24x7
       notification_interval           30
       notification_period             24x7
}
define service {
       use                             generic-service
       host_name                       your_monitored_server_host_name
       service_description             CPU load
       check_command                   check_nrpe!check_load
}
sudo systemctl restart nagios

How to check nagios connection to a client

/usr/local/nagios/libexec/check_nrpe -H [host]

GitLab

At the GitLab server

cd /usr/local/nagios/libexec  # /usr/lib/nagios/plugins/  for installations from package
sudo -u nagios -H wget http://repos.rra.lan/nagios/check_scripts/check_gitlab
sudo -u nagios -H wget http://repos.rra.lan/nagios/check_scripts/check_mem
sudo chmod 744 check_gitlab
sudo chmod 744 check_mem
sudo apt install ruby dc
sudo nano /usr/local/nagios/etc/nrpe.cfg
### GitLab ###
command[check_mem]=/usr/local/nagios/libexec/check_mem -w 80 -c 90 -W 40 -C 60 
command[check_gitlab_health]=/usr/local/nagios/libexec/check_gitlab -m health -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
command[check_gitlab_services]=/usr/local/nagios/libexec/check_gitlab -m services -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
# command[check_gitlab_cipipeline]=/usr/local/nagios/libexec/check_gitlab -m ci-pipeline -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
# command[check_gitlab_cirunner]=/usr/local/nagios/libexec/check_gitlab -m ci-runner -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>

sudo service nrpe restart

At the Nagios server

sudo nano /usr/local/nagios/etc/servers/gitlab.rra.lan.cfg

define host {
       use                             linux-server
       host_name                       git.rra.lan
       alias                           GitLab VPN
       address                         git.rra.lan
       max_check_attempts              5
       check_period                    24x7
       notification_interval           30
       notification_period             24x7
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Load
        check_command                   check_nrpe!check_load
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Root Partition
        check_command                   check_nrpe!check_hd_root
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Memory
        check_command                   check_nrpe!check_mem
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Swap
        check_command                   check_nrpe!check_swap
}


define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Health
        check_command                   check_nrpe!check_gitlab_health
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Health
        check_command                   check_nrpe!check_gitlab_health
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Services
        check_command                   check_nrpe!check_gitlab_services
}


sudo systemctl restart nagios

ChiChi

At ChiChi.rra.lan

  • Follow the Install nrpe instructions
  • As nagios user:

Sources

https://www.digitalocean.com/community/tutorials/how-to-install-nagios-4-and-monitor-your-servers-on-ubuntu-16-04
https://gitlab.com/6uellerBpanda/check_gitlab