Difference between revisions of "Monitoring (Nagios)"

From RHS Wiki
Jump to navigation Jump to search
Line 1: Line 1:
 
https://www.digitalocean.com/community/tutorials/how-to-install-nagios-4-and-monitor-your-servers-on-ubuntu-16-04<br />
 
https://www.digitalocean.com/community/tutorials/how-to-install-nagios-4-and-monitor-your-servers-on-ubuntu-16-04<br />
  
== Debian Client installation ==
+
= [https://horus.rra.lan Nagios] =
  <nowiki>sudo useradd nagios
+
== Configure Nagios server ==
sudo apt-get update
+
 
sudo apt-get install build-essential libgd2-xpm-dev openssl libssl-dev unzip
+
* Add users and groups
cd ~
+
sudo useradd nagios
curl -L -O http://nagios-plugins.org/download/nagios-plugins-2.2.1.tar.gz
+
sudo groupadd nagcmd
tar zxf nagios-plugins-*.tar.gz
+
sudo usermod -a -G nagcmd nagios
cd nagios-plugins-*</nowiki>
+
 
 +
* Install system requirements
 +
 
 +
sudo usermod -a -G nagcmd nagios
 +
sudo apt-get install build-essential libgd2-xpm-dev openssl libssl-dev unzip
 +
 
 +
* Install Nagios
 +
 
 +
cd ~
 +
curl -L -O http://repos.rra.lan/nagios/nagios-4.3.4.tar.gz
 +
tar zxf nagios-*.tar.gz
 +
cd nagios-*
 +
./configure --with-nagios-group=nagios --with-command-group=nagcmd
 +
make all
 +
sudo make install
 +
sudo make install-commandmode
 +
sudo make install-init
 +
sudo make install-config
 +
 
 +
* Enable the nagios conf
 +
 
 +
sudo /usr/bin/install -c -m 644 sample-config/httpd.conf /etc/apache2/sites-available/nagios.conf
 +
 
 +
* Add group nagcmd to www-data
 +
 
 +
sudo usermod -G nagcmd www-data
 +
 
 +
* Installing nrpe plugin
 +
 
 +
cd ~
 +
curl -L -O http://repos.rra.lan/nagios/nrpe-3.2.1.tar.gz
 +
tar zxf nrpe-*.tar.gz
 +
cd nrpe-*
 +
./configure
 +
make check_nrpe
 +
sudo make install-plugin
 +
 
 +
* Configure Nagios
 +
 
 +
sudo nano /usr/local/nagios/etc/nagios.cfg
 +
 
 +
Uncomment this line by deleting the # character from the front of the line:
 +
cfg_dir=/usr/local/nagios/etc/servers
 +
 
 +
 
 +
sudo mkdir /usr/local/nagios/etc/servers
 +
sudo nano /usr/local/nagios/etc/objects/contacts.cfg
 +
 
 +
Find the email directive and replace its value with your own email address.
 +
 
 +
 
 +
sudo nano /usr/local/nagios/etc/objects/commands.cfg
 +
 
 +
Add the following to the end of the file to define a new command called check_nrpe.
 +
define command{
 +
        command_name check_nrpe
 +
        command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
 +
}
 +
 
 +
sudo a2enmod rewrite
 +
sudo a2enmod cgi
 +
 
 +
 
 +
* Create nagios admin
 +
 
 +
sudo htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
 +
 
 +
* Enable nagios conf
 +
 
 +
sudo ln -s /etc/apache2/sites-available/nagios.conf /etc/apache2/sites-enabled/
 +
 
 +
* Edit nagios conf
 +
 
 +
sudo nano /etc/apache2/sites-available/nagios.conf
 +
 
 +
Uncomment both occurrences by removing the # symbol.
 +
#  SSLRequireSSL
 +
 
 +
 
 +
* Restart Apache
 +
 
 +
sudo systemctl restart apache2
 +
 
 +
 
 +
* Create service
 +
 
 +
sudo nano /etc/systemd/system/nagios.service
 +
 
 +
 
 +
[Unit]
 +
Description=Nagios
 +
BindTo=network.target
 +
 +
[Install]
 +
WantedBy=multi-user.target
 +
 +
[Service]
 +
Type=simple
 +
User=nagios
 +
Group=nagios
 +
ExecStart=/usr/local/nagios/bin/nagios /usr/local/nagios/etc/nagios.cfg
 +
 
 +
 
 +
sudo systemctl enable /etc/systemd/system/nagios.service
 +
sudo systemctl start nagios
 +
 
 +
== Connect server to Nagios ==
 +
==== On the client ====
 +
 
 +
* Add user nagios
 +
  sudo useradd nagios
 +
 
 +
* Install system requirements
 +
sudo apt-get update
 +
sudo apt-get install build-essential libgd2-xpm-dev openssl libssl-dev unzip
 +
 
 +
* Install nagios-plugins
 +
cd ~
 +
curl -L -O http://repos.rra.lan/nagios/nagios-plugins-2.2.1.tar.gz
 +
tar zxf nagios-plugins-*.tar.gz
 +
cd nagios-plugins-*
 +
./configure --with-nagios-user=nagios --with-nagios-group=nagios --with-openssl
 +
make
 +
sudo make install
 +
 
 +
* Install nrpe
 +
cd ~
 +
curl -L -O http://repos.rra.lan/nagios/nrpe-3.2.1.tar.gz
 +
tar zxf nrpe-*.tar.gz
 +
cd nrpe-*
 +
./configure --enable-command-args --with-nagios-user=nagios --with-nagios-group=nagios --with-ssl=/usr/bin/openssl --with-ssl-lib=/usr/lib/x86_64-linux-gnu
 +
make all
 +
sudo make install
 +
sudo make install-config
 +
sudo make install-init
 +
 
 +
sudo nano /usr/local/nagios/etc/nrpe.cfg
 +
 +
allowed_hosts=127.0.0.1,::1,horus.rra.lan
 +
 
 +
sudo systemctl start nrpe.service
 +
sudo systemctl status nrpe.service
 +
sudo systemctl enable nrpe.service
 +
 
 +
 
 +
* To add a new service (this is an example of how to add a check_disk service):
 +
sudo nano /usr/local/nagios/etc/nrpe.cfg
 +
 
 +
server_address=[monitored_server_private_ip]
 +
command[check_vda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/vda1
 +
sudo systemctl restart nrpe.service
 +
 
 +
==== On Nagios ====
 +
 
 +
On the server
 +
-------------
 +
sudo nano /usr/local/nagios/etc/servers/your_monitored_server_host_name.cfg
 +
 
 +
define host {
 +
        use                            linux-server  # linux-server |
 +
        host_name                      your_monitored_server_host_name # Host name from DNS
 +
        alias                          My client server  # Description
 +
        address                        your_monitored_server_private_ip # Host DNS or IP
 +
        max_check_attempts              5
 +
        check_period                    24x7
 +
        notification_interval          30
 +
        notification_period            24x7
 +
}
 +
 
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      your_monitored_server_host_name
 +
        service_description            CPU load
 +
        check_command                  check_nrpe!check_load
 +
}
 +
 
 +
sudo systemctl restart nagios
 +
 
 +
 
 +
==== How to check nagios connection to a client ====
 +
/usr/local/nagios/libexec/check_nrpe -H [host]
 +
 
 +
== GitLab ==
 +
=== At the GitLab server ===
 +
<nowiki>cd /usr/local/nagios/libexec
 +
sudo -u nagios -H wget http://repos.rra.lan/nagios/check_scripts/check_gitlab
 +
sudo -u nagios -H wget http://repos.rra.lan/nagios/check_scripts/check_mem
 +
sudo chmod 744 check_gitlab
 +
sudo chmod 744 check_mem
 +
sudo apt install ruby dc</nowiki>
 +
 
 +
sudo nano /usr/local/nagios/etc/nrpe.cfg
 +
<nowiki>### GitLab ###
 +
command[check_mem]=/usr/local/nagios/libexec/check_mem -w 80 -c 90 -W 40 -C 60
 +
command[check_gitlab_health]=/usr/local/nagios/libexec/check_gitlab -m health -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
 +
command[check_gitlab_services]=/usr/local/nagios/libexec/check_gitlab -m services -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
 +
# command[check_gitlab_cipipeline]=/usr/local/nagios/libexec/check_gitlab -m ci-pipeline -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
 +
# command[check_gitlab_cirunner]=/usr/local/nagios/libexec/check_gitlab -m ci-runner -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check></nowiki>
 +
sudo service nrpe restart
 +
 
 +
=== At the Nagios server ===
 +
sudo nano /usr/local/nagios/etc/servers/gitlab.rra.lan.cfg
 +
<nowiki>define host {
 +
      use                            linux-server
 +
      host_name                      git.rra.lan
 +
      alias                          GitLab VPN
 +
      address                        git.rra.lan
 +
      max_check_attempts              5
 +
      check_period                    24x7
 +
      notification_interval          30
 +
      notification_period            24x7
 +
}
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      git.rra.lan
 +
        service_description            GitLab Load
 +
        check_command                  check_nrpe!check_load
 +
}
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      git.rra.lan
 +
        service_description            GitLab Root Partition
 +
        check_command                  check_nrpe!check_hd_root
 +
}
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      git.rra.lan
 +
        service_description            GitLab Memory
 +
        check_command                  check_nrpe!check_mem
 +
}
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      git.rra.lan
 +
        service_description            GitLab Swap
 +
        check_command                  check_nrpe!check_swap
 +
}
 +
 
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      git.rra.lan
 +
        service_description            GitLab Health
 +
        check_command                  check_nrpe!check_gitlab_health
 +
}
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      git.rra.lan
 +
        service_description            GitLab Health
 +
        check_command                  check_nrpe!check_gitlab_health
 +
}
 +
 
 +
define service {
 +
        use                            generic-service
 +
        host_name                      git.rra.lan
 +
        service_description            GitLab Services
 +
        check_command                  check_nrpe!check_gitlab_services
 +
}
 +
 
 +
</nowiki>
 +
sudo systemctl restart nagios
 +
 
 +
== ChiChi ==
 +
=== At ChiChi.rra.lan ===
 +
* Follow the Install nrpe instructions
 +
* As nagios user:
 +
<nowiki></nowiki>
 +
 
 +
== Sources ==
 +
https://www.digitalocean.com/community/tutorials/how-to-install-nagios-4-and-monitor-your-servers-on-ubuntu-16-04<br />
 +
https://gitlab.com/6uellerBpanda/check_gitlab<br />

Revision as of 09:14, 21 March 2018

https://www.digitalocean.com/community/tutorials/how-to-install-nagios-4-and-monitor-your-servers-on-ubuntu-16-04

Nagios

Configure Nagios server

  • Add users and groups
sudo useradd nagios
sudo groupadd nagcmd
sudo usermod -a -G nagcmd nagios
  • Install system requirements
sudo usermod -a -G nagcmd nagios
sudo apt-get install build-essential libgd2-xpm-dev openssl libssl-dev unzip
  • Install Nagios
cd ~
curl -L -O http://repos.rra.lan/nagios/nagios-4.3.4.tar.gz
tar zxf nagios-*.tar.gz
cd nagios-*
./configure --with-nagios-group=nagios --with-command-group=nagcmd
make all
sudo make install
sudo make install-commandmode
sudo make install-init
sudo make install-config
  • Enable the nagios conf
sudo /usr/bin/install -c -m 644 sample-config/httpd.conf /etc/apache2/sites-available/nagios.conf
  • Add group nagcmd to www-data
sudo usermod -G nagcmd www-data
  • Installing nrpe plugin
cd ~
curl -L -O http://repos.rra.lan/nagios/nrpe-3.2.1.tar.gz
tar zxf nrpe-*.tar.gz
cd nrpe-*
./configure
make check_nrpe
sudo make install-plugin
  • Configure Nagios
sudo nano /usr/local/nagios/etc/nagios.cfg
Uncomment this line by deleting the # character from the front of the line:
cfg_dir=/usr/local/nagios/etc/servers


sudo mkdir /usr/local/nagios/etc/servers
sudo nano /usr/local/nagios/etc/objects/contacts.cfg
Find the email directive and replace its value with your own email address.


sudo nano /usr/local/nagios/etc/objects/commands.cfg
Add the following to the end of the file to define a new command called check_nrpe.
define command{
       command_name check_nrpe
       command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
sudo a2enmod rewrite
sudo a2enmod cgi


  • Create nagios admin
sudo htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin
  • Enable nagios conf
sudo ln -s /etc/apache2/sites-available/nagios.conf /etc/apache2/sites-enabled/
  • Edit nagios conf
sudo nano /etc/apache2/sites-available/nagios.conf
Uncomment both occurrences by removing the # symbol.
#  SSLRequireSSL


  • Restart Apache
sudo systemctl restart apache2


  • Create service
sudo nano /etc/systemd/system/nagios.service


[Unit]
Description=Nagios
BindTo=network.target

[Install]
WantedBy=multi-user.target

[Service]
Type=simple
User=nagios
Group=nagios
ExecStart=/usr/local/nagios/bin/nagios /usr/local/nagios/etc/nagios.cfg


sudo systemctl enable /etc/systemd/system/nagios.service
sudo systemctl start nagios

Connect server to Nagios

On the client

  • Add user nagios
sudo useradd nagios
  • Install system requirements
sudo apt-get update
sudo apt-get install build-essential libgd2-xpm-dev openssl libssl-dev unzip
  • Install nagios-plugins
cd ~
curl -L -O http://repos.rra.lan/nagios/nagios-plugins-2.2.1.tar.gz
tar zxf nagios-plugins-*.tar.gz
cd nagios-plugins-*
./configure --with-nagios-user=nagios --with-nagios-group=nagios --with-openssl
make
sudo make install
  • Install nrpe
cd ~
curl -L -O http://repos.rra.lan/nagios/nrpe-3.2.1.tar.gz
tar zxf nrpe-*.tar.gz
cd nrpe-*
./configure --enable-command-args --with-nagios-user=nagios --with-nagios-group=nagios --with-ssl=/usr/bin/openssl --with-ssl-lib=/usr/lib/x86_64-linux-gnu
make all
sudo make install
sudo make install-config
sudo make install-init
sudo nano /usr/local/nagios/etc/nrpe.cfg

allowed_hosts=127.0.0.1,::1,horus.rra.lan
sudo systemctl start nrpe.service
sudo systemctl status nrpe.service
sudo systemctl enable nrpe.service


  • To add a new service (this is an example of how to add a check_disk service):
sudo nano /usr/local/nagios/etc/nrpe.cfg
server_address=[monitored_server_private_ip]
command[check_vda1]=/usr/lib/nagios/plugins/check_disk -w 20% -c 10% -p /dev/vda1
sudo systemctl restart nrpe.service

On Nagios

On the server


sudo nano /usr/local/nagios/etc/servers/your_monitored_server_host_name.cfg
define host {
       use                             linux-server  # linux-server | 
       host_name                       your_monitored_server_host_name # Host name from DNS
       alias                           My client server  # Description
       address                         your_monitored_server_private_ip # Host DNS or IP
       max_check_attempts              5
       check_period                    24x7
       notification_interval           30
       notification_period             24x7
}


define service {
       use                             generic-service
       host_name                       your_monitored_server_host_name
       service_description             CPU load
       check_command                   check_nrpe!check_load
}
sudo systemctl restart nagios


How to check nagios connection to a client

/usr/local/nagios/libexec/check_nrpe -H [host]

GitLab

At the GitLab server

cd /usr/local/nagios/libexec
sudo -u nagios -H wget http://repos.rra.lan/nagios/check_scripts/check_gitlab
sudo -u nagios -H wget http://repos.rra.lan/nagios/check_scripts/check_mem
sudo chmod 744 check_gitlab
sudo chmod 744 check_mem
sudo apt install ruby dc
sudo nano /usr/local/nagios/etc/nrpe.cfg
### GitLab ###
command[check_mem]=/usr/local/nagios/libexec/check_mem -w 80 -c 90 -W 40 -C 60 
command[check_gitlab_health]=/usr/local/nagios/libexec/check_gitlab -m health -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
command[check_gitlab_services]=/usr/local/nagios/libexec/check_gitlab -m services -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
# command[check_gitlab_cipipeline]=/usr/local/nagios/libexec/check_gitlab -m ci-pipeline -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>
# command[check_gitlab_cirunner]=/usr/local/nagios/libexec/check_gitlab -m ci-runner -s https://localhost -k -t <token_from: https://git.rra.lan/admin/health_check>

sudo service nrpe restart

At the Nagios server

sudo nano /usr/local/nagios/etc/servers/gitlab.rra.lan.cfg

define host {
       use                             linux-server
       host_name                       git.rra.lan
       alias                           GitLab VPN
       address                         git.rra.lan
       max_check_attempts              5
       check_period                    24x7
       notification_interval           30
       notification_period             24x7
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Load
        check_command                   check_nrpe!check_load
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Root Partition
        check_command                   check_nrpe!check_hd_root
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Memory
        check_command                   check_nrpe!check_mem
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Swap
        check_command                   check_nrpe!check_swap
}


define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Health
        check_command                   check_nrpe!check_gitlab_health
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Health
        check_command                   check_nrpe!check_gitlab_health
}

define service {
        use                             generic-service
        host_name                       git.rra.lan
        service_description             GitLab Services
        check_command                   check_nrpe!check_gitlab_services
}


sudo systemctl restart nagios

ChiChi

At ChiChi.rra.lan

  • Follow the Install nrpe instructions
  • As nagios user:

Sources

https://www.digitalocean.com/community/tutorials/how-to-install-nagios-4-and-monitor-your-servers-on-ubuntu-16-04
https://gitlab.com/6uellerBpanda/check_gitlab