First somewhat working prototype - only monitoring
This commit is contained in:
commit
009f00a422
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
print_facts/
|
||||
roles/common
|
||||
debug.yml
|
||||
4
cluster.yml
Normal file
4
cluster.yml
Normal file
@ -0,0 +1,4 @@
|
||||
---
# Entry-point playbook: applies the `monitoring` role to every inventory host.
- name: Set up Netdata monitoring on all cluster nodes
  hosts: all
  roles:
    - monitoring
|
||||
7
host_vars/rock64.yml
Normal file
7
host_vars/rock64.yml
Normal file
@ -0,0 +1,7 @@
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
31363536343633356337316532313364373738373938386537623030353663356636643332306565
|
||||
6264633065623966323638366334316333373334363935300a666132353262326532616437653266
|
||||
33396338326266373662646133333539356366396133316262326266363962366236383639346366
|
||||
3336363035383931310a656138313033303662363935313039303837653233323265343832383935
|
||||
64633635623431623561633665616436616231306264353465353637343039363432636432333634
|
||||
3361633938323730333337353264363761326633383864303464
|
||||
7
host_vars/rockpro64.yml
Normal file
7
host_vars/rockpro64.yml
Normal file
@ -0,0 +1,7 @@
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
35336163336431326538313432323733383261653562323139363036663263653939633437323232
|
||||
3134393539633036383563643563656238626164376337660a613630306164396133633831306630
|
||||
34323831323631633064363634616530353730396238383031646333366463653231393638643462
|
||||
3662633431306238370a373663353966313162373937333238653838393739376334616135336133
|
||||
61393166373136343839383363613439633062646138656636643161366533636330393633343333
|
||||
6634326334303933306233613833616232376462306437663165
|
||||
5
hosts
Normal file
5
hosts
Normal file
@ -0,0 +1,5 @@
|
||||
[main]
|
||||
rock64 ansible_connection=local ansible_host=localhost ansible_user=doddo hostname=rock64_1
|
||||
|
||||
[worker]
|
||||
rockpro64 ansible_host=192.168.0.2 ansible_user=doddo hostname=rockpro64_4G
|
||||
1
roles/monitoring/README.md
Normal file
1
roles/monitoring/README.md
Normal file
@ -0,0 +1 @@
|
||||
See https://github.com/netdata/netdata/tree/master/packaging/installer/methods/ansible.md
|
||||
6
roles/monitoring/handlers/main.yml
Normal file
6
roles/monitoring/handlers/main.yml
Normal file
@ -0,0 +1,6 @@
|
||||
---
# Handlers for the monitoring role.

# Restarts the `netdata` service; triggered by tasks via
# `notify: Restart Netdata`.
- name: Restart Netdata
  ansible.builtin.service:
    state: restarted
    name: netdata
  become: true
|
||||
31
roles/monitoring/tasks/claim.yml
Normal file
31
roles/monitoring/tasks/claim.yml
Normal file
@ -0,0 +1,31 @@
|
||||
---
# Claims the node to Netdata Cloud.
#
# Exactly one of the two blocks below runs, selected by `reclaim`:
#   * reclaim false -> claim once; the command is skipped when
#     /var/lib/netdata/cloud.d/claimed_id already exists.
#   * reclaim true  -> claim again with a freshly generated node id.
#
# `reclaim` goes through `| bool` so that a string value (for example the one
# produced by `-e reclaim=true` on the command line) selects the intended
# block instead of matching neither comparison.
#
# `claim_token`, `claim_rooms` and `claim_url` are shell-quoted because they
# are interpolated into a shell command line, and the tasks use `no_log`
# because that command line carries the claim token.

- name: Claim to Netdata Cloud
  when: not (reclaim | bool)
  block:
    - name: Claim to Netdata Cloud if not already
      ansible.builtin.shell:
        cmd: >-
          netdata-claim.sh
          -token={{ claim_token | quote }}
          -rooms={{ claim_rooms | quote }}
          -url={{ claim_url | quote }}
        creates: /var/lib/netdata/cloud.d/claimed_id
      become: true
      no_log: true

- name: Re-claim a node to Netdata Cloud
  when: reclaim | bool
  block:
    - name: Ensure `uuidgen` is installed
      ansible.builtin.stat:
        path: /usr/bin/uuidgen
      register: uuidgen_result

    - name: Fail if `uuidgen` is not installed
      ansible.builtin.fail:
        msg: The system needs `uuidgen` installed to enable re-claiming.
      when: not uuidgen_result.stat.exists

    # `$(uuidgen)` is expanded by the shell, so this must stay a shell task.
    - name: Reclaim the node with `-id=`
      ansible.builtin.shell:
        cmd: >-
          netdata-claim.sh
          -token={{ claim_token | quote }}
          -rooms={{ claim_rooms | quote }}
          -url={{ claim_url | quote }}
          -id=$(uuidgen)
      when: uuidgen_result.stat.exists
      notify: Restart Netdata
      become: true
      no_log: true
|
||||
15
roles/monitoring/tasks/configure.yml
Normal file
15
roles/monitoring/tasks/configure.yml
Normal file
@ -0,0 +1,15 @@
|
||||
---
# Renders the Netdata configuration files from this role's templates.
#
# Both files are handled by one task so that a change to either of them
# notifies the `Restart Netdata` handler.
- name: Render Netdata configuration files
  ansible.builtin.template:
    # Resolved against the role's `templates/` directory.
    src: "{{ item }}.j2"
    dest: "/etc/netdata/{{ item }}"
    owner: root
    group: root
    # Plain configuration files: readable by everyone, writable by root,
    # not executable.
    # NOTE(review): stream.conf contains the streaming API key; consider a
    # tighter mode plus the netdata service group once that group name is
    # confirmed for this install type.
    mode: "0644"
  loop:
    - netdata.conf
    - stream.conf
  notify: Restart Netdata
  become: true
|
||||
14
roles/monitoring/tasks/install.yml
Normal file
14
roles/monitoring/tasks/install.yml
Normal file
@ -0,0 +1,14 @@
|
||||
---
# Installs Netdata via the kickstart script: fetch it, run it, remove it.

# Step 1: fetch the installer into the remote user's home, marked executable.
- name: Download the installation script
  ansible.builtin.get_url:
    dest: ~/kickstart.sh
    url: https://my-netdata.io/kickstart.sh
    mode: +x

# Step 2: run the installer with `--dont-wait`.
- name: Install Netdata
  ansible.builtin.command:
    cmd: ~/kickstart.sh --dont-wait

# Step 3: remove the downloaded installer again.
- name: Cleanup installation script
  ansible.builtin.file:
    state: absent
    path: ~/kickstart.sh
|
||||
16
roles/monitoring/tasks/main.yml
Normal file
16
roles/monitoring/tasks/main.yml
Normal file
@ -0,0 +1,16 @@
|
||||
---
# Tasks file for Netdata
#
# Three statically imported stages, run in order: install, configure, claim.
# Each stage escalates privileges with sudo.

- name: Install Netdata
  ansible.builtin.import_tasks: install.yml
  become_method: sudo
  become: true

- name: Configure Netdata
  ansible.builtin.import_tasks: configure.yml
  become_method: sudo
  become: true

- name: Claim the node to Netdata Cloud
  ansible.builtin.import_tasks: claim.yml
  become_method: sudo
  become: true
|
||||
21
roles/monitoring/templates/netdata.conf.j2
Normal file
21
roles/monitoring/templates/netdata.conf.j2
Normal file
@ -0,0 +1,21 @@
|
||||
{# Netdata agent configuration, rendered once per host.
   Hosts in the `main` inventory group get a web server plus a streaming
   listener on port 19998; hosts in the `worker` group get no local database,
   no web server, and a cloud proxy pointing at the main node. #}
{# Address of the main node (host rock64, interface end0). Written as a macro
   so the fact lookup only happens on hosts whose branch actually uses it. #}
{% macro parent_ip() %}{{ hostvars['rock64']['ansible_facts']['end0']['ipv4']['address'] }}{% endmacro %}
# Netdata configuration

[global]
{# `hostname` is an optional inventory variable. Test it with `is defined` so
   a host without it renders the file instead of failing on an undefined value. #}
{% if hostname is defined and hostname %}
hostname = {{ hostname }}
{% endif %}
dbengine multihost disk space = {{ dbengine_multihost_disk_space }}
{% if 'worker' in group_names %}
memory mode = none
{% endif %}

[web]
mode = {{ 'none' if 'worker' in group_names else 'static-threaded' }}
{% if 'main' in group_names %}
bind to = localhost {{ parent_ip() }}:19998=streaming
{% endif %}

{% if 'worker' in group_names %}
[cloud]
proxy = http://{{ parent_ip() }}:3128
{% endif %}
|
||||
265
roles/monitoring/templates/stream.conf.j2
Normal file
265
roles/monitoring/templates/stream.conf.j2
Normal file
@ -0,0 +1,265 @@
|
||||
# netdata configuration for aggregating data from remote hosts
|
||||
#
|
||||
# API keys authorize a pair of sending-receiving netdata servers.
|
||||
# Once their communication is authorized, they can exchange metrics for any
|
||||
# number of hosts.
|
||||
#
|
||||
# You can generate API keys, with the linux command: uuidgen
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 1. ON CHILD NETDATA - THE ONE THAT WILL BE SENDING METRICS
|
||||
|
||||
[stream]
|
||||
# Enable this on child nodes, to have them send metrics.
|
||||
enabled = {{ 'no' if 'main' in hostvars[inventory_hostname].group_names else 'yes' }}
|
||||
|
||||
# Where is the receiving netdata?
|
||||
# A space separated list of:
|
||||
#
|
||||
# [PROTOCOL:]HOST[%INTERFACE][:PORT][:SSL]
|
||||
#
|
||||
# If many are given, the first available will get the metrics.
|
||||
#
|
||||
# PROTOCOL = tcp, udp, or unix (only tcp and unix are supported by parent nodes)
|
||||
# HOST = an IPv4, IPv6 IP, or a hostname, or a unix domain socket path.
|
||||
# IPv6 IPs should be given with brackets [ip:address]
|
||||
# INTERFACE = the network interface to use (only for IPv6)
|
||||
# PORT = the port number or service name (/etc/services)
|
||||
# SSL = when this word appears at the end of the destination string
|
||||
# the Netdata will encrypt the connection with the parent.
|
||||
#
|
||||
# This communication is not HTTP (it cannot be proxied by web proxies).
|
||||
destination = {{ '' if 'main' in hostvars[inventory_hostname].group_names else 'tcp:' ~ hostvars['rock64']['ansible_facts']['end0']['ipv4']['address'] ~ ':19998' }}
|
||||
|
||||
# Skip Certificate verification?
|
||||
# The netdata child is configured to reject invalid SSL/TLS certificates,
|
||||
# so certificates that are self-signed or expired will stop the streaming.
|
||||
# In case the server certificate is not valid, you can enable the use of
|
||||
# 'bad' certificates by setting the next option to 'yes'.
|
||||
#ssl skip certificate verification = yes
|
||||
|
||||
# Certificate Authority Path
|
||||
# OpenSSL has a default directory where the known certificates are stored.
|
||||
# In case it is necessary, it is possible to change this rule using the variable
|
||||
# "CApath", e.g. CApath = /etc/ssl/certs/
|
||||
#
|
||||
#CApath =
|
||||
|
||||
# Certificate Authority file
|
||||
# When the Netdata parent has a certificate that is not recognized as valid,
|
||||
# we can add it to the list of known certificates in "CApath" and give it to
|
||||
# Netdata as an argument, e.g. CAfile = /etc/ssl/certs/cert.pem
|
||||
#
|
||||
#CAfile =
|
||||
|
||||
# The API_KEY to use (as the sender)
|
||||
api key = {{ '' if 'main' in hostvars[inventory_hostname].group_names else api_key }}
|
||||
|
||||
# Stream Compression
|
||||
# The default is enabled
|
||||
# You can control stream compression in this agent with options: yes | no
|
||||
#enable compression = yes
|
||||
|
||||
# The timeout to connect and send metrics
|
||||
timeout seconds = 60
|
||||
|
||||
# If the destination line above does not specify a port, use this
|
||||
default port = 19999
|
||||
|
||||
# filter the charts to be streamed
|
||||
# netdata SIMPLE PATTERN:
|
||||
# - space separated list of patterns (use \ to include spaces in patterns)
|
||||
# - use * as wildcard, any number of times within each pattern
|
||||
# - prefix a pattern with ! for a negative match (ie not stream the charts it matches)
|
||||
# - the order of patterns is important (left to right)
|
||||
# To send all except a few, use: !this !that * (ie append a wildcard pattern)
|
||||
send charts matching = *
|
||||
|
||||
# The buffer to use for sending metrics.
|
||||
# 10MB is good for 60 seconds of data, so increase this if you expect latencies.
|
||||
# The buffer is flushed on reconnects (this will not prevent gaps at the charts).
|
||||
buffer size bytes = 10485760
|
||||
|
||||
# If the connection fails, or it disconnects,
|
||||
# retry after that many seconds.
|
||||
reconnect delay seconds = 5
|
||||
|
||||
# Sync the clock of the charts for that many iterations, when starting.
|
||||
# It is ignored when replication is enabled
|
||||
initial clock resync iterations = 60
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 2. ON PARENT NETDATA - THE ONE THAT WILL BE RECEIVING METRICS
|
||||
|
||||
# You can have one API key per child,
|
||||
# or the same API key for all child nodes.
|
||||
#
|
||||
# netdata searches for options in this order:
|
||||
#
|
||||
# a) parent netdata settings (netdata.conf)
|
||||
# b) [stream] section (above)
|
||||
# c) [API_KEY] section (below, settings for the API key)
|
||||
# d) [MACHINE_GUID] section (below, settings for each machine)
|
||||
#
|
||||
# You can combine the above (the more specific setting will be used).
|
||||
|
||||
# API key authentication
|
||||
# If the key is not listed here, it will not be able to push metrics.
|
||||
|
||||
# [API_KEY] is [YOUR-API-KEY], i.e [11111111-2222-3333-4444-555555555555]
|
||||
{{ '[' ~ api_key ~ ']' if 'main' in hostvars[inventory_hostname].group_names else '[API_KEY]' }}
|
||||
# Default settings for this API key
|
||||
|
||||
# This GUID is to be used as an API key from remote agents connecting
|
||||
# to this machine. Failure to match such a key, denies access.
|
||||
# YOU MUST SET THIS FIELD ON ALL API KEYS.
|
||||
type = api
|
||||
|
||||
# You can disable the API key, by setting this to: no
|
||||
# The default (for unknown API keys) is: no
|
||||
enabled = {{ 'yes' if 'main' in hostvars[inventory_hostname].group_names else 'no' }}
|
||||
|
||||
# A list of simple patterns matching the IPs of the servers that
|
||||
# will be pushing metrics using this API key.
|
||||
# The metrics are received via the API port, so the same IPs
|
||||
# should also be matched at netdata.conf [web].allow connections from
|
||||
allow from = *
|
||||
|
||||
# The default history in entries, for all hosts using this API key.
|
||||
# You can also set it per host below.
|
||||
# For the default db mode (dbengine), this is ignored.
|
||||
#default history = 3600
|
||||
|
||||
# The default memory mode to be used for all hosts using this API key.
|
||||
# You can also set it per host below.
|
||||
# If you don't set it here, the memory mode of netdata.conf will be used.
|
||||
# Valid modes:
|
||||
# save save on exit, load on start
|
||||
# map like swap (continuously syncing to disks - you need SSD)
|
||||
# ram keep it in RAM, don't touch the disk
|
||||
# none no database at all (use this on headless proxies)
|
||||
# dbengine like a traditional database
|
||||
{{ 'default memory mode = dbengine' if 'main' in hostvars[inventory_hostname].group_names else '' }}
|
||||
|
||||
# Shall we enable health monitoring for the hosts using this API key?
|
||||
# 3 possible values:
|
||||
# yes enable alarms
|
||||
# no do not enable alarms
|
||||
# auto enable alarms, only when the sending netdata is connected.
|
||||
# Health monitoring will be disabled as soon as the connection is closed.
|
||||
# You can also set it per host, below.
|
||||
# The default is taken from [health].enabled of netdata.conf
|
||||
#health enabled by default = auto
|
||||
|
||||
# postpone alarms for a short period after the sender is connected
|
||||
default postpone alarms on connect seconds = 60
|
||||
|
||||
# seconds of health log events to keep
|
||||
#default health log history = 432000
|
||||
|
||||
# need to route metrics differently? set these.
|
||||
# the defaults are the ones at the [stream] section (above)
|
||||
#default proxy enabled = yes | no
|
||||
#default proxy destination = IP:PORT IP:PORT ...
|
||||
#default proxy api key = API_KEY
|
||||
#default proxy send charts matching = *
|
||||
|
||||
# Stream Compression
|
||||
# By default it is enabled.
|
||||
# You can control stream compression in this parent agent stream with options: yes | no
|
||||
#enable compression = yes
|
||||
|
||||
# select the order the compression algorithms will be used, when multiple are offered by the child
|
||||
#compression algorithms order = zstd lz4 brotli gzip
|
||||
|
||||
# Replication
|
||||
# Enable replication for all hosts using this api key. Default: enabled
|
||||
#enable replication = yes
|
||||
|
||||
# How many seconds to replicate from each child. Default: a day
|
||||
#seconds to replicate = 86400
|
||||
|
||||
# The duration we want to replicate per each step.
|
||||
#replication_step = 600
|
||||
|
||||
# Indicate whether this child is an ephemeral node. An ephemeral node will become unavailable
|
||||
# after the specified duration of "cleanup ephemeral hosts after secs" (as defined in the db section of netdata.conf)
|
||||
# from the time of the node's last connection.
|
||||
#is ephemeral node = false
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 3. PER SENDING HOST SETTINGS, ON PARENT NETDATA
|
||||
# THIS IS OPTIONAL - YOU DON'T HAVE TO CONFIGURE IT
|
||||
|
||||
# This section exists to give you finer control of the parent settings for each
|
||||
# child host, when the same API key is used by many netdata child nodes / proxies.
|
||||
#
|
||||
# Each netdata has a unique GUID - generated the first time netdata starts.
|
||||
# You can find it at /var/lib/netdata/registry/netdata.public.unique.id
|
||||
# (at the child).
|
||||
#
|
||||
# The host sending data will have one. If the host is not ephemeral,
|
||||
# you can give settings for each sending host here.
|
||||
|
||||
[MACHINE_GUID]
|
||||
# This GUID is to be used as a MACHINE GUID from remote agents connecting
|
||||
# to this machine, not an API key.
|
||||
# YOU MUST SET THIS FIELD ON ALL MACHINE GUIDs.
|
||||
type = machine
|
||||
|
||||
# enable this host: yes | no
|
||||
# When disabled, the parent will not receive metrics for this host.
|
||||
# THIS IS NOT A SECURITY MECHANISM - AN ATTACKER CAN SET ANY OTHER GUID.
|
||||
# Use only the API key for security.
|
||||
enabled = no
|
||||
|
||||
# A list of simple patterns matching the IPs of the servers that
|
||||
# will be pushing metrics using this MACHINE GUID.
|
||||
# The metrics are received via the API port, so the same IPs
|
||||
# should also be matched at netdata.conf [web].allow connections from
|
||||
# and at stream.conf [API_KEY].allow from
|
||||
allow from = *
|
||||
|
||||
# The number of entries in the database.
|
||||
# This is ignored for db mode dbengine.
|
||||
#history = 3600
|
||||
|
||||
# The memory mode of the database: save | map | ram | none | dbengine
|
||||
#memory mode = dbengine
|
||||
|
||||
# Health / alarms control: yes | no | auto
|
||||
#health enabled = auto
|
||||
|
||||
# postpone alarms when the sender connects
|
||||
postpone alarms on connect seconds = 60
|
||||
|
||||
# seconds of health log events to keep
|
||||
#health log history = 432000
|
||||
|
||||
# need to route metrics differently?
|
||||
# the defaults are the ones at the [API KEY] section
|
||||
#proxy enabled = yes | no
|
||||
#proxy destination = IP:PORT IP:PORT ...
|
||||
#proxy api key = API_KEY
|
||||
#proxy send charts matching = *
|
||||
|
||||
# Stream Compression
|
||||
# By default, enabled.
|
||||
# You can control stream compression in this parent agent stream with options: yes | no
|
||||
#enable compression = yes
|
||||
|
||||
# Replication
|
||||
# Enable replication for all hosts using this api key.
|
||||
#enable replication = yes
|
||||
|
||||
# How many seconds to replicate from each child.
|
||||
#seconds to replicate = 86400
|
||||
|
||||
# The duration we want to replicate per each step.
|
||||
#replication_step = 600
|
||||
|
||||
# Indicate whether this child is an ephemeral node. An ephemeral node will become unavailable
|
||||
# after the specified duration of "cleanup ephemeral hosts after secs" (as defined in the db section of netdata.conf)
|
||||
# from the time of the node's last connection.
|
||||
#is ephemeral node = false
|
||||
27
roles/monitoring/vars/main/main.yml
Normal file
27
roles/monitoring/vars/main/main.yml
Normal file
@ -0,0 +1,27 @@
|
||||
---
|
||||
# Variables for Netdata
|
||||
|
||||
# Set Netdata Cloud claiming details. To find your `claim_token` and
|
||||
# `claim_rooms`, go to Netdata Cloud, then click on your Space's name in the top
|
||||
# navigation, then click on `Manage your Space`. Click on the `Nodes` tab in the
|
||||
# panel that appears, which displays a script with `token` and `room` strings.
|
||||
# Copy those strings into the variables below. `claim_url` should be
|
||||
# `https://app.netdata.cloud`. Read more:
|
||||
# https://learn.netdata.cloud/docs/agent/claim
|
||||
claim_url: https://app.netdata.cloud
|
||||
|
||||
# Force re-claiming of nodes to Netdata Cloud. Read more:
|
||||
# https://learn.netdata.cloud/docs/agent/claim#remove-and-reclaim-a-node
|
||||
reclaim: false
|
||||
|
||||
# Set Netdata's metrics retention policy via the disk size for the database
|
||||
# engine. Value is in MiB. Read more:
|
||||
# https://learn.netdata.cloud/docs/store/change-metrics-storage
|
||||
dbengine_multihost_disk_space: 2048
|
||||
|
||||
# Set whether to run the Agent web server/dashboard/API, or disable them.
|
||||
# Because we're connecting this node to Netdata Cloud and will view dashboards
|
||||
# there, we'll set this to `none` to disable the local dashboard. Set to
|
||||
# `static-threaded` if you want to keep it running. Read more:
|
||||
# https://learn.netdata.cloud/docs/configure/secure-nodes
|
||||
web_mode: none
|
||||
29
roles/monitoring/vars/main/vault.yml
Normal file
29
roles/monitoring/vars/main/vault.yml
Normal file
@ -0,0 +1,29 @@
|
||||
claim_token: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
62653264633831346161393763666666636535386239636231393831353130633138313666336435
|
||||
6530306263613836356163376537393165633963376563390a316164373033373162646266613164
|
||||
39646237323830626539386231313435393131363239376538383732646636303439616132353266
|
||||
6634386563383837630a626266623337353932316666366538323835663136633930623636333131
|
||||
66636537363731313232626666323264366464343261333633333233326165663434353136623334
|
||||
35323261613866643139303432646537376132656237323462396237346166306666653531616462
|
||||
61663864656130386562623136613166303462666237333230343132363864306165623631373034
|
||||
63323666383362326431323539363633346464626163666435363236316439366338336339646636
|
||||
62336633386438653834653361326462383234386466663335633064663638666461666365363461
|
||||
30353735376566323861663431396164646665323563393363663637653134346130343336363631
|
||||
336164663430653563353835336464346530
|
||||
claim_rooms: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
66396339663662373339323136386635306130656365343263666235653630636663336233383162
|
||||
6335643763383037386636376362656565383365626435370a623266636136356334396335306135
|
||||
31346162346662383033373031653766356436343037353534383939396163333739633964636463
|
||||
6663343665303562330a313961626165333762646136356131333466643364373038353735346462
|
||||
65366533353733333264383534653734663932643765393863623934316461383034666137653366
|
||||
3033636130363731343337643763336536663437343865386131
|
||||
api_key: !vault |
|
||||
$ANSIBLE_VAULT;1.1;AES256
|
||||
64626631376635366130646332386139646661386538653737383632303732653735613766613664
|
||||
3133623739643763386537383537623837343762376265370a663837653135363732313231626664
|
||||
33333765663039313866303665623663363062646432343539383434633631303239306664636537
|
||||
3835643563663638360a636636393130656463353563343233373864356266363564663735373934
|
||||
65326233656166386638616564373266393434623261653037353435373133663261353233353832
|
||||
3234353835616133396565646439653363303133613932633065
|
||||
Loading…
x
Reference in New Issue
Block a user