From 693184bd7c55b2d3dcbcf2332b73617b47e5edd3 Mon Sep 17 00:00:00 2001 From: Saibotk Date: Wed, 14 Aug 2024 02:04:32 +0200 Subject: [PATCH] refactor!(monitoring): Replace monitoring with monitoring_ng Not much changed regarding variable names, only the role name changed. --- playbooks/monitoring.yml | 42 +-- roles/monitoring/README.md | 37 -- roles/monitoring/defaults/main.yml | 174 +++++---- roles/monitoring/handlers/main.yml | 47 +++ roles/monitoring/meta/main.yml | 49 +-- roles/monitoring/tasks/influxdb/database.yml | 49 --- roles/monitoring/tasks/influxdb/main.yml | 55 --- roles/monitoring/tasks/main.yml | 347 ++++++++++++++---- roles/monitoring/templates/docker-compose.yml | 143 -------- .../templates/grafana/grafana.caddy.j2 | 24 ++ .../templates/grafana/grafana.container.j2 | 40 ++ .../templates/grafana/grafana.ini.j2 | 103 ++++++ .../grafana/image-renderer.container.j2 | 36 ++ .../templates/loki/datasource.yml.j2 | 15 + .../templates/loki/loki-config.yml.j2 | 102 +++++ roles/monitoring/templates/loki/loki.caddy.j2 | 42 +++ .../templates/loki/loki.container.j2 | 40 ++ .../templates/prometheus/datasource.yml.j2 | 21 ++ .../templates/prometheus/prometheus.caddy.j2 | 42 +++ .../prometheus/prometheus.container.j2 | 47 +++ .../templates/prometheus/prometheus.yml.j2 | 9 + 21 files changed, 977 insertions(+), 487 deletions(-) delete mode 100644 roles/monitoring/README.md create mode 100644 roles/monitoring/handlers/main.yml delete mode 100644 roles/monitoring/tasks/influxdb/database.yml delete mode 100644 roles/monitoring/tasks/influxdb/main.yml delete mode 100644 roles/monitoring/templates/docker-compose.yml create mode 100644 roles/monitoring/templates/grafana/grafana.caddy.j2 create mode 100644 roles/monitoring/templates/grafana/grafana.container.j2 create mode 100644 roles/monitoring/templates/grafana/grafana.ini.j2 create mode 100644 roles/monitoring/templates/grafana/image-renderer.container.j2 create mode 100644 
roles/monitoring/templates/loki/datasource.yml.j2 create mode 100644 roles/monitoring/templates/loki/loki-config.yml.j2 create mode 100644 roles/monitoring/templates/loki/loki.caddy.j2 create mode 100644 roles/monitoring/templates/loki/loki.container.j2 create mode 100644 roles/monitoring/templates/prometheus/datasource.yml.j2 create mode 100644 roles/monitoring/templates/prometheus/prometheus.caddy.j2 create mode 100644 roles/monitoring/templates/prometheus/prometheus.container.j2 create mode 100644 roles/monitoring/templates/prometheus/prometheus.yml.j2 diff --git a/playbooks/monitoring.yml b/playbooks/monitoring.yml index d590ddd..8a8b7cb 100644 --- a/playbooks/monitoring.yml +++ b/playbooks/monitoring.yml @@ -1,33 +1,17 @@ ---- -# Infrastructure -# Ansible instructions to deploy the infrastructure -# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern -# Copyright (C) 2020 Saibotk -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . +- name: Install Monitoring Suite with Grafana, Loki and Prometheus. 
-- name: Install & configure monitoring servers hosts: monitoring - roles: - - docker - - docker_cleanup - - traefik - - monitoring -- name: Install & configure monitoring clients - hosts: all - serial: 1 roles: - - docker - - docker_cleanup - - telegraf + - role: podman + become: true + tags: + - always + - podman + - role: caddy + become: true + tags: + - always + - caddy + - role: monitoring + become: true diff --git a/roles/monitoring/README.md b/roles/monitoring/README.md deleted file mode 100644 index ba38666..0000000 --- a/roles/monitoring/README.md +++ /dev/null @@ -1,37 +0,0 @@ -Monitoring -========= - -This will setup an [InfluxDB](https://www.influxdata.com/products/influxdb-overview/) and a [Grafana](https://grafana.com) instance using their official docker container and traefik as a reverse proxy. This also will install a grafana-renderer, which is used to render graphs to be sent via alerts etc. - -Requirements ------------- - -> NOTE: You need the `python-influxdb` package installed locally with the Ansible client, to run this role! - -You will need to have docker, docker-compose and traefik installed or declared as dependencies with their respective roles. - -**This role assumes that you have setup traefik with an endpoint called `websecure`.** - -Role Variables --------------- - -**Please look at the [defaults/main.yml](defaults/main.yml) for all available variables and their description.** - -**Note: Lines that are commented out via `#` are usually still valid/used variables, but they are not defined by default, so they might enable a feature, when uncommenting/defining them!** - -### Global variables, that are used: - -- `proxy_network`: Defined by the local traefik installation, this is the shared proxy network used by traefik to reach the containers. (optional) -- `proxy_hiddenservice`: Defined by the local traefik installation, this is used to generate the alt-svc header for the alternative Tor domain. 
(optional) - -Dependencies ------------- - -- docker -- docker-compose -- traefik - -License -------- - -GPL-3.0-only diff --git a/roles/monitoring/defaults/main.yml b/roles/monitoring/defaults/main.yml index c2a5fdd..4dfa2fa 100644 --- a/roles/monitoring/defaults/main.yml +++ b/roles/monitoring/defaults/main.yml @@ -1,54 +1,60 @@ ---- -# Default variables for the monitoring role +monitoring_install_dir: "/srv/monitoring" -# Infrastructure -# Ansible instructions to deploy the infrastructure -# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern -# Copyright (C) 2020 Saibotk -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
+monitoring_grafana_selinux_level: "{{ omit }}" +monitoring_loki_selinux_level: "{{ omit }}" +monitoring_prometheus_selinux_level: "{{ omit }}" -# Install locations -monitoring_install_location: "/srv/monitoring" -monitoring_grafana_location: "{{ monitoring_install_location }}/grafana" -monitoring_influxdb_location: "{{ monitoring_install_location }}/influxdb" - -# The certresolver for traefik to use on this domain -monitoring_traefik_certresolver: letsencrypt_http - -# The domain that traefik should serve grafana on -monitoring_grafana_domain: "grafana.example.com" - -# The domain traefik should serve influxdb on -monitoring_influxdb_domain: influxdb.example.com - -# The grafana version +monitoring_grafana_containerimage: "docker.io/grafana/grafana" # renovate: depName=docker.io/grafana/grafana -monitoring_grafana_version: 11.1.3 +monitoring_grafana_image_tag: "10.4.1" -# The influxdb version -# renovate: depName=docker.io/library/influxdb -monitoring_influxdb_version: 1.8.10 +monitoring_loki_containerimage: "docker.io/grafana/loki" +# renovate: depName=docker.io/grafana/loki +monitoring_loki_image_tag: "2.9.6" -# The influxdb/grafana image tag -monitoring_influxdb_image_version: "{{ monitoring_influxdb_version }}" -monitoring_grafana_image_version: "{{ monitoring_grafana_version }}" +monitoring_prometheus_containerimage: quay.io/prometheus/prometheus +# renovate: depName=quay.io/prometheus/prometheus +monitoring_prometheus_image_tag: "v2.51.1" -# Additional plugins, that should be installed (see https://grafana.com/docs/grafana/latest/installation/docker/) -monitoring_grafana_plugins: [] -monitoring_grafana_renderer_enabled: true +monitoring_image_renderer_containerimage: docker.io/grafana/grafana-image-renderer +# renovate: depName=docker.io/grafana/grafana-image-renderer +monitoring_image_renderer_image_tag: "3.10.1" -# OAuth settings for grafana +# These settings allow resource management of the container workload. 
+# While memory_high and memory_low are quite straightforward, swap_max is sadly not. +# memory_high and memory_low 'directly' set the corresponding cgroupv2 settings, +# while swap_max sets swap.max to `memory_high - swap_max`. +# Special values: memory_high = 0 and swap_max = -1 set no limit and unlimited swap respectively. +# A unit suffix can be appended to all of these values. +# Valid units are b (bytes), k (kibibytes), m (mebibytes), and g (gibibytes). +monitoring_grafana_memory_low: 128m +monitoring_grafana_memory_high: 0 +monitoring_grafana_swap_max: -1 + +monitoring_loki_memory_low: 256m +monitoring_loki_memory_high: 0 +monitoring_loki_swap_max: -1 + +monitoring_prometheus_memory_low: 256m +monitoring_prometheus_memory_high: 0 +monitoring_prometheus_swap_max: -1 + +monitoring_image_renderer_memory_low: 256m +monitoring_image_renderer_memory_high: 0 +monitoring_image_renderer_swap_max: -1 + +monitoring_grafana_domain: "grafana.example.com" +monitoring_grafana_loglevel: "info" + +monitoring_loki_domain: "loki.example.com" +# Supported values [debug, info, warn, error] +monitoring_loki_loglevel: "info" + +monitoring_prometheus_domain: "prometheus.example.com" +# Supported values [debug, info, warn, error] +monitoring_prometheus_loglevel: "info" + +### Grafana specific options ### monitoring_grafana_oauth: enabled: false name: "OAuth" @@ -59,30 +65,66 @@ monitoring_grafana_oauth: api_url: "https://auth.example.com/auth/realms/sso/protocol/openid-connect/userinfo" client_id: "grafana" client_secret: "something-secret123" + allow_assign_grafana_admin: false -# Additional feature toggles to enable (See https://grafana.com/docs/grafana/latest/administration/configuration/#feature_toggles) monitoring_grafana_feature_toggles: [] -# The influxdb admin credentials that should be created -monitoring_influxdb_admin_username: "admin" -monitoring_influxdb_admin_password: "{{ lookup('passwordstore', monitoring_influxdb_domain + '/db-admin create=true length=42') }}" +# When true this 
will remove all alerting provisioning files not managed by this ansible role. +# This won't make any backups so be warned. +monitoring_grafana_remove_unmanaged_alerting_files: false -# The influxdb databases that should be created and their policies -monitoring_influxdb_databases: - - name: "telegraf" - policies: - - name: autogen - duration: 30d - replication: 1 - default: "true" - - name: logs - duration: 14d - replication: 1 +monitoring_loki_schema_config: + configs: + - from: "2023-11-30" + store: tsdb + object_store: filesystem + schema: v12 + index: + prefix: index_ + period: 24h + chunks: + prefix: chunks_ + period: 24h -# Other influxdb users, that should be created -monitoring_influxdb_users: - - username: "telegraf" - password: "{{ lookup('passwordstore', monitoring_influxdb_domain + '/db create=true length=42') }}" - grants: - - database: "telegraf" - privilege: "ALL" +monitoring_loki_retention_period: 15d + +# `hashed_password` has to be hashed using md5, sha1 or BCrypt +# e.g. using `mkpasswd --method=bcrypt --stdin` +# e.g. using `htpasswd -Bin ` +# Ref.: https://caddyserver.com/docs/caddyfile/directives/basicauth +monitoring_loki_basic_auth: [] +# - username: "{{ }}" +# hashed_password: "{{ }}" + +### Prometheus specific options ### + +# Prometheus native TLS and basic auth are experimental, so we are using caddy (for now). +# `hashed_password` has to be hashed using md5, sha1 or BCrypt +# e.g. using `mkpasswd --method=bcrypt --stdin` +# e.g. 
using `htpasswd -Bin ` +# Ref.: https://caddyserver.com/docs/caddyfile/directives/basicauth +monitoring_prometheus_basic_auth: [] +# - username: "{{ }}" +# hashed_password: "{{ }}" + +monitoring_prometheus_retention_time: 15d +monitoring_prometheus_write_receiver_enable: false + +monitoring_prometheus_scrape_configs: + - job_name: prometheus + static_configs: + - targets: ["prometheus:9090"] + + - job_name: grafana + static_configs: + - targets: ["grafana:3000"] + + - job_name: loki + static_configs: + - targets: ["loki:3100"] + + - job_name: file_configs + file_sd_configs: + - files: + - /etc/prometheus/file_configs/*.yml + - /etc/prometheus/file_configs/*.json diff --git a/roles/monitoring/handlers/main.yml b/roles/monitoring/handlers/main.yml new file mode 100644 index 0000000..9309f31 --- /dev/null +++ b/roles/monitoring/handlers/main.yml @@ -0,0 +1,47 @@ +- name: Apply new SELinux file context to filesystem. + ansible.builtin.command: "restorecon -irF {{ monitoring_install_dir }}" + become: true + changed_when: true + listen: "monitoring selinux context changed" + +- name: Restart grafana + ansible.builtin.systemd: + state: restarted + daemon_reload: true + name: grafana.service + listen: + - "grafana config changed" + - "grafana container definition changed" + - "grafana datasources changed" + - "grafana alert provisioning changed" + - "monitoring selinux context changed" + +- name: Restart grafana image renderer + ansible.builtin.systemd: + state: restarted + daemon_reload: true + name: image-renderer.service + listen: + - "image-renderer config changed" + - "image-renderer container definition changed" + - "monitoring selinux context changed" + +- name: Restart loki + ansible.builtin.systemd: + state: restarted + daemon_reload: true + name: loki.service + listen: + - "loki config changed" + - "loki container definition changed" + - "monitoring selinux context changed" + +- name: Restart prometheus + ansible.builtin.systemd: + state: restarted + 
daemon_reload: true + name: prometheus.service + listen: + - "prometheus config changed" + - "prometheus container definition changed" + - "monitoring selinux context changed" diff --git a/roles/monitoring/meta/main.yml b/roles/monitoring/meta/main.yml index 8d8de48..2dfe7e9 100644 --- a/roles/monitoring/meta/main.yml +++ b/roles/monitoring/meta/main.yml @@ -1,44 +1,23 @@ galaxy_info: - author: saibotk - description: "Deploys an influxdb and grafana via docker and traefik." + author: histalek + description: > + Deploy monitoring stack with podman and systemd. + The monitoring stack consists of Grafana, Prometheus, Loki and the grafana image renderer. + + issue_tracker_url: https://git.histalek.de/histalek-de/infrastructure/-/issues + license: GPL-3.0-only - min_ansible_version: "2.9" - standalone: true + + min_ansible_version: "2.10" platforms: - - name: EL - versions: - - all - - name: GenericUNIX - versions: - - all - name: Fedora versions: - - all - - name: opensuse - versions: - - all - - name: GenericBSD - versions: - - all - - name: FreeBSD - versions: - - all - - name: Ubuntu - versions: - - all - - name: SLES - versions: - - all - - name: GenericLinux - versions: - - all - - name: Debian - versions: - - all + - "38" + - "39" + + standalone: true galaxy_tags: [] -dependencies: - - role: docker - - role: traefik +dependencies: [] diff --git a/roles/monitoring/tasks/influxdb/database.yml b/roles/monitoring/tasks/influxdb/database.yml deleted file mode 100644 index 6e998bf..0000000 --- a/roles/monitoring/tasks/influxdb/database.yml +++ /dev/null @@ -1,49 +0,0 @@ ---- -# Task file influxdb/database.yml for the monitoring role - -# Infrastructure -# Ansible instructions to deploy the infrastructure -# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -- name: Create database - community.general.influxdb_database: - state: present - login_username: "{{ monitoring_influxdb_admin_username }}" - login_password: "{{ monitoring_influxdb_admin_password }}" - hostname: "{{ monitoring_influxdb_domain }}" - port: 443 - ssl: true - validate_certs: true - database_name: "{{ database.name }}" - delegate_to: 127.0.0.1 - no_log: true - -- name: Create retention policies - community.general.influxdb_retention_policy: - login_username: "{{ monitoring_influxdb_admin_username }}" - login_password: "{{ monitoring_influxdb_admin_password }}" - hostname: "{{ monitoring_influxdb_domain }}" - port: 443 - ssl: true - validate_certs: true - database_name: "{{ database.name }}" - policy_name: "{{ policy.name }}" - duration: "{{ policy.duration }}" - replication: "{{ policy.replication }}" - default: "{{ policy.default | default(omit) }}" - loop: "{{ database.policies }}" - loop_control: - loop_var: "policy" - delegate_to: 127.0.0.1 diff --git a/roles/monitoring/tasks/influxdb/main.yml b/roles/monitoring/tasks/influxdb/main.yml deleted file mode 100644 index ea1a88a..0000000 --- a/roles/monitoring/tasks/influxdb/main.yml +++ /dev/null @@ -1,55 +0,0 @@ ---- -# Task file influxdb/main.yml for the monitoring role - -# Infrastructure -# Ansible instructions to deploy the infrastructure -# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -- name: Create admin user - community.general.influxdb_user: - state: present - login_username: "{{ monitoring_influxdb_admin_username }}" - login_password: "{{ monitoring_influxdb_admin_password }}" - hostname: "{{ monitoring_influxdb_domain }}" - port: 443 - ssl: true - validate_certs: true - user_name: "{{ monitoring_influxdb_admin_username }}" - user_password: "{{ monitoring_influxdb_admin_password }}" - admin: true - delegate_to: 127.0.0.1 - no_log: true - -- name: Configure databases - ansible.builtin.include_tasks: database.yml - loop: "{{ monitoring_influxdb_databases }}" - loop_control: - loop_var: "database" - -- name: Create database user - community.general.influxdb_user: - state: present - login_username: "{{ monitoring_influxdb_admin_username }}" - login_password: "{{ monitoring_influxdb_admin_password }}" - hostname: "{{ monitoring_influxdb_domain }}" - port: 443 - ssl: true - validate_certs: true - user_name: "{{ item.username }}" - user_password: "{{ item.password }}" - grants: "{{ item.grants }}" # noqa args[module] This has to be escaped to a string - loop: "{{ monitoring_influxdb_users }}" - delegate_to: 127.0.0.1 - no_log: true diff --git a/roles/monitoring/tasks/main.yml b/roles/monitoring/tasks/main.yml index 060af25..a5ea6c5 100644 --- a/roles/monitoring/tasks/main.yml +++ b/roles/monitoring/tasks/main.yml @@ -1,92 +1,293 @@ ---- -# Tasks file for the monitoring role - -# Infrastructure -# Ansible instructions to deploy the infrastructure -# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern -# -# This program is free software: you can redistribute it and/or modify -# it under the 
terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - name: Update default SELinux contexts community.general.sefcontext: - target: "{{ item }}(/.*)?" + target: "{{ item.target }}(/.*)?" setype: "container_file_t" + selevel: "{{ item.selevel }}" state: present - with_items: - - "{{ monitoring_grafana_location }}" - - "{{ monitoring_influxdb_location }}" + loop: + - target: "{{ monitoring_install_dir }}/grafana" + selevel: "{{ monitoring_grafana_selinux_level }}" + - target: "{{ monitoring_install_dir }}/loki" + selevel: "{{ monitoring_loki_selinux_level }}" + - target: "{{ monitoring_install_dir }}/prometheus" + selevel: "{{ monitoring_prometheus_selinux_level }}" become: true + notify: "monitoring selinux context changed" -- name: Create install directory +- name: Create monitoring directories. ansible.builtin.file: path: "{{ item }}" + owner: "root" + group: "root" state: directory mode: "0700" - owner: "root" - group: "root" - with_items: - - "{{ monitoring_install_location }}" + loop: + - "{{ monitoring_install_dir }}" become: true -- name: Create grafana directory - ansible.builtin.file: - path: "{{ item }}" - state: directory - mode: "0750" - owner: "472" - group: "472" - setype: "container_file_t" - with_items: - - "{{ monitoring_grafana_location }}" - become: true +- name: Ensure monitoring directories and config files exist. + block: + - name: Stat grafana data directory. 
+ ansible.builtin.stat: + path: "{{ monitoring_install_dir }}/grafana/data" + become: true + register: monitoring_grafana_stat_dir -- name: Create influxdb directory - ansible.builtin.file: - path: "{{ item }}" - state: directory - mode: "0750" - owner: "root" - group: "root" - setype: "container_file_t" - with_items: - - "{{ monitoring_influxdb_location }}" - become: true + - name: Stat loki data directory. + ansible.builtin.stat: + path: "{{ monitoring_install_dir }}/loki/data" + become: true + register: monitoring_loki_stat_dir -- name: Deploy docker-compose.yml and config - ansible.builtin.template: - src: "docker-compose.yml" - dest: "{{ monitoring_install_location }}/docker-compose.yml" - mode: "0600" - owner: "root" - group: "root" - validate: docker compose -f %s config -q - become: true + - name: Stat prometheus data directory. + ansible.builtin.stat: + path: "{{ monitoring_install_dir }}/prometheus/data" + become: true + register: monitoring_prometheus_stat_dir -- name: Compose monitoring - community.docker.docker_compose_v2: + - name: Create monitoring directories. 
+ ansible.builtin.file: + path: "{{ item.path }}" + state: directory + owner: "{{ item.owner }}" + group: "{{ item.group }}" + mode: "0700" + loop: + - path: "{{ monitoring_install_dir }}/grafana" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/data" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/config" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/config/provisioning" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/alerting" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/dashboards" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/datasources" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/notifiers" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/plugins" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + 
group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/loki" + owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/loki/data" + owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/loki/config" + owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/prometheus" + owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/prometheus/data" + owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}" + - path: "{{ monitoring_install_dir }}/prometheus/config" + owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}" + become: true + + - name: Deploy grafana config. + ansible.builtin.template: + src: grafana/grafana.ini.j2 + dest: "{{ monitoring_install_dir }}/grafana/config/grafana.ini" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + mode: "0600" + become: true + notify: "grafana config changed" + + - name: Deploy loki config. 
+ ansible.builtin.template: + src: loki/loki-config.yml.j2 + dest: "{{ monitoring_install_dir }}/loki/config/loki-config.yaml" + owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}" + mode: "0600" + become: true + notify: "loki config changed" + + - name: Deploy prometheus config. + ansible.builtin.template: + src: prometheus/prometheus.yml.j2 + dest: "{{ monitoring_install_dir }}/prometheus/config/prometheus.yml" + owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}" + mode: "0600" + become: true + notify: "prometheus config changed" + + - name: Create grafana datasource provisioning files. + ansible.builtin.template: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + mode: "0600" + loop: + - src: loki/datasource.yml.j2 + dest: "{{ monitoring_install_dir }}/grafana/config/provisioning/datasources/loki.yml" + - src: prometheus/datasource.yml.j2 + dest: "{{ monitoring_install_dir }}/grafana/config/provisioning/datasources/prometheus.yml" + become: true + notify: "grafana datasources changed" + + - name: Copy grafana alerting provisioning files. 
+ ansible.builtin.copy: + src: "{{ item }}" + dest: "{{ monitoring_install_dir }}/grafana/config/provisioning/alerting/" + owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}" + group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}" + mode: "0600" + loop: "{{ query('fileglob', inventory_hostname + '/monitoring/alerting/*.yml') }}" + become: true + notify: "grafana alert provisioning changed" + register: monitoring_grafana_managed_alerting_files + +- name: Remove unmanaged grafana alert provisioning files + when: monitoring_grafana_remove_unmanaged_alerting_files + notify: "grafana alert provisioning changed" + block: + - name: Find all remote grafana alert provisioning files. + ansible.builtin.find: + paths: "{{ monitoring_install_dir }}/grafana/config/provisioning/alerting/" + become: true + register: monitoring_grafana_found_alerting_files + + - name: Remove unmanaged grafana alert provisioning files. + ansible.builtin.file: + state: absent + path: "{{ item }}" + loop: "{{ monitoring_grafana_found_alerting_files.files | map(attribute='path') }}" + when: item not in monitoring_grafana_managed_alerting_files.results | map(attribute='dest') + become: true + +- name: Ensure container images are present on the host. 
+ containers.podman.podman_image: + name: "{{ item.name }}" state: present - project_src: "{{ monitoring_install_location }}" - pull: always - remove_orphans: true - register: monitoring_compose + tag: "{{ item.tag }}" + loop: + - name: "{{ monitoring_grafana_containerimage }}" + tag: "{{ monitoring_grafana_image_tag }}" + - name: "{{ monitoring_image_renderer_containerimage }}" + tag: "{{ monitoring_image_renderer_image_tag }}" + - name: "{{ monitoring_loki_containerimage }}" + tag: "{{ monitoring_loki_image_tag }}" + - name: "{{ monitoring_prometheus_containerimage }}" + tag: "{{ monitoring_prometheus_image_tag }}" become: true -- name: Wait 30 seconds for influxdb to become healthy - ansible.builtin.wait_for: - timeout: 30 - delegate_to: localhost - # noqa no-handler - when: monitoring_compose is changed +- name: Add caddy config file. + block: + - name: Check caddy config dir. + ansible.builtin.stat: + path: "{{ caddy_install_dir }}/config" + become: true + register: caddy_stat_config_dir -- name: Include influxdb management - ansible.builtin.include_tasks: influxdb/main.yml + - name: Template caddy config for monitoring. + ansible.builtin.template: + src: "{{ item.src }}" + dest: "{{ item.dest }}" + mode: "0600" + setype: "container_file_t" + selevel: "{{ caddy_selinux_level }}" + owner: "{{ caddy_stat_config_dir.stat.uid | default('root') }}" + group: "{{ caddy_stat_config_dir.stat.gid | default('root') }}" + loop: + - src: grafana/grafana.caddy.j2 + dest: "{{ caddy_install_dir }}/config/grafana.caddy" + - src: loki/loki.caddy.j2 + dest: "{{ caddy_install_dir }}/config/loki.caddy" + become: true + notify: "caddy config changed" + + - name: Template prometheus caddy config. 
+ ansible.builtin.template: + src: prometheus/prometheus.caddy.j2 + dest: "{{ caddy_install_dir }}/config/prometheus.caddy" + mode: "0600" + setype: "container_file_t" + selevel: "{{ caddy_selinux_level }}" + owner: "{{ caddy_stat_config_dir.stat.uid | default('root') }}" + group: "{{ caddy_stat_config_dir.stat.gid | default('root') }}" + become: true + notify: "caddy config changed" + when: monitoring_prometheus_write_receiver_enable + + - name: Remove unused prometheus caddy config. + ansible.builtin.file: + path: "{{ caddy_install_dir }}/config/prometheus.caddy" + state: absent + become: true + when: not monitoring_prometheus_write_receiver_enable + +- name: Create grafana container definition file. + ansible.builtin.template: + src: grafana/grafana.container.j2 + dest: "/etc/containers/systemd/grafana.container" + owner: "root" + group: "root" + mode: "0644" + become: true + notify: "grafana container definition changed" + +- name: Create image-renderer container definition file. + ansible.builtin.template: + src: grafana/image-renderer.container.j2 + dest: "/etc/containers/systemd/image-renderer.container" + owner: "root" + group: "root" + mode: "0644" + become: true + notify: "image-renderer container definition changed" + +- name: Create loki container definition file. + ansible.builtin.template: + src: loki/loki.container.j2 + dest: "/etc/containers/systemd/loki.container" + owner: "root" + group: "root" + mode: "0644" + become: true + notify: "loki container definition changed" + +- name: Create prometheus container definition file. + ansible.builtin.template: + src: prometheus/prometheus.container.j2 + dest: "/etc/containers/systemd/prometheus.container" + owner: "root" + group: "root" + mode: "0644" + become: true + notify: "prometheus container definition changed" + +- name: Flush handlers + ansible.builtin.meta: flush_handlers + +- name: Ensure monitoring services are started and enabled. 
+ ansible.builtin.systemd: + state: started + enabled: true + name: "{{ item }}" + daemon_reload: true + loop: + - grafana.service + - image-renderer.service + - loki.service + - prometheus.service + become: true diff --git a/roles/monitoring/templates/docker-compose.yml b/roles/monitoring/templates/docker-compose.yml deleted file mode 100644 index 3027950..0000000 --- a/roles/monitoring/templates/docker-compose.yml +++ /dev/null @@ -1,143 +0,0 @@ -{{ ansible_managed | comment }} - -# Infrastructure -# Ansible instructions to deploy the infrastructure -# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>.
- -version: "2" -services: - grafana: - image: docker.io/grafana/grafana:{{ monitoring_grafana_image_version }} - mem_limit: 512mb - memswap_limit: 768mb - read_only: true - security_opt: - - no-new-privileges - tmpfs: - - "/tmp:size=64M" - environment: - - "GF_SERVER_ROOT_URL=https://{{ monitoring_grafana_domain }}" - - "GF_RENDERING_SERVER_URL=http://renderer:8081/render" - - "GF_RENDERING_CALLBACK_URL=http://grafana:3000/" - - "GF_INSTALL_PLUGINS={{ monitoring_grafana_plugins | join(',') }}" - - "GF_UNIFIED_ALERTING_ENABLED=true" - -{% if monitoring_grafana_oauth is defined and monitoring_grafana_oauth.enabled %} - - "GF_AUTH_DISABLE_LOGIN_FORM=true" - - "GF_AUTH_SIGNOUT_REDIRECT_URL={{ monitoring_grafana_oauth.signout_url }}" - - "GF_AUTH_GENERIC_OAUTH_ENABLED=true" - - "GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP={{ monitoring_grafana_oauth.allow_sign_up }}" - - "GF_AUTH_GENERIC_OAUTH_AUTO_LOGIN=true" - - "GF_AUTH_GENERIC_OAUTH_USE_PKCE=true" - - "GF_AUTH_GENERIC_OAUTH_EMPTY_SCOPES=false" - - "GF_AUTH_GENERIC_OAUTH_SKIP_ORG_ROLE_SYNC=true" - - "GF_AUTH_GENERIC_OAUTH_NAME={{ monitoring_grafana_oauth.name }}" - - "GF_AUTH_GENERIC_OAUTH_CLIENT_ID={{ monitoring_grafana_oauth.client_id }}" - - "GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET={{ monitoring_grafana_oauth.client_secret }}" - - "GF_AUTH_GENERIC_OAUTH_SCOPES=openid email profile roles offline_access" - - "GF_AUTH_GENERIC_OAUTH_AUTH_URL={{ monitoring_grafana_oauth.auth_url }}" - - "GF_AUTH_GENERIC_OAUTH_TOKEN_URL={{ monitoring_grafana_oauth.token_url }}" - - "GF_AUTH_GENERIC_OAUTH_API_URL={{ monitoring_grafana_oauth.api_url }}" - - "GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH=contains(roles[*], 'Admin') && 'Admin' || contains(roles[*], 'Editor') && 'Editor' || 'Viewer'" -{% endif %} - -{% if monitoring_grafana_feature_toggles is defined and monitoring_grafana_feature_toggles is iterable and monitoring_grafana_feature_toggles | length > 0 %} - - "GF_FEATURE_TOGGLES_ENABLE={% for item in monitoring_grafana_feature_toggles 
%}{{item}} {% endfor %}" -{% endif %} - - restart: always - labels: - - "traefik.enable=true" - - "traefik.http.routers.grafana.rule=Host(`{{ monitoring_grafana_domain }}`) && PathPrefix(`/`)" - - "traefik.http.routers.grafana.entrypoints=websecure" - - "traefik.http.routers.grafana.tls=true" - - "traefik.http.routers.grafana.tls.certresolver={{ monitoring_traefik_certresolver }}" - - "traefik.http.routers.grafana.middlewares=grafana,compress" - - "traefik.http.middlewares.grafana.headers.sslredirect=true" - - "traefik.http.middlewares.grafana.headers.stsSeconds=63072000" - - "traefik.http.middlewares.grafana.headers.referrerPolicy=no-referrer" - - "traefik.http.middlewares.grafana.headers.contentTypeNosniff=true" - - "traefik.http.middlewares.grafana.headers.browserXssFilter=true" - -{% if proxy_network is defined %} - - "traefik.docker.network={{ proxy_network }}" -{% endif %} - networks: - grafana: -{% if proxy_network is defined %} - {{ proxy_network }}: -{% endif %} - volumes: - - "{{ monitoring_grafana_location }}:/var/lib/grafana" - -{% if monitoring_grafana_renderer_enabled %} - renderer: - image: docker.io/grafana/grafana-image-renderer:latest - mem_limit: 512mb - memswap_limit: 768mb - depends_on: - - grafana - restart: always - security_opt: - - no-new-privileges - networks: - grafana: -{% endif %} - - influxdb: - image: docker.io/library/influxdb:{{ monitoring_influxdb_image_version }} - mem_limit: 1536mb - memswap_limit: 2048mb - read_only: true - security_opt: - - no-new-privileges - tmpfs: - - "/tmp:size=64M" - restart: always - labels: - - "traefik.enable=true" - - "traefik.http.routers.influxdb.rule=Host(`{{ monitoring_influxdb_domain }}`) && PathPrefix(`/`)" - - "traefik.http.routers.influxdb.entrypoints=websecure" - - "traefik.http.routers.influxdb.tls=true" - - "traefik.http.routers.influxdb.tls.certresolver={{ monitoring_traefik_certresolver }}" - - "traefik.http.routers.influxdb.middlewares=influxdb,compress" - - 
"traefik.http.middlewares.influxdb.headers.sslredirect=true" - - "traefik.http.middlewares.influxdb.headers.stsSeconds=63072000" - - "traefik.http.middlewares.influxdb.headers.referrerPolicy=no-referrer" - - "traefik.http.middlewares.influxdb.headers.contentTypeNosniff=true" - -{% if proxy_network is defined %} - - "traefik.docker.network={{ proxy_network }}" -{% endif %} - - networks: -{% if proxy_network is defined %} - {{ proxy_network }}: -{% endif %} - - volumes: - - "{{ monitoring_influxdb_location }}:/var/lib/influxdb" - environment: - - INFLUXDB_HTTP_AUTH_ENABLED=true - - INFLUXDB_HTTP_PPROF_ENABLED=true - - INFLUXDB_HTTP_PPROF_AUTH_ENABLED=true - - - INFLUXDB_REPORTING_DISABLED=true -networks: - grafana: -{% if proxy_network is defined %} - {{ proxy_network }}: - external: true -{% endif %} diff --git a/roles/monitoring/templates/grafana/grafana.caddy.j2 b/roles/monitoring/templates/grafana/grafana.caddy.j2 new file mode 100644 index 0000000..1e5d1d2 --- /dev/null +++ b/roles/monitoring/templates/grafana/grafana.caddy.j2 @@ -0,0 +1,24 @@ +{{ ansible_managed | comment }} + +{{ monitoring_grafana_domain }} { + encode gzip + + header { + # enable HSTS + Strict-Transport-Security "max-age=31536000; preload;" + + # prevent clients from sniffing the media type + X-Content-Type-Options nosniff + + # clickjacking protection: forbid framing of this site + X-Frame-Options DENY + + # do not send referrer data when downgrading from HTTPS to HTTP + Referrer-Policy no-referrer-when-downgrade + + # remove the Server response header + -Server + } + + reverse_proxy grafana:3000 +} diff --git a/roles/monitoring/templates/grafana/grafana.container.j2 b/roles/monitoring/templates/grafana/grafana.container.j2 new file mode 100644 index 0000000..06b281b --- /dev/null +++ b/roles/monitoring/templates/grafana/grafana.container.j2 @@ -0,0 +1,40 @@ +{{ ansible_managed | comment }} + +[Unit] +Description = Grafana Server + +[Service] +Restart = always +RestartSec = 5s + +[Container] +Image={{ monitoring_grafana_containerimage }}:{{
monitoring_grafana_image_tag }} +ContainerName = grafana + +AutoUpdate = registry +LogDriver = journald + +NoNewPrivileges = true +ReadOnly = true +DropCapability = all +UserNS = auto:size=65535 +{% if monitoring_grafana_selinux_level != omit %} +SecurityLabelLevel = {{ monitoring_grafana_selinux_level }} +{% endif %} + +Network = caddy.network + +ExposeHostPort = 3000 + +Volume = {{ monitoring_install_dir }}/grafana/data:/var/lib/grafana:U +Volume = {{ monitoring_install_dir }}/grafana/config/grafana.ini:/etc/grafana/grafana.ini:ro,U +Volume = {{ monitoring_install_dir }}/grafana/config/provisioning:/etc/grafana/provisioning:ro,U + +Tmpfs = /tmp:rw,noexec,nosuid,nodev,size=64m + +PodmanArgs = --memory={{ monitoring_grafana_memory_high }} +PodmanArgs = --memory-swap={{ monitoring_grafana_swap_max }} +PodmanArgs = --memory-reservation={{ monitoring_grafana_memory_low }} + +[Install] +WantedBy = default.target diff --git a/roles/monitoring/templates/grafana/grafana.ini.j2 b/roles/monitoring/templates/grafana/grafana.ini.j2 new file mode 100644 index 0000000..acb6bff --- /dev/null +++ b/roles/monitoring/templates/grafana/grafana.ini.j2 @@ -0,0 +1,103 @@ +{{ ansible_managed | comment }} + +instance_name = ${HOSTNAME} + +[paths] +provisioning = /etc/grafana/provisioning + +[server] +domain = "{{ monitoring_grafana_domain }}" +root_url = "https://{{ monitoring_grafana_domain }}" +enable_gzip = true + +[dataproxy] +logging = false +timeout = 60 + +[analytics] +enabled = false +reporting_enabled = false +check_for_updates = false +check_for_plugin_updates = false +feedback_links_enabled = false + +[security] +admin_user = admin +disable_gravatar = true +cookie_secure = true +cookie_samesite = lax +allow_embedding = false +content_security_policy_report_only = true +content_security_policy_report_only_template = "require-trusted-types-for 'script';" +angular_support_enabled = false + +[dashboard] +min_refresh_interval = 10s + +[users] +allow_sign_up = false + +[auth] 
+disable_login_form = true +signout_redirect_url = "{{ monitoring_grafana_oauth.signout_url }}" + +[auth.generic_oauth] +name = "{{ monitoring_grafana_oauth.name }}" +enabled = true +allow_sign_up = {{ monitoring_grafana_oauth.allow_sign_up | bool | lower }} +auto_login = false +client_id = "{{ monitoring_grafana_oauth.client_id }}" +client_secret = "{{ monitoring_grafana_oauth.client_secret }}" +scopes = "openid email profile offline_access" +empty_scopes = false +auth_url = "{{ monitoring_grafana_oauth.auth_url }}" +token_url = "{{ monitoring_grafana_oauth.token_url }}" +api_url = "{{ monitoring_grafana_oauth.api_url }}" +allow_assign_grafana_admin = {{ monitoring_grafana_oauth.allow_assign_grafana_admin | bool | lower }} +role_attribute_path = "contains(roles[*], 'GrafanaAdmin') && 'GrafanaAdmin' || contains(roles[*], 'Admin') && 'Admin' || contains(roles[*], 'Editor') && 'Editor' || 'Viewer'" +use_pkce = true + +[log] +mode = "console" +level = "{{ monitoring_grafana_loglevel }}" +;filters = "rendering:debug" + +[log.console] +format = "json" + +[log.frontend] +provider = "grafana" + +[unified_alerting] +enabled = true + +[unified_alerting.state_history] +enabled = true +backend = "loki" +loki_remote_url = "http://loki:3100" + +[unified_alerting.reserved_labels] +disabled_labels = grafana_folder + +[unified_alerting.screenshots] +capture = true + +[rendering] +server_url = http://image-renderer:8081/render +callback_url = http://grafana:3000/ + +[alerting] +enabled = false + +[news] +news_feed_enabled = false + +[feature_toggles] +alertStateHistoryLokiOnly = true +alertStateHistoryLokiPrimary = true +alertStateHistoryLokiSecondary = true +{% if monitoring_grafana_feature_toggles is mapping %} +{% for key, value in monitoring_grafana_feature_toggles.items() %} +{{ key }} = {{ value | lower }} +{% endfor %} +{% endif %} diff --git a/roles/monitoring/templates/grafana/image-renderer.container.j2 b/roles/monitoring/templates/grafana/image-renderer.container.j2 new 
file mode 100644 index 0000000..f19020d --- /dev/null +++ b/roles/monitoring/templates/grafana/image-renderer.container.j2 @@ -0,0 +1,36 @@ +{{ ansible_managed | comment }} + +[Unit] +Description = Grafana Image Renderer service + +[Service] +Restart = always +RestartSec = 5s + +[Container] +Image={{ monitoring_image_renderer_containerimage }}:{{ monitoring_image_renderer_image_tag }} +ContainerName = image-renderer + +Exec = "" + +AutoUpdate = registry +LogDriver = journald + +NoNewPrivileges = true +ReadOnly = true +DropCapability = all +UserNS = auto:size=65535 +{% if monitoring_grafana_selinux_level != omit %} +SecurityLabelLevel = {{ monitoring_grafana_selinux_level }} +{% endif %} + +Network = caddy.network + +ExposeHostPort = 8081 + +PodmanArgs = --memory={{ monitoring_image_renderer_memory_high }} +PodmanArgs = --memory-swap={{ monitoring_image_renderer_swap_max }} +PodmanArgs = --memory-reservation={{ monitoring_image_renderer_memory_low }} + +[Install] +WantedBy = default.target diff --git a/roles/monitoring/templates/loki/datasource.yml.j2 b/roles/monitoring/templates/loki/datasource.yml.j2 new file mode 100644 index 0000000..62e7fd0 --- /dev/null +++ b/roles/monitoring/templates/loki/datasource.yml.j2 @@ -0,0 +1,15 @@ +{{ ansible_managed | comment }} + +apiVersion: 1 + +datasources: + - name: Loki + type: loki + access: proxy + url: 'http://loki:3100' + isDefault: false + editable: false + jsonData: + timeout: 60 + manageAlerts: true + maxLines: 1000 diff --git a/roles/monitoring/templates/loki/loki-config.yml.j2 b/roles/monitoring/templates/loki/loki-config.yml.j2 new file mode 100644 index 0000000..c5c8cd5 --- /dev/null +++ b/roles/monitoring/templates/loki/loki-config.yml.j2 @@ -0,0 +1,102 @@ +{{ ansible_managed | comment }} + +target: "all" + +auth_enabled: false + +server: + http_listen_port: 3100 + log_level: "{{ monitoring_loki_loglevel }}" + log_format: json + +querier: + max_concurrent: 16 + +frontend: + address: 127.0.0.1 + compress_responses: 
true + max_outstanding_per_tenant: 30000 + +query_scheduler: + max_outstanding_requests_per_tenant: 30000 + +query_range: + align_queries_with_step: true + parallelise_shardable_queries: true + cache_results: true + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 512 + ttl: 24h + +ruler: + storage: + type: local + local: + directory: /data/loki/ruler + rule_path: /data/loki/rules + alertmanager_url: http://localhost:9093 + enable_alertmanager_v2: true + enable_api: true + remote_write: + enabled: true + client: + url: http://prometheus:9090/api/v1/write + wal: + dir: /data/loki/ruler-wal + +ingester: + wal: + enabled: true + dir: /data/loki/wal + replay_memory_ceiling: 1GB + +storage_config: + tsdb_shipper: + active_index_directory: /data/loki/tsdb-index + cache_location: /data/loki/tsdb-cache + shared_store: filesystem + cache_ttl: 24h + filesystem: + directory: /data/loki/chunks + index_queries_cache_config: + embedded_cache: + enabled: true + +chunk_store_config: + chunk_cache_config: + enable_fifocache: false + embedded_cache: + enabled: true + write_dedupe_cache_config: + embedded_cache: + enabled: true + +schema_config: + {{ monitoring_loki_schema_config | to_nice_yaml(indent=2) | indent(2) }} + +compactor: + working_directory: /data/loki/compactor + shared_store: filesystem + retention_enabled: true + +limits_config: + enforce_metric_name: false + retention_period: {{ monitoring_loki_retention_period }} + +tracing: + enabled: false + +common: + instance_addr: 127.0.0.1 + instance_interface_names: + - lo + ring: + kvstore: + store: inmemory + replication_factor: 1 + +analytics: + reporting_enabled: false diff --git a/roles/monitoring/templates/loki/loki.caddy.j2 b/roles/monitoring/templates/loki/loki.caddy.j2 new file mode 100644 index 0000000..ccb4308 --- /dev/null +++ b/roles/monitoring/templates/loki/loki.caddy.j2 @@ -0,0 +1,42 @@ +{{ ansible_managed | comment }} + +{{ monitoring_loki_domain }} { + encode gzip + + header { + # 
enable HSTS + Strict-Transport-Security "max-age=31536000; preload;" + + # prevent clients from sniffing the media type + X-Content-Type-Options nosniff + + # clickjacking protection: forbid framing of this site + X-Frame-Options DENY + + # do not send referrer data when downgrading from HTTPS to HTTP + Referrer-Policy no-referrer-when-downgrade + + # remove the Server response header + -Server + } + + @validLoki { + method POST + path /loki/api/v1/push + } + + handle @validLoki { + reverse_proxy loki:3100 +{% if monitoring_loki_basic_auth is defined and monitoring_loki_basic_auth is iterable %} + basicauth { +{% for item in monitoring_loki_basic_auth %} + {{ item.username }} {{ item.hashed_password }} +{% endfor%} + } +{% endif %} + } + + handle { + respond "Not Found" 404 + } +} diff --git a/roles/monitoring/templates/loki/loki.container.j2 b/roles/monitoring/templates/loki/loki.container.j2 new file mode 100644 index 0000000..8832e71 --- /dev/null +++ b/roles/monitoring/templates/loki/loki.container.j2 @@ -0,0 +1,40 @@ +{{ ansible_managed | comment }} + +[Unit] +Description = Grafana Loki logging server + +[Service] +Restart = always +RestartSec = 5s + +[Container] +Image = {{ monitoring_loki_containerimage }}:{{ monitoring_loki_image_tag }} +ContainerName = loki + +Exec = -config.file=/etc/loki/local-config.yaml \ + -log-config-reverse-order + +AutoUpdate = registry +LogDriver = journald + +NoNewPrivileges = true +ReadOnly = true +DropCapability = all +UserNS = auto:size=65535 +{% if monitoring_loki_selinux_level != omit %} +SecurityLabelLevel = {{ monitoring_loki_selinux_level }} +{% endif %} + +Network = caddy.network + +ExposeHostPort = 3100 + +Volume = {{ monitoring_install_dir }}/loki/data:/data/loki:U +Volume = {{ monitoring_install_dir }}/loki/config/loki-config.yaml:/etc/loki/local-config.yaml:ro,U + +PodmanArgs = --memory={{ monitoring_loki_memory_high }} +PodmanArgs = --memory-swap={{ monitoring_loki_swap_max }} +PodmanArgs = --memory-reservation={{ monitoring_loki_memory_low }} + +[Install] +WantedBy =
default.target diff --git a/roles/monitoring/templates/prometheus/datasource.yml.j2 b/roles/monitoring/templates/prometheus/datasource.yml.j2 new file mode 100644 index 0000000..9ac6db9 --- /dev/null +++ b/roles/monitoring/templates/prometheus/datasource.yml.j2 @@ -0,0 +1,21 @@ +{{ ansible_managed | comment }} + +apiVersion: 1 + +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: 'http://prometheus:9090' + isDefault: true + editable: false + jsonData: + timeout: 60 + manageAlerts: true + httpMethod: POST + prometheusType: Prometheus + prometheusVersion: 2.50.0 + cacheLevel: 'Low' + disableRecordingRules: false + incrementalQuerying: true + incrementalQueryOverlapWindow: 10m diff --git a/roles/monitoring/templates/prometheus/prometheus.caddy.j2 b/roles/monitoring/templates/prometheus/prometheus.caddy.j2 new file mode 100644 index 0000000..8cec4eb --- /dev/null +++ b/roles/monitoring/templates/prometheus/prometheus.caddy.j2 @@ -0,0 +1,42 @@ +{{ ansible_managed | comment }} + +{{ monitoring_prometheus_domain }} { + encode gzip + + header { + # enable HSTS + Strict-Transport-Security "max-age=31536000; preload;" + + # prevent clients from sniffing the media type + X-Content-Type-Options nosniff + + # clickjacking protection: forbid framing of this site + X-Frame-Options DENY + + # do not send referrer data when downgrading from HTTPS to HTTP + Referrer-Policy no-referrer-when-downgrade + + # remove the Server response header + -Server + } + + @validPrometheus { + method POST + path /api/v1/write + } + + handle @validPrometheus { + reverse_proxy prometheus:9090 +{% if monitoring_prometheus_basic_auth is defined and monitoring_prometheus_basic_auth is iterable %} + basicauth { +{% for item in monitoring_prometheus_basic_auth %} + {{ item.username }} {{ item.hashed_password }} +{% endfor%} + } +{% endif %} + } + + handle { + respond "Not Found" 404 + } +} diff --git a/roles/monitoring/templates/prometheus/prometheus.container.j2 b/roles/monitoring/templates/prometheus/prometheus.container.j2 new file mode
100644 index 0000000..4e6cc89 --- /dev/null +++ b/roles/monitoring/templates/prometheus/prometheus.container.j2 @@ -0,0 +1,47 @@ +{{ ansible_managed | comment }} + +[Unit] +Description = Prometheus timeseries database + +[Service] +Restart = always +RestartSec = 5s + +[Container] +Image = {{ monitoring_prometheus_containerimage }}:{{ monitoring_prometheus_image_tag }} +ContainerName = prometheus + +Exec = --config.file=/etc/prometheus/prometheus.yml \ + --storage.tsdb.path=/prometheus \ + --storage.tsdb.retention.time={{ monitoring_prometheus_retention_time }} \ + --web.console.libraries=/usr/share/prometheus/console_libraries \ + --web.console.templates=/usr/share/prometheus/consoles \ +{% if monitoring_prometheus_write_receiver_enable %} + --web.enable-remote-write-receiver \ +{% endif %} + --log.level={{ monitoring_prometheus_loglevel }} + +AutoUpdate = registry +LogDriver = journald + +NoNewPrivileges = true +ReadOnly = true +DropCapability = all +UserNS = auto:size=65535 +{% if monitoring_prometheus_selinux_level != omit %} +SecurityLabelLevel = {{ monitoring_prometheus_selinux_level }} +{% endif %} + +Network = caddy.network + +ExposeHostPort = 9090 + +Volume = {{ monitoring_install_dir }}/prometheus/data:/prometheus:U +Volume = {{ monitoring_install_dir }}/prometheus/config:/etc/prometheus:ro,U + +PodmanArgs = --memory={{ monitoring_prometheus_memory_high }} +PodmanArgs = --memory-swap={{ monitoring_prometheus_swap_max }} +PodmanArgs = --memory-reservation={{ monitoring_prometheus_memory_low }} + +[Install] +WantedBy = default.target diff --git a/roles/monitoring/templates/prometheus/prometheus.yml.j2 b/roles/monitoring/templates/prometheus/prometheus.yml.j2 new file mode 100644 index 0000000..a9f201f --- /dev/null +++ b/roles/monitoring/templates/prometheus/prometheus.yml.j2 @@ -0,0 +1,9 @@ +{{ ansible_managed | comment }} + +global: + scrape_interval: 60s + scrape_timeout: 10s + evaluation_interval: 60s + +scrape_configs: + {{ 
monitoring_prometheus_scrape_configs | to_nice_yaml(indent=2) | indent(2) }}