diff --git a/playbooks/monitoring.yml b/playbooks/monitoring.yml
index d590ddd..8a8b7cb 100644
--- a/playbooks/monitoring.yml
+++ b/playbooks/monitoring.yml
@@ -1,33 +1,17 @@
----
-# Infrastructure
-# Ansible instructions to deploy the infrastructure
-# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern
-# Copyright (C) 2020 Saibotk
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
+- name: Install Monitoring Suite with Grafana, Loki and Prometheus.
-- name: Install & configure monitoring servers
hosts: monitoring
- roles:
- - docker
- - docker_cleanup
- - traefik
- - monitoring
-- name: Install & configure monitoring clients
- hosts: all
- serial: 1
roles:
- - docker
- - docker_cleanup
- - telegraf
+ - role: podman
+ become: true
+ tags:
+ - always
+ - podman
+ - role: caddy
+ become: true
+ tags:
+ - always
+ - caddy
+ - role: monitoring
+ become: true
diff --git a/roles/monitoring/README.md b/roles/monitoring/README.md
deleted file mode 100644
index ba38666..0000000
--- a/roles/monitoring/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-Monitoring
-=========
-
-This will setup an [InfluxDB](https://www.influxdata.com/products/influxdb-overview/) and a [Grafana](https://grafana.com) instance using their official docker container and traefik as a reverse proxy. This also will install a grafana-renderer, which is used to render graphs to be sent via alerts etc.
-
-Requirements
-------------
-
-> NOTE: You need the `python-influxdb` package installed locally with the Ansible client, to run this role!
-
-You will need to have docker, docker-compose and traefik installed or declared as dependencies with their respective roles.
-
-**This role assumes that you have setup traefik with an endpoint called `websecure`.**
-
-Role Variables
---------------
-
-**Please look at the [defaults/main.yml](defaults/main.yml) for all available variables and their description.**
-
-**Note: Lines that are commented out via `#` are usually still valid/used variables, but they are not defined by default, so they might enable a feature, when uncommenting/defining them!**
-
-### Global variables, that are used:
-
-- `proxy_network`: Defined by the local traefik installation, this is the shared proxy network used by traefik to reach the containers. (optional)
-- `proxy_hiddenservice`: Defined by the local traefik installation, this is used to generate the alt-svc header for the alternative Tor domain. (optional)
-
-Dependencies
-------------
-
-- docker
-- docker-compose
-- traefik
-
-License
--------
-
-GPL-3.0-only
diff --git a/roles/monitoring/defaults/main.yml b/roles/monitoring/defaults/main.yml
index c2a5fdd..4dfa2fa 100644
--- a/roles/monitoring/defaults/main.yml
+++ b/roles/monitoring/defaults/main.yml
@@ -1,54 +1,60 @@
----
-# Default variables for the monitoring role
+monitoring_install_dir: "/srv/monitoring"
-# Infrastructure
-# Ansible instructions to deploy the infrastructure
-# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern
-# Copyright (C) 2020 Saibotk
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
+monitoring_grafana_selinux_level: "{{ omit }}"
+monitoring_loki_selinux_level: "{{ omit }}"
+monitoring_prometheus_selinux_level: "{{ omit }}"
-# Install locations
-monitoring_install_location: "/srv/monitoring"
-monitoring_grafana_location: "{{ monitoring_install_location }}/grafana"
-monitoring_influxdb_location: "{{ monitoring_install_location }}/influxdb"
-
-# The certresolver for traefik to use on this domain
-monitoring_traefik_certresolver: letsencrypt_http
-
-# The domain that traefik should serve grafana on
-monitoring_grafana_domain: "grafana.example.com"
-
-# The domain traefik should serve influxdb on
-monitoring_influxdb_domain: influxdb.example.com
-
-# The grafana version
+monitoring_grafana_containerimage: "docker.io/grafana/grafana"
# renovate: depName=docker.io/grafana/grafana
-monitoring_grafana_version: 11.1.3
+monitoring_grafana_image_tag: "10.4.1"
-# The influxdb version
-# renovate: depName=docker.io/library/influxdb
-monitoring_influxdb_version: 1.8.10
+monitoring_loki_containerimage: "docker.io/grafana/loki"
+# renovate: depName=docker.io/grafana/loki
+monitoring_loki_image_tag: "2.9.6"
-# The influxdb/grafana image tag
-monitoring_influxdb_image_version: "{{ monitoring_influxdb_version }}"
-monitoring_grafana_image_version: "{{ monitoring_grafana_version }}"
+monitoring_prometheus_containerimage: quay.io/prometheus/prometheus
+# renovate: depName=quay.io/prometheus/prometheus
+monitoring_prometheus_image_tag: "v2.51.1"
-# Additional plugins, that should be installed (see https://grafana.com/docs/grafana/latest/installation/docker/)
-monitoring_grafana_plugins: []
-monitoring_grafana_renderer_enabled: true
+monitoring_image_renderer_containerimage: docker.io/grafana/grafana-image-renderer
+# renovate: depName=docker.io/grafana/grafana-image-renderer
+monitoring_image_renderer_image_tag: "3.10.1"
-# OAuth settings for grafana
+# These settings control resource management of the container workload.
+# memory_high and memory_low are straightforward; swap_max, sadly, is not.
+# memory_high and memory_low set the corresponding cgroup v2 values 'directly',
+# while swap_max sets swap.max to `memory_high - swap_max`.
+# Special values: memory_high = 0 means no memory limit; swap_max = -1 means unlimited swap.
+# A unit suffix can be appended to any of these values:
+# b (bytes), k (kibibytes), m (mebibytes), or g (gibibytes).
+monitoring_grafana_memory_low: 128m
+monitoring_grafana_memory_high: 0
+monitoring_grafana_swap_max: -1
+
+monitoring_loki_memory_low: 256m
+monitoring_loki_memory_high: 0
+monitoring_loki_swap_max: -1
+
+monitoring_prometheus_memory_low: 256m
+monitoring_prometheus_memory_high: 0
+monitoring_prometheus_swap_max: -1
+
+monitoring_image_renderer_memory_low: 256m
+monitoring_image_renderer_memory_high: 0
+monitoring_image_renderer_swap_max: -1
+
+monitoring_grafana_domain: "grafana.example.com"
+monitoring_grafana_loglevel: "info"
+
+monitoring_loki_domain: "loki.example.com"
+# Supported values [debug, info, warn, error]
+monitoring_loki_loglevel: "info"
+
+monitoring_prometheus_domain: "prometheus.example.com"
+# Supported values [debug, info, warn, error]
+monitoring_prometheus_loglevel: "info"
+
+### Grafana specific options ###
monitoring_grafana_oauth:
enabled: false
name: "OAuth"
@@ -59,30 +65,66 @@ monitoring_grafana_oauth:
api_url: "https://auth.example.com/auth/realms/sso/protocol/openid-connect/userinfo"
client_id: "grafana"
client_secret: "something-secret123"
+ allow_assign_grafana_admin: false
-# Additional feature toggles to enable (See https://grafana.com/docs/grafana/latest/administration/configuration/#feature_toggles)
monitoring_grafana_feature_toggles: []
-# The influxdb admin credentials that should be created
-monitoring_influxdb_admin_username: "admin"
-monitoring_influxdb_admin_password: "{{ lookup('passwordstore', monitoring_influxdb_domain + '/db-admin create=true length=42') }}"
+# When true this will remove all alerting provisioning files not managed by this ansible role.
+# This won't make any backups so be warned.
+monitoring_grafana_remove_unmanaged_alerting_files: false
-# The influxdb databases that should be created and their policies
-monitoring_influxdb_databases:
- - name: "telegraf"
- policies:
- - name: autogen
- duration: 30d
- replication: 1
- default: "true"
- - name: logs
- duration: 14d
- replication: 1
+monitoring_loki_schema_config:
+ configs:
+ - from: "2023-11-30"
+ store: tsdb
+ object_store: filesystem
+ schema: v12
+ index:
+ prefix: index_
+ period: 24h
+ chunks:
+ prefix: chunks_
+ period: 24h
-# Other influxdb users, that should be created
-monitoring_influxdb_users:
- - username: "telegraf"
- password: "{{ lookup('passwordstore', monitoring_influxdb_domain + '/db create=true length=42') }}"
- grants:
- - database: "telegraf"
- privilege: "ALL"
+monitoring_loki_retention_period: 15d
+
+# `hashed_password` has to be hashed using md5, sha1 or BCrypt
+# e.g. using `mkpasswd --method=bcrypt --stdin`
+# e.g. using `htpasswd -Bin <username>`
+# Ref.: https://caddyserver.com/docs/caddyfile/directives/basicauth
+monitoring_loki_basic_auth: []
+# - username: "{{ }}"
+# hashed_password: "{{ }}"
+
+### Prometheus specific options ###
+
+# Prometheus' native TLS and basic auth support is experimental, so we use caddy (for now).
+# `hashed_password` has to be hashed using md5, sha1 or BCrypt
+# e.g. using `mkpasswd --method=bcrypt --stdin`
+# e.g. using `htpasswd -Bin <username>`
+# Ref.: https://caddyserver.com/docs/caddyfile/directives/basicauth
+monitoring_prometheus_basic_auth: []
+# - username: "{{ }}"
+# hashed_password: "{{ }}"
+
+monitoring_prometheus_retention_time: 15d
+monitoring_prometheus_write_receiver_enable: false
+
+monitoring_prometheus_scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets: ["prometheus:9090"]
+
+ - job_name: grafana
+ static_configs:
+ - targets: ["grafana:3000"]
+
+ - job_name: loki
+ static_configs:
+ - targets: ["loki:3100"]
+
+ - job_name: file_configs
+ file_sd_configs:
+ - files:
+ - /etc/prometheus/file_configs/*.yml
+ - /etc/prometheus/file_configs/*.json
diff --git a/roles/monitoring/handlers/main.yml b/roles/monitoring/handlers/main.yml
new file mode 100644
index 0000000..9309f31
--- /dev/null
+++ b/roles/monitoring/handlers/main.yml
@@ -0,0 +1,58 @@
+# Handlers for the monitoring role.
+# Notified handlers run in the order they are defined in this file, so the SELinux
+# relabel stays first and finishes before any of the services below is restarted.
+- name: Apply new SELinux file context to filesystem.
+  ansible.builtin.command: "restorecon -irF {{ monitoring_install_dir }}"
+  become: true
+  # Reported as changed on every run; this handler only fires after a context rule changed.
+  changed_when: true
+  listen: "monitoring selinux context changed"
+
+# The restart handlers set `become` explicitly, matching the relabel handler above, so
+# they do not rely on the calling play enabling privilege escalation for the whole role.
+# `daemon_reload` makes systemd re-read unit definitions before each restart.
+- name: Restart grafana
+  ansible.builtin.systemd:
+    state: restarted
+    daemon_reload: true
+    name: grafana.service
+  become: true
+  listen:
+    - "grafana config changed"
+    - "grafana container definition changed"
+    - "grafana datasources changed"
+    - "grafana alert provisioning changed"
+    - "monitoring selinux context changed"
+
+- name: Restart grafana image renderer
+  ansible.builtin.systemd:
+    state: restarted
+    daemon_reload: true
+    name: image-renderer.service
+  become: true
+  listen:
+    - "image-renderer config changed"
+    - "image-renderer container definition changed"
+    - "monitoring selinux context changed"
+
+- name: Restart loki
+  ansible.builtin.systemd:
+    state: restarted
+    daemon_reload: true
+    name: loki.service
+  become: true
+  listen:
+    - "loki config changed"
+    - "loki container definition changed"
+    - "monitoring selinux context changed"
+
+- name: Restart prometheus
+  ansible.builtin.systemd:
+    state: restarted
+    daemon_reload: true
+    name: prometheus.service
+  become: true
+  listen:
+    - "prometheus config changed"
+    - "prometheus container definition changed"
+    - "monitoring selinux context changed"
diff --git a/roles/monitoring/meta/main.yml b/roles/monitoring/meta/main.yml
index 8d8de48..2dfe7e9 100644
--- a/roles/monitoring/meta/main.yml
+++ b/roles/monitoring/meta/main.yml
@@ -1,44 +1,23 @@
galaxy_info:
- author: saibotk
- description: "Deploys an influxdb and grafana via docker and traefik."
+ author: histalek
+ description: >
+ Deploy monitoring stack with podman and systemd.
+ The monitoring stack consists of Grafana, Prometheus, Loki and the grafana image renderer.
+
+ issue_tracker_url: https://git.histalek.de/histalek-de/infrastructure/-/issues
+
license: GPL-3.0-only
- min_ansible_version: "2.9"
- standalone: true
+
+ min_ansible_version: "2.10"
platforms:
- - name: EL
- versions:
- - all
- - name: GenericUNIX
- versions:
- - all
- name: Fedora
versions:
- - all
- - name: opensuse
- versions:
- - all
- - name: GenericBSD
- versions:
- - all
- - name: FreeBSD
- versions:
- - all
- - name: Ubuntu
- versions:
- - all
- - name: SLES
- versions:
- - all
- - name: GenericLinux
- versions:
- - all
- - name: Debian
- versions:
- - all
+ - "38"
+ - "39"
+
+ standalone: true
galaxy_tags: []
-dependencies:
- - role: docker
- - role: traefik
+dependencies: []
diff --git a/roles/monitoring/tasks/influxdb/database.yml b/roles/monitoring/tasks/influxdb/database.yml
deleted file mode 100644
index 6e998bf..0000000
--- a/roles/monitoring/tasks/influxdb/database.yml
+++ /dev/null
@@ -1,49 +0,0 @@
----
-# Task file influxdb/database.yml for the monitoring role
-
-# Infrastructure
-# Ansible instructions to deploy the infrastructure
-# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-- name: Create database
- community.general.influxdb_database:
- state: present
- login_username: "{{ monitoring_influxdb_admin_username }}"
- login_password: "{{ monitoring_influxdb_admin_password }}"
- hostname: "{{ monitoring_influxdb_domain }}"
- port: 443
- ssl: true
- validate_certs: true
- database_name: "{{ database.name }}"
- delegate_to: 127.0.0.1
- no_log: true
-
-- name: Create retention policies
- community.general.influxdb_retention_policy:
- login_username: "{{ monitoring_influxdb_admin_username }}"
- login_password: "{{ monitoring_influxdb_admin_password }}"
- hostname: "{{ monitoring_influxdb_domain }}"
- port: 443
- ssl: true
- validate_certs: true
- database_name: "{{ database.name }}"
- policy_name: "{{ policy.name }}"
- duration: "{{ policy.duration }}"
- replication: "{{ policy.replication }}"
- default: "{{ policy.default | default(omit) }}"
- loop: "{{ database.policies }}"
- loop_control:
- loop_var: "policy"
- delegate_to: 127.0.0.1
diff --git a/roles/monitoring/tasks/influxdb/main.yml b/roles/monitoring/tasks/influxdb/main.yml
deleted file mode 100644
index ea1a88a..0000000
--- a/roles/monitoring/tasks/influxdb/main.yml
+++ /dev/null
@@ -1,55 +0,0 @@
----
-# Task file influxdb/main.yml for the monitoring role
-
-# Infrastructure
-# Ansible instructions to deploy the infrastructure
-# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-- name: Create admin user
- community.general.influxdb_user:
- state: present
- login_username: "{{ monitoring_influxdb_admin_username }}"
- login_password: "{{ monitoring_influxdb_admin_password }}"
- hostname: "{{ monitoring_influxdb_domain }}"
- port: 443
- ssl: true
- validate_certs: true
- user_name: "{{ monitoring_influxdb_admin_username }}"
- user_password: "{{ monitoring_influxdb_admin_password }}"
- admin: true
- delegate_to: 127.0.0.1
- no_log: true
-
-- name: Configure databases
- ansible.builtin.include_tasks: database.yml
- loop: "{{ monitoring_influxdb_databases }}"
- loop_control:
- loop_var: "database"
-
-- name: Create database user
- community.general.influxdb_user:
- state: present
- login_username: "{{ monitoring_influxdb_admin_username }}"
- login_password: "{{ monitoring_influxdb_admin_password }}"
- hostname: "{{ monitoring_influxdb_domain }}"
- port: 443
- ssl: true
- validate_certs: true
- user_name: "{{ item.username }}"
- user_password: "{{ item.password }}"
- grants: "{{ item.grants }}" # noqa args[module] This has to be escaped to a string
- loop: "{{ monitoring_influxdb_users }}"
- delegate_to: 127.0.0.1
- no_log: true
diff --git a/roles/monitoring/tasks/main.yml b/roles/monitoring/tasks/main.yml
index 060af25..a5ea6c5 100644
--- a/roles/monitoring/tasks/main.yml
+++ b/roles/monitoring/tasks/main.yml
@@ -1,92 +1,293 @@
----
-# Tasks file for the monitoring role
-
-# Infrastructure
-# Ansible instructions to deploy the infrastructure
-# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
- name: Update default SELinux contexts
community.general.sefcontext:
- target: "{{ item }}(/.*)?"
+ target: "{{ item.target }}(/.*)?"
setype: "container_file_t"
+ selevel: "{{ item.selevel }}"
state: present
- with_items:
- - "{{ monitoring_grafana_location }}"
- - "{{ monitoring_influxdb_location }}"
+ loop:
+ - target: "{{ monitoring_install_dir }}/grafana"
+ selevel: "{{ monitoring_grafana_selinux_level }}"
+ - target: "{{ monitoring_install_dir }}/loki"
+ selevel: "{{ monitoring_loki_selinux_level }}"
+ - target: "{{ monitoring_install_dir }}/prometheus"
+ selevel: "{{ monitoring_prometheus_selinux_level }}"
become: true
+ notify: "monitoring selinux context changed"
-- name: Create install directory
+- name: Create monitoring directories.
ansible.builtin.file:
path: "{{ item }}"
+ owner: "root"
+ group: "root"
state: directory
mode: "0700"
- owner: "root"
- group: "root"
- with_items:
- - "{{ monitoring_install_location }}"
+ loop:
+ - "{{ monitoring_install_dir }}"
become: true
-- name: Create grafana directory
- ansible.builtin.file:
- path: "{{ item }}"
- state: directory
- mode: "0750"
- owner: "472"
- group: "472"
- setype: "container_file_t"
- with_items:
- - "{{ monitoring_grafana_location }}"
- become: true
+- name: Ensure monitoring directories and config files exist.
+ block:
+ - name: Stat grafana data directory.
+ ansible.builtin.stat:
+ path: "{{ monitoring_install_dir }}/grafana/data"
+ become: true
+ register: monitoring_grafana_stat_dir
-- name: Create influxdb directory
- ansible.builtin.file:
- path: "{{ item }}"
- state: directory
- mode: "0750"
- owner: "root"
- group: "root"
- setype: "container_file_t"
- with_items:
- - "{{ monitoring_influxdb_location }}"
- become: true
+ - name: Stat loki data directory.
+ ansible.builtin.stat:
+ path: "{{ monitoring_install_dir }}/loki/data"
+ become: true
+ register: monitoring_loki_stat_dir
-- name: Deploy docker-compose.yml and config
- ansible.builtin.template:
- src: "docker-compose.yml"
- dest: "{{ monitoring_install_location }}/docker-compose.yml"
- mode: "0600"
- owner: "root"
- group: "root"
- validate: docker compose -f %s config -q
- become: true
+ - name: Stat prometheus data directory.
+ ansible.builtin.stat:
+ path: "{{ monitoring_install_dir }}/prometheus/data"
+ become: true
+ register: monitoring_prometheus_stat_dir
-- name: Compose monitoring
- community.docker.docker_compose_v2:
+ - name: Create monitoring directories.
+ ansible.builtin.file:
+ path: "{{ item.path }}"
+ state: directory
+ owner: "{{ item.owner }}"
+ group: "{{ item.group }}"
+ mode: "0700"
+ loop:
+ - path: "{{ monitoring_install_dir }}/grafana"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/data"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/config"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/config/provisioning"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/alerting"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/dashboards"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/datasources"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/notifiers"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/grafana/config/provisioning/plugins"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/loki"
+ owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/loki/data"
+ owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/loki/config"
+ owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/prometheus"
+ owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/prometheus/data"
+ owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}"
+ - path: "{{ monitoring_install_dir }}/prometheus/config"
+ owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}"
+ become: true
+
+ - name: Deploy grafana config.
+ ansible.builtin.template:
+ src: grafana/grafana.ini.j2
+ dest: "{{ monitoring_install_dir }}/grafana/config/grafana.ini"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ mode: "0600"
+ become: true
+ notify: "grafana config changed"
+
+ - name: Deploy loki config.
+ ansible.builtin.template:
+ src: loki/loki-config.yml.j2
+ dest: "{{ monitoring_install_dir }}/loki/config/loki-config.yaml"
+ owner: "{{ monitoring_loki_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_loki_stat_dir.stat.gid | default('root') }}"
+ mode: "0600"
+ become: true
+ notify: "loki config changed"
+
+ - name: Deploy prometheus config.
+ ansible.builtin.template:
+ src: prometheus/prometheus.yml.j2
+ dest: "{{ monitoring_install_dir }}/prometheus/config/prometheus.yml"
+ owner: "{{ monitoring_prometheus_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_prometheus_stat_dir.stat.gid | default('root') }}"
+ mode: "0600"
+ become: true
+ notify: "prometheus config changed"
+
+ - name: Create grafana datasource provisioning files.
+ ansible.builtin.template:
+ src: "{{ item.src }}"
+ dest: "{{ item.dest }}"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ mode: "0600"
+ loop:
+ - src: loki/datasource.yml.j2
+ dest: "{{ monitoring_install_dir }}/grafana/config/provisioning/datasources/loki.yml"
+ - src: prometheus/datasource.yml.j2
+ dest: "{{ monitoring_install_dir }}/grafana/config/provisioning/datasources/prometheus.yml"
+ become: true
+ notify: "grafana datasources changed"
+
+ - name: Copy grafana alerting provisioning files.
+ ansible.builtin.copy:
+ src: "{{ item }}"
+ dest: "{{ monitoring_install_dir }}/grafana/config/provisioning/alerting/"
+ owner: "{{ monitoring_grafana_stat_dir.stat.uid | default('root') }}"
+ group: "{{ monitoring_grafana_stat_dir.stat.gid | default('root') }}"
+ mode: "0600"
+ loop: "{{ query('fileglob', inventory_hostname + '/monitoring/alerting/*.yml') }}"
+ become: true
+ notify: "grafana alert provisioning changed"
+ register: monitoring_grafana_managed_alerting_files
+
+- name: Remove unmanaged grafana alert provisioning files
+ when: monitoring_grafana_remove_unmanaged_alerting_files
+ notify: "grafana alert provisioning changed"
+ block:
+ - name: Find all remote grafana alert provisioning files.
+ ansible.builtin.find:
+ paths: "{{ monitoring_install_dir }}/grafana/config/provisioning/alerting/"
+ become: true
+ register: monitoring_grafana_found_alerting_files
+
+ - name: Remove unmanaged grafana alert provisioning files.
+ ansible.builtin.file:
+ state: absent
+ path: "{{ item }}"
+ loop: "{{ monitoring_grafana_found_alerting_files.files | map(attribute='path') }}"
+ when: item not in monitoring_grafana_managed_alerting_files.results | map(attribute='dest')
+ become: true
+
+- name: Ensure container images are present on the host.
+ containers.podman.podman_image:
+ name: "{{ item.name }}"
state: present
- project_src: "{{ monitoring_install_location }}"
- pull: always
- remove_orphans: true
- register: monitoring_compose
+ tag: "{{ item.tag }}"
+ loop:
+ - name: "{{ monitoring_grafana_containerimage }}"
+ tag: "{{ monitoring_grafana_image_tag }}"
+ - name: "{{ monitoring_image_renderer_containerimage }}"
+ tag: "{{ monitoring_image_renderer_image_tag }}"
+ - name: "{{ monitoring_loki_containerimage }}"
+ tag: "{{ monitoring_loki_image_tag }}"
+ - name: "{{ monitoring_prometheus_containerimage }}"
+ tag: "{{ monitoring_prometheus_image_tag }}"
become: true
-- name: Wait 30 seconds for influxdb to become healthy
- ansible.builtin.wait_for:
- timeout: 30
- delegate_to: localhost
- # noqa no-handler
- when: monitoring_compose is changed
+- name: Add caddy config file.
+ block:
+ - name: Check caddy config dir.
+ ansible.builtin.stat:
+ path: "{{ caddy_install_dir }}/config"
+ become: true
+ register: caddy_stat_config_dir
-- name: Include influxdb management
- ansible.builtin.include_tasks: influxdb/main.yml
+ - name: Template caddy config for monitoring.
+ ansible.builtin.template:
+ src: "{{ item.src }}"
+ dest: "{{ item.dest }}"
+ mode: "0600"
+ setype: "container_file_t"
+ selevel: "{{ caddy_selinux_level }}"
+ owner: "{{ caddy_stat_config_dir.stat.uid | default('root') }}"
+ group: "{{ caddy_stat_config_dir.stat.gid | default('root') }}"
+ loop:
+ - src: grafana/grafana.caddy.j2
+ dest: "{{ caddy_install_dir }}/config/grafana.caddy"
+ - src: loki/loki.caddy.j2
+ dest: "{{ caddy_install_dir }}/config/loki.caddy"
+ become: true
+ notify: "caddy config changed"
+
+ - name: Template prometheus caddy config.
+ ansible.builtin.template:
+ src: prometheus/prometheus.caddy.j2
+ dest: "{{ caddy_install_dir }}/config/prometheus.caddy"
+ mode: "0600"
+ setype: "container_file_t"
+ selevel: "{{ caddy_selinux_level }}"
+ owner: "{{ caddy_stat_config_dir.stat.uid | default('root') }}"
+ group: "{{ caddy_stat_config_dir.stat.gid | default('root') }}"
+ become: true
+ notify: "caddy config changed"
+ when: monitoring_prometheus_write_receiver_enable
+
+ - name: Remove unused prometheus caddy config.
+ ansible.builtin.file:
+ path: "{{ caddy_install_dir }}/config/prometheus.caddy"
+ state: absent
+ become: true
+ when: not monitoring_prometheus_write_receiver_enable
+
+- name: Create grafana container definition file.
+ ansible.builtin.template:
+ src: grafana/grafana.container.j2
+ dest: "/etc/containers/systemd/grafana.container"
+ owner: "root"
+ group: "root"
+ mode: "0644"
+ become: true
+ notify: "grafana container definition changed"
+
+- name: Create image-renderer container definition file.
+ ansible.builtin.template:
+ src: grafana/image-renderer.container.j2
+ dest: "/etc/containers/systemd/image-renderer.container"
+ owner: "root"
+ group: "root"
+ mode: "0644"
+ become: true
+ notify: "image-renderer container definition changed"
+
+- name: Create loki container definition file.
+ ansible.builtin.template:
+ src: loki/loki.container.j2
+ dest: "/etc/containers/systemd/loki.container"
+ owner: "root"
+ group: "root"
+ mode: "0644"
+ become: true
+ notify: "loki container definition changed"
+
+- name: Create prometheus container definition file.
+ ansible.builtin.template:
+ src: prometheus/prometheus.container.j2
+ dest: "/etc/containers/systemd/prometheus.container"
+ owner: "root"
+ group: "root"
+ mode: "0644"
+ become: true
+ notify: "prometheus container definition changed"
+
+- name: Flush handlers
+ ansible.builtin.meta: flush_handlers
+
+- name: Ensure monitoring services are started and enabled.
+ ansible.builtin.systemd:
+ state: started
+ enabled: true
+ name: "{{ item }}"
+ daemon_reload: true
+ loop:
+ - grafana.service
+ - image-renderer.service
+ - loki.service
+ - prometheus.service
+ become: true
diff --git a/roles/monitoring/templates/docker-compose.yml b/roles/monitoring/templates/docker-compose.yml
deleted file mode 100644
index 3027950..0000000
--- a/roles/monitoring/templates/docker-compose.yml
+++ /dev/null
@@ -1,143 +0,0 @@
-{{ ansible_managed | comment }}
-
-# Infrastructure
-# Ansible instructions to deploy the infrastructure
-# Copyright (C) 2019-2020 Christoph (Sheogorath) Kern
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see .
-
-version: "2"
-services:
- grafana:
- image: docker.io/grafana/grafana:{{ monitoring_grafana_image_version }}
- mem_limit: 512mb
- memswap_limit: 768mb
- read_only: true
- security_opt:
- - no-new-privileges
- tmpfs:
- - "/tmp:size=64M"
- environment:
- - "GF_SERVER_ROOT_URL=https://{{ monitoring_grafana_domain }}"
- - "GF_RENDERING_SERVER_URL=http://renderer:8081/render"
- - "GF_RENDERING_CALLBACK_URL=http://grafana:3000/"
- - "GF_INSTALL_PLUGINS={{ monitoring_grafana_plugins | join(',') }}"
- - "GF_UNIFIED_ALERTING_ENABLED=true"
-
-{% if monitoring_grafana_oauth is defined and monitoring_grafana_oauth.enabled %}
- - "GF_AUTH_DISABLE_LOGIN_FORM=true"
- - "GF_AUTH_SIGNOUT_REDIRECT_URL={{ monitoring_grafana_oauth.signout_url }}"
- - "GF_AUTH_GENERIC_OAUTH_ENABLED=true"
- - "GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP={{ monitoring_grafana_oauth.allow_sign_up }}"
- - "GF_AUTH_GENERIC_OAUTH_AUTO_LOGIN=true"
- - "GF_AUTH_GENERIC_OAUTH_USE_PKCE=true"
- - "GF_AUTH_GENERIC_OAUTH_EMPTY_SCOPES=false"
- - "GF_AUTH_GENERIC_OAUTH_SKIP_ORG_ROLE_SYNC=true"
- - "GF_AUTH_GENERIC_OAUTH_NAME={{ monitoring_grafana_oauth.name }}"
- - "GF_AUTH_GENERIC_OAUTH_CLIENT_ID={{ monitoring_grafana_oauth.client_id }}"
- - "GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET={{ monitoring_grafana_oauth.client_secret }}"
- - "GF_AUTH_GENERIC_OAUTH_SCOPES=openid email profile roles offline_access"
- - "GF_AUTH_GENERIC_OAUTH_AUTH_URL={{ monitoring_grafana_oauth.auth_url }}"
- - "GF_AUTH_GENERIC_OAUTH_TOKEN_URL={{ monitoring_grafana_oauth.token_url }}"
- - "GF_AUTH_GENERIC_OAUTH_API_URL={{ monitoring_grafana_oauth.api_url }}"
- - "GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH=contains(roles[*], 'Admin') && 'Admin' || contains(roles[*], 'Editor') && 'Editor' || 'Viewer'"
-{% endif %}
-
-{% if monitoring_grafana_feature_toggles is defined and monitoring_grafana_feature_toggles is iterable and monitoring_grafana_feature_toggles | length > 0 %}
- - "GF_FEATURE_TOGGLES_ENABLE={% for item in monitoring_grafana_feature_toggles %}{{item}} {% endfor %}"
-{% endif %}
-
- restart: always
- labels:
- - "traefik.enable=true"
- - "traefik.http.routers.grafana.rule=Host(`{{ monitoring_grafana_domain }}`) && PathPrefix(`/`)"
- - "traefik.http.routers.grafana.entrypoints=websecure"
- - "traefik.http.routers.grafana.tls=true"
- - "traefik.http.routers.grafana.tls.certresolver={{ monitoring_traefik_certresolver }}"
- - "traefik.http.routers.grafana.middlewares=grafana,compress"
- - "traefik.http.middlewares.grafana.headers.sslredirect=true"
- - "traefik.http.middlewares.grafana.headers.stsSeconds=63072000"
- - "traefik.http.middlewares.grafana.headers.referrerPolicy=no-referrer"
- - "traefik.http.middlewares.grafana.headers.contentTypeNosniff=true"
- - "traefik.http.middlewares.grafana.headers.browserXssFilter=true"
-
-{% if proxy_network is defined %}
- - "traefik.docker.network={{ proxy_network }}"
-{% endif %}
- networks:
- grafana:
-{% if proxy_network is defined %}
- {{ proxy_network }}:
-{% endif %}
- volumes:
- - "{{ monitoring_grafana_location }}:/var/lib/grafana"
-
-{% if monitoring_grafana_renderer_enabled %}
- renderer:
- image: docker.io/grafana/grafana-image-renderer:latest
- mem_limit: 512mb
- memswap_limit: 768mb
- depends_on:
- - grafana
- restart: always
- security_opt:
- - no-new-privileges
- networks:
- grafana:
-{% endif %}
-
- influxdb:
- image: docker.io/library/influxdb:{{ monitoring_influxdb_image_version }}
- mem_limit: 1536mb
- memswap_limit: 2048mb
- read_only: true
- security_opt:
- - no-new-privileges
- tmpfs:
- - "/tmp:size=64M"
- restart: always
- labels:
- - "traefik.enable=true"
- - "traefik.http.routers.influxdb.rule=Host(`{{ monitoring_influxdb_domain }}`) && PathPrefix(`/`)"
- - "traefik.http.routers.influxdb.entrypoints=websecure"
- - "traefik.http.routers.influxdb.tls=true"
- - "traefik.http.routers.influxdb.tls.certresolver={{ monitoring_traefik_certresolver }}"
- - "traefik.http.routers.influxdb.middlewares=influxdb,compress"
- - "traefik.http.middlewares.influxdb.headers.sslredirect=true"
- - "traefik.http.middlewares.influxdb.headers.stsSeconds=63072000"
- - "traefik.http.middlewares.influxdb.headers.referrerPolicy=no-referrer"
- - "traefik.http.middlewares.influxdb.headers.contentTypeNosniff=true"
-
-{% if proxy_network is defined %}
- - "traefik.docker.network={{ proxy_network }}"
-{% endif %}
-
- networks:
-{% if proxy_network is defined %}
- {{ proxy_network }}:
-{% endif %}
-
- volumes:
- - "{{ monitoring_influxdb_location }}:/var/lib/influxdb"
- environment:
- - INFLUXDB_HTTP_AUTH_ENABLED=true
- - INFLUXDB_HTTP_PPROF_ENABLED=true
- - INFLUXDB_HTTP_PPROF_AUTH_ENABLED=true
-
- - INFLUXDB_REPORTING_DISABLED=true
-networks:
- grafana:
-{% if proxy_network is defined %}
- {{ proxy_network }}:
- external: true
-{% endif %}
diff --git a/roles/monitoring/templates/grafana/grafana.caddy.j2 b/roles/monitoring/templates/grafana/grafana.caddy.j2
new file mode 100644
index 0000000..1e5d1d2
--- /dev/null
+++ b/roles/monitoring/templates/grafana/grafana.caddy.j2
@@ -0,0 +1,24 @@
+{{ ansible_managed | comment }}
+
+{{ monitoring_grafana_domain }} {
+  encode gzip
+
+  header {
+    # HSTS: tell browsers to use HTTPS only for the next year
+    Strict-Transport-Security "max-age=31536000; preload;"
+
+    # forbid MIME type sniffing
+    X-Content-Type-Options nosniff
+
+    # never render this site inside a frame (clickjacking defence)
+    X-Frame-Options DENY
+
+    # drop the Referer header on HTTPS -> HTTP navigation
+    Referrer-Policy no-referrer-when-downgrade
+
+    # strip the Server response header
+    -Server
+  }
+
+  reverse_proxy grafana:3000
+}
diff --git a/roles/monitoring/templates/grafana/grafana.container.j2 b/roles/monitoring/templates/grafana/grafana.container.j2
new file mode 100644
index 0000000..06b281b
--- /dev/null
+++ b/roles/monitoring/templates/grafana/grafana.container.j2
@@ -0,0 +1,47 @@
+{{ ansible_managed | comment }}
+{# Podman Quadlet definition of the Grafana container. #}
+
+[Unit]
+Description = Grafana Server
+
+[Service]
+Restart = always
+RestartSec = 5s
+
+[Container]
+Image = {{ monitoring_grafana_containerimage }}:{{ monitoring_grafana_image_tag }}
+ContainerName = grafana
+
+AutoUpdate = registry
+LogDriver = journald
+
+{# Hardening: no new privileges, read-only root filesystem, all capabilities dropped, auto-allocated user namespace. #}
+NoNewPrivileges = true
+ReadOnly = true
+DropCapability = all
+UserNS = auto:size=65535
+{# The SELinux level line is only emitted when the variable is not the omit placeholder. #}
+{% if monitoring_grafana_selinux_level != omit %}
+SecurityLabelLevel = {{ monitoring_grafana_selinux_level }}
+{% endif %}
+
+{# The Grafana Caddy site proxies to grafana:3000; presumably the caddy container joins this network too - confirm. #}
+Network = caddy.network
+
+ExposeHostPort = 3000
+
+{# The U option asks podman to chown the source path to the UID/GID used inside the container. #}
+Volume = {{ monitoring_install_dir }}/grafana/data:/var/lib/grafana:U
+Volume = {{ monitoring_install_dir }}/grafana/config/grafana.ini:/etc/grafana/grafana.ini:ro,U
+Volume = {{ monitoring_install_dir }}/grafana/config/provisioning:/etc/grafana/provisioning:ro,U
+
+{# The root filesystem is read-only, so /tmp is a small tmpfs. #}
+Tmpfs = /tmp:rw,noexec,nosuid,nodev,size=64m
+
+{# Memory limits are handed to podman verbatim. #}
+PodmanArgs = --memory={{ monitoring_grafana_memory_high }}
+PodmanArgs = --memory-swap={{ monitoring_grafana_swap_max }}
+PodmanArgs = --memory-reservation={{ monitoring_grafana_memory_low }}
+
+[Install]
+WantedBy = default.target
diff --git a/roles/monitoring/templates/grafana/grafana.ini.j2 b/roles/monitoring/templates/grafana/grafana.ini.j2
new file mode 100644
index 0000000..acb6bff
--- /dev/null
+++ b/roles/monitoring/templates/grafana/grafana.ini.j2
@@ -0,0 +1,111 @@
+{{ ansible_managed | comment }}
+{# Main Grafana configuration file. #}
+
+{# ${HOSTNAME} is not Jinja; it is written out literally for Grafana to expand from its environment. #}
+instance_name = ${HOSTNAME}
+
+[paths]
+provisioning = /etc/grafana/provisioning
+
+[server]
+domain = "{{ monitoring_grafana_domain }}"
+root_url = "https://{{ monitoring_grafana_domain }}"
+enable_gzip = true
+
+[dataproxy]
+logging = false
+timeout = 60
+
+[analytics]
+enabled = false
+reporting_enabled = false
+check_for_updates = false
+check_for_plugin_updates = false
+feedback_links_enabled = false
+
+[security]
+admin_user = admin
+disable_gravatar = true
+cookie_secure = true
+cookie_samesite = lax
+allow_embedding = false
+content_security_policy_report_only = true
+content_security_policy_report_only_template = "require-trusted-types-for 'script';"
+angular_support_enabled = false
+
+[dashboard]
+min_refresh_interval = 10s
+
+[users]
+allow_sign_up = false
+
+{# Login is OAuth-only: the built-in form is disabled and generic OAuth is always on, so monitoring_grafana_oauth must be defined. #}
+[auth]
+disable_login_form = true
+signout_redirect_url = "{{ monitoring_grafana_oauth.signout_url }}"
+
+[auth.generic_oauth]
+name = "{{ monitoring_grafana_oauth.name }}"
+enabled = true
+allow_sign_up = {{ monitoring_grafana_oauth.allow_sign_up | bool | lower }}
+auto_login = false
+client_id = "{{ monitoring_grafana_oauth.client_id }}"
+client_secret = "{{ monitoring_grafana_oauth.client_secret }}"
+scopes = "openid email profile offline_access"
+empty_scopes = false
+auth_url = "{{ monitoring_grafana_oauth.auth_url }}"
+token_url = "{{ monitoring_grafana_oauth.token_url }}"
+api_url = "{{ monitoring_grafana_oauth.api_url }}"
+allow_assign_grafana_admin = {{ monitoring_grafana_oauth.allow_assign_grafana_admin | bool | lower }}
+{# NOTE(review): the path below reads a roles claim, but no roles scope is requested above - confirm the IdP sends it by default. #}
+role_attribute_path = "contains(roles[*], 'GrafanaAdmin') && 'GrafanaAdmin' || contains(roles[*], 'Admin') && 'Admin' || contains(roles[*], 'Editor') && 'Editor' || 'Viewer'"
+use_pkce = true
+
+[log]
+mode = "console"
+level = "{{ monitoring_grafana_loglevel }}"
+;filters = "rendering:debug"
+
+[log.console]
+format = "json"
+
+[log.frontend]
+provider = "grafana"
+
+[unified_alerting]
+enabled = true
+
+{# Alert state history is written to the Loki container by name (loki:3100). #}
+[unified_alerting.state_history]
+enabled = true
+backend = "loki"
+loki_remote_url = "http://loki:3100"
+
+[unified_alerting.reserved_labels]
+disabled_labels = grafana_folder
+
+[unified_alerting.screenshots]
+capture = true
+
+{# Both URLs use container names: the image-renderer unit exposes 8081, the Grafana unit 3000. #}
+[rendering]
+server_url = http://image-renderer:8081/render
+callback_url = http://grafana:3000/
+
+{# Legacy alerting stays off; unified alerting is enabled above instead. #}
+[alerting]
+enabled = false
+
+[news]
+news_feed_enabled = false
+
+[feature_toggles]
+alertStateHistoryLokiOnly = true
+alertStateHistoryLokiPrimary = true
+alertStateHistoryLokiSecondary = true
+{# Extra toggles are appended only when monitoring_grafana_feature_toggles is a mapping; values are lower-cased. #}
+{% if monitoring_grafana_feature_toggles is mapping %}
+{% for toggle, state in monitoring_grafana_feature_toggles.items() %}
+{{ toggle }} = {{ state | lower }}
+{% endfor %}
+{% endif %}
diff --git a/roles/monitoring/templates/grafana/image-renderer.container.j2 b/roles/monitoring/templates/grafana/image-renderer.container.j2
new file mode 100644
index 0000000..f19020d
--- /dev/null
+++ b/roles/monitoring/templates/grafana/image-renderer.container.j2
@@ -0,0 +1,41 @@
+{{ ansible_managed | comment }}
+{# Podman Quadlet definition of the Grafana image renderer container. #}
+
+[Unit]
+Description = Grafana Image Renderer service
+
+[Service]
+Restart = always
+RestartSec = 5s
+
+[Container]
+Image = {{ monitoring_image_renderer_containerimage }}:{{ monitoring_image_renderer_image_tag }}
+ContainerName = image-renderer
+
+{# NOTE(review): Exec is an empty quoted string, presumably to blank out the image's default command - confirm. #}
+Exec = ""
+
+AutoUpdate = registry
+LogDriver = journald
+
+NoNewPrivileges = true
+ReadOnly = true
+DropCapability = all
+UserNS = auto:size=65535
+{# NOTE(review): this reuses the Grafana SELinux level variable rather than a renderer-specific one - confirm it is intended. #}
+{% if monitoring_grafana_selinux_level != omit %}
+SecurityLabelLevel = {{ monitoring_grafana_selinux_level }}
+{% endif %}
+
+{# grafana.ini points its rendering server_url at image-renderer:8081, i.e. this container name and port. #}
+Network = caddy.network
+
+ExposeHostPort = 8081
+
+{# Memory limits are handed to podman verbatim. #}
+PodmanArgs = --memory={{ monitoring_image_renderer_memory_high }}
+PodmanArgs = --memory-swap={{ monitoring_image_renderer_swap_max }}
+PodmanArgs = --memory-reservation={{ monitoring_image_renderer_memory_low }}
+
+[Install]
+WantedBy = default.target
diff --git a/roles/monitoring/templates/loki/datasource.yml.j2 b/roles/monitoring/templates/loki/datasource.yml.j2
new file mode 100644
index 0000000..62e7fd0
--- /dev/null
+++ b/roles/monitoring/templates/loki/datasource.yml.j2
@@ -0,0 +1,16 @@
+{{ ansible_managed | comment }}
+{# Loki datasource in Grafana's provisioning format; the URL uses the Loki container name and port. #}
+
+apiVersion: 1
+
+datasources:
+  - name: Loki
+    type: loki
+    access: proxy
+    url: 'http://loki:3100'
+    isDefault: false
+    editable: false
+    jsonData:
+      timeout: 60
+      manageAlerts: true
+      maxLines: 1000
diff --git a/roles/monitoring/templates/loki/loki-config.yml.j2 b/roles/monitoring/templates/loki/loki-config.yml.j2
new file mode 100644
index 0000000..c5c8cd5
--- /dev/null
+++ b/roles/monitoring/templates/loki/loki-config.yml.j2
@@ -0,0 +1,106 @@
+{{ ansible_managed | comment }}
+{# Loki with every component in one process (target "all") and filesystem storage under /data/loki. #}
+
+target: "all"
+
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+  log_level: "{{ monitoring_loki_loglevel }}"
+  log_format: json
+
+querier:
+  max_concurrent: 16
+
+frontend:
+  address: 127.0.0.1
+  compress_responses: true
+  max_outstanding_per_tenant: 30000
+
+query_scheduler:
+  max_outstanding_requests_per_tenant: 30000
+
+query_range:
+  align_queries_with_step: true
+  parallelise_shardable_queries: true
+  cache_results: true
+  results_cache:
+    cache:
+      embedded_cache:
+        enabled: true
+        max_size_mb: 512
+        ttl: 24h
+
+{# NOTE(review): alertmanager_url targets localhost:9093, but no Alertmanager unit is started by the visible tasks - confirm. #}
+{# Rule results are remote-written to Prometheus, which only accepts them when its remote-write receiver flag is enabled. #}
+ruler:
+  storage:
+    type: local
+    local:
+      directory: /data/loki/ruler
+  rule_path: /data/loki/rules
+  alertmanager_url: http://localhost:9093
+  enable_alertmanager_v2: true
+  enable_api: true
+  remote_write:
+    enabled: true
+    client:
+      url: http://prometheus:9090/api/v1/write
+  wal:
+    dir: /data/loki/ruler-wal
+
+ingester:
+  wal:
+    enabled: true
+    dir: /data/loki/wal
+    replay_memory_ceiling: 1GB
+
+storage_config:
+  tsdb_shipper:
+    active_index_directory: /data/loki/tsdb-index
+    cache_location: /data/loki/tsdb-cache
+    shared_store: filesystem
+    cache_ttl: 24h
+  filesystem:
+    directory: /data/loki/chunks
+  index_queries_cache_config:
+    embedded_cache:
+      enabled: true
+
+chunk_store_config:
+  chunk_cache_config:
+    enable_fifocache: false
+    embedded_cache:
+      enabled: true
+  write_dedupe_cache_config:
+    embedded_cache:
+      enabled: true
+
+{# indent(2) shifts every line except the first; the first line takes its two spaces from this template. #}
+schema_config:
+  {{ monitoring_loki_schema_config | to_nice_yaml(indent=2) | indent(2) }}
+
+compactor:
+  working_directory: /data/loki/compactor
+  shared_store: filesystem
+  retention_enabled: true
+
+limits_config:
+  enforce_metric_name: false
+  retention_period: {{ monitoring_loki_retention_period }}
+
+tracing:
+  enabled: false
+
+common:
+  instance_addr: 127.0.0.1
+  instance_interface_names:
+    - lo
+  ring:
+    kvstore:
+      store: inmemory
+  replication_factor: 1
+
+analytics:
+  reporting_enabled: false
diff --git a/roles/monitoring/templates/loki/loki.caddy.j2 b/roles/monitoring/templates/loki/loki.caddy.j2
new file mode 100644
index 0000000..ccb4308
--- /dev/null
+++ b/roles/monitoring/templates/loki/loki.caddy.j2
@@ -0,0 +1,45 @@
+{{ ansible_managed | comment }}
+{# Caddy site for Loki: only the log push endpoint is proxied. #}
+
+{{ monitoring_loki_domain }} {
+  encode gzip
+
+  header {
+    # HSTS: tell browsers to use HTTPS only for the next year
+    Strict-Transport-Security "max-age=31536000; preload;"
+
+    # forbid MIME type sniffing
+    X-Content-Type-Options nosniff
+
+    # never render this site inside a frame (clickjacking defence)
+    X-Frame-Options DENY
+
+    # drop the Referer header on HTTPS -> HTTP navigation
+    Referrer-Policy no-referrer-when-downgrade
+
+    # strip the Server response header
+    -Server
+  }
+
+{# Anything that is not POST /loki/api/v1/push falls through to the 404 handler at the end. #}
+  @validLoki {
+    method POST
+    path /loki/api/v1/push
+  }
+
+  handle @validLoki {
+    reverse_proxy loki:3100
+{# Basic auth is only emitted when monitoring_loki_basic_auth is defined and iterable; otherwise the endpoint is unauthenticated. #}
+{% if monitoring_loki_basic_auth is defined and monitoring_loki_basic_auth is iterable %}
+    basicauth {
+{% for account in monitoring_loki_basic_auth %}
+      {{ account.username }} {{ account.hashed_password }}
+{% endfor %}
+    }
+{% endif %}
+  }
+
+  handle {
+    respond "Not Found" 404
+  }
+}
diff --git a/roles/monitoring/templates/loki/loki.container.j2 b/roles/monitoring/templates/loki/loki.container.j2
new file mode 100644
index 0000000..8832e71
--- /dev/null
+++ b/roles/monitoring/templates/loki/loki.container.j2
@@ -0,0 +1,46 @@
+{{ ansible_managed | comment }}
+{# Podman Quadlet definition of the Loki container. #}
+
+[Unit]
+Description = Grafana Loki logging server
+
+[Service]
+Restart = always
+RestartSec = 5s
+
+[Container]
+Image = {{ monitoring_loki_containerimage }}:{{ monitoring_loki_image_tag }}
+ContainerName = loki
+
+{# Arguments passed to the container after the image name; the config path is the read-only volume mounted below. #}
+Exec = -config.file=/etc/loki/local-config.yaml \
+  -log-config-reverse-order
+
+AutoUpdate = registry
+LogDriver = journald
+
+{# Hardening: no new privileges, read-only root filesystem, all capabilities dropped, auto-allocated user namespace. #}
+NoNewPrivileges = true
+ReadOnly = true
+DropCapability = all
+UserNS = auto:size=65535
+{% if monitoring_loki_selinux_level != omit %}
+SecurityLabelLevel = {{ monitoring_loki_selinux_level }}
+{% endif %}
+
+{# The Loki Caddy site proxies to loki:3100; presumably the caddy container joins this network too - confirm. #}
+Network = caddy.network
+
+ExposeHostPort = 3100
+
+{# The U option asks podman to chown the source path to the UID/GID used inside the container. #}
+Volume = {{ monitoring_install_dir }}/loki/data:/data/loki:U
+Volume = {{ monitoring_install_dir }}/loki/config/loki-config.yaml:/etc/loki/local-config.yaml:ro,U
+
+{# Memory limits are handed to podman verbatim. #}
+PodmanArgs = --memory={{ monitoring_loki_memory_high }}
+PodmanArgs = --memory-swap={{ monitoring_loki_swap_max }}
+PodmanArgs = --memory-reservation={{ monitoring_loki_memory_low }}
+
+[Install]
+WantedBy = default.target
diff --git a/roles/monitoring/templates/prometheus/datasource.yml.j2 b/roles/monitoring/templates/prometheus/datasource.yml.j2
new file mode 100644
index 0000000..9ac6db9
--- /dev/null
+++ b/roles/monitoring/templates/prometheus/datasource.yml.j2
@@ -0,0 +1,22 @@
+{{ ansible_managed | comment }}
+{# Default Prometheus datasource in Grafana's provisioning format; the URL uses the Prometheus container name and port. #}
+
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    url: 'http://prometheus:9090'
+    isDefault: true
+    editable: false
+    jsonData:
+      timeout: 60
+      manageAlerts: true
+      httpMethod: POST
+      prometheusType: Prometheus
+      prometheusVersion: 2.50.0
+      cacheLevel: 'Low'
+      disableRecordingRules: false
+      incrementalQuerying: true
+      incrementalQueryOverlapWindow: 10m
diff --git a/roles/monitoring/templates/prometheus/prometheus.caddy.j2 b/roles/monitoring/templates/prometheus/prometheus.caddy.j2
new file mode 100644
index 0000000..8cec4eb
--- /dev/null
+++ b/roles/monitoring/templates/prometheus/prometheus.caddy.j2
@@ -0,0 +1,45 @@
+{{ ansible_managed | comment }}
+{# Caddy site for Prometheus: only the remote-write endpoint is proxied. #}
+
+{{ monitoring_prometheus_domain }} {
+  encode gzip
+
+  header {
+    # HSTS: tell browsers to use HTTPS only for the next year
+    Strict-Transport-Security "max-age=31536000; preload;"
+
+    # forbid MIME type sniffing
+    X-Content-Type-Options nosniff
+
+    # never render this site inside a frame (clickjacking defence)
+    X-Frame-Options DENY
+
+    # drop the Referer header on HTTPS -> HTTP navigation
+    Referrer-Policy no-referrer-when-downgrade
+
+    # strip the Server response header
+    -Server
+  }
+
+{# Anything that is not POST /api/v1/write falls through to the 404 handler at the end. #}
+  @validPrometheus {
+    method POST
+    path /api/v1/write
+  }
+
+  handle @validPrometheus {
+    reverse_proxy prometheus:9090
+{# Basic auth is only emitted when monitoring_prometheus_basic_auth is defined and iterable; otherwise the endpoint is unauthenticated. #}
+{% if monitoring_prometheus_basic_auth is defined and monitoring_prometheus_basic_auth is iterable %}
+    basicauth {
+{% for account in monitoring_prometheus_basic_auth %}
+      {{ account.username }} {{ account.hashed_password }}
+{% endfor %}
+    }
+{% endif %}
+  }
+
+  handle {
+    respond "Not Found" 404
+  }
+}
diff --git a/roles/monitoring/templates/prometheus/prometheus.container.j2 b/roles/monitoring/templates/prometheus/prometheus.container.j2
new file mode 100644
index 0000000..4e6cc89
--- /dev/null
+++ b/roles/monitoring/templates/prometheus/prometheus.container.j2
@@ -0,0 +1,53 @@
+{{ ansible_managed | comment }}
+{# Podman Quadlet definition of the Prometheus container. #}
+
+[Unit]
+Description = Prometheus timeseries database
+
+[Service]
+Restart = always
+RestartSec = 5s
+
+[Container]
+Image = {{ monitoring_prometheus_containerimage }}:{{ monitoring_prometheus_image_tag }}
+ContainerName = prometheus
+
+{# One continued command line; the remote-write receiver flag is added only when monitoring_prometheus_write_receiver_enable is true. #}
+Exec = --config.file=/etc/prometheus/prometheus.yml \
+  --storage.tsdb.path=/prometheus \
+  --storage.tsdb.retention.time={{ monitoring_prometheus_retention_time }} \
+  --web.console.libraries=/usr/share/prometheus/console_libraries \
+  --web.console.templates=/usr/share/prometheus/consoles \
+{% if monitoring_prometheus_write_receiver_enable %}
+  --web.enable-remote-write-receiver \
+{% endif %}
+  --log.level={{ monitoring_prometheus_loglevel }}
+
+AutoUpdate = registry
+LogDriver = journald
+
+{# Hardening: no new privileges, read-only root filesystem, all capabilities dropped, auto-allocated user namespace. #}
+NoNewPrivileges = true
+ReadOnly = true
+DropCapability = all
+UserNS = auto:size=65535
+{% if monitoring_prometheus_selinux_level != omit %}
+SecurityLabelLevel = {{ monitoring_prometheus_selinux_level }}
+{% endif %}
+
+{# The Prometheus Caddy site proxies to prometheus:9090; presumably the caddy container joins this network too - confirm. #}
+Network = caddy.network
+
+ExposeHostPort = 9090
+
+{# The U option asks podman to chown the source path to the UID/GID used inside the container. #}
+Volume = {{ monitoring_install_dir }}/prometheus/data:/prometheus:U
+Volume = {{ monitoring_install_dir }}/prometheus/config:/etc/prometheus:ro,U
+
+{# Memory limits are handed to podman verbatim. #}
+PodmanArgs = --memory={{ monitoring_prometheus_memory_high }}
+PodmanArgs = --memory-swap={{ monitoring_prometheus_swap_max }}
+PodmanArgs = --memory-reservation={{ monitoring_prometheus_memory_low }}
+
+[Install]
+WantedBy = default.target
diff --git a/roles/monitoring/templates/prometheus/prometheus.yml.j2 b/roles/monitoring/templates/prometheus/prometheus.yml.j2
new file mode 100644
index 0000000..a9f201f
--- /dev/null
+++ b/roles/monitoring/templates/prometheus/prometheus.yml.j2
@@ -0,0 +1,11 @@
+{{ ansible_managed | comment }}
+{# Prometheus server configuration: fixed global timings plus scrape jobs taken from monitoring_prometheus_scrape_configs. #}
+
+global:
+  scrape_interval: 60s
+  scrape_timeout: 10s
+  evaluation_interval: 60s
+
+{# indent(2) shifts every line except the first; the first line takes its two spaces from this template. #}
+scrape_configs:
+  {{ monitoring_prometheus_scrape_configs | to_nice_yaml(indent=2) | indent(2) }}