This commit is contained in:
Ornel_Zply 2025-12-04 10:37:05 +01:00
parent 91bc3c856f
commit f2a051576d
9 changed files with 337 additions and 0 deletions

View File

@ -0,0 +1,40 @@
{
  "title": "App Runtime Overview (Dev)",
  "uid": "dev-app-runtime-overview",
  "tags": ["dev", "application"],
  "time": { "from": "now-1h", "to": "now" },
  "schemaVersion": 42,
  "panels": [
    {
      "type": "timeseries",
      "title": "Requests per Second (RPS)",
      "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "reqps" }, "overrides": [] },
      "targets": [
        { "refId": "A", "expr": "sum(rate(http_requests_total[2m]))" }
      ]
    },
    {
      "type": "timeseries",
      "title": "Error Rate (%)",
      "description": "Share of requests whose status matches 5xx. The numerator falls back to 0 so the panel shows 0% instead of no data when no 5xx series exists.",
      "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "percent" }, "overrides": [] },
      "targets": [
        {
          "refId": "A",
          "expr": "(sum(rate(http_requests_total{status=~\"5..\"}[2m])) or vector(0)) / sum(rate(http_requests_total[2m])) * 100"
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Latency P95 (s)",
      "gridPos": { "x": 0, "y": 8, "w": 24, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
      "targets": [
        {
          "refId": "A",
          "expr": "histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[2m])))"
        }
      ]
    }
  ]
}

View File

@ -0,0 +1,58 @@
{
  "title": "Logs by Service (Dev)",
  "uid": "dev-logs-by-service",
  "tags": ["dev", "logs", "loki"],
  "time": { "from": "now-1h", "to": "now" },
  "schemaVersion": 42,
  "templating": {
    "list": [
      {
        "name": "service",
        "label": "Service (label app)",
        "type": "query",
        "datasource": "loki",
        "query": "label_values({app!=\"\"}, app)",
        "includeAll": true,
        "multi": true,
        "refresh": 2,
        "current": {}
      }
    ]
  },
  "panels": [
    {
      "type": "logs",
      "title": "Logs $service",
      "gridPos": { "x": 0, "y": 0, "w": 24, "h": 12 },
      "datasource": "loki",
      "options": {
        "showLabels": true,
        "showTime": true,
        "wrapLogMessage": true,
        "prettifyLogMessage": true
      },
      "targets": [
        {
          "refId": "A",
          "expr": "{app=~\"$service\"}"
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Error rate (logs/min) $service",
      "description": "Matching log lines per minute: the per-second LogQL rate is multiplied by 60, so the unit is per-minute rather than per-second.",
      "gridPos": { "x": 0, "y": 12, "w": 24, "h": 8 },
      "datasource": "loki",
      "fieldConfig": { "defaults": { "unit": "opm" }, "overrides": [] },
      "options": {
        "legend": { "showLegend": true, "placement": "bottom" }
      },
      "targets": [
        {
          "refId": "A",
          "expr": "sum by (app)(rate({app=~\"$service\"} |~ \"(?i)(error|exception|fail|timeout)\"[5m])) * 60"
        }
      ]
    }
  ]
}

View File

@ -0,0 +1,49 @@
{
  "title": "Infra Overview (Ops)",
  "uid": "ops-infra-overview",
  "tags": [
    "ops",
    "infrastructure"
  ],
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "schemaVersion": 42,
  "panels": [
    {
      "type": "timeseries",
      "title": "CPU Utilization (%)",
      "gridPos": { "x": 0, "y": 0, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": { "unit": "percent" },
        "overrides": []
      },
      "targets": [
        {
          "refId": "A",
          "expr": "100 - (avg by(instance)(rate(node_cpu_seconds_total{mode=\"idle\"}[2m]))*100)"
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Memory Utilization (%)",
      "gridPos": { "x": 12, "y": 0, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": { "unit": "percent" },
        "overrides": []
      },
      "targets": [
        {
          "refId": "A",
          "expr": "((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * 100"
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Disk Space Used (bytes)",
      "gridPos": { "x": 0, "y": 8, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": { "unit": "bytes" },
        "overrides": []
      },
      "targets": [
        {
          "refId": "A",
          "expr": "node_filesystem_size_bytes{fstype!~\"tmpfs|overlay\"} - node_filesystem_avail_bytes{fstype!~\"tmpfs|overlay\"}"
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Host Power (Watts) - Scaphandre",
      "gridPos": { "x": 12, "y": 8, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": {
        "defaults": { "unit": "watt" },
        "overrides": []
      },
      "targets": [
        {
          "refId": "A",
          "expr": "scaph_host_power_microwatts / 1e6"
        }
      ]
    }
  ]
}

View File

@ -0,0 +1,86 @@
{
  "title": "Prometheus Health (Ops)",
  "uid": "ops-prom-health",
  "tags": ["ops", "prometheus", "health"],
  "time": { "from": "now-1h", "to": "now" },
  "schemaVersion": 42,
  "panels": [
    {
      "type": "stat",
      "title": "Targets DOWN (total)",
      "gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "none" }, "overrides": [] },
      "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "" } },
      "targets": [
        { "refId": "A", "expr": "sum(1 - up)" }
      ]
    },
    {
      "type": "stat",
      "title": "Alerts firing",
      "description": "Number of ALERTS series in the firing state. Falls back to 0 because count() over an empty vector returns nothing, which would otherwise render as no data.",
      "gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "none" }, "overrides": [] },
      "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "" } },
      "targets": [
        { "refId": "A", "expr": "count(ALERTS{alertstate=\"firing\"}) or vector(0)" }
      ]
    },
    {
      "type": "stat",
      "title": "Ingest rate (samples/s)",
      "description": "Summed over all returned series so the stat shows a single total instead of one value per series.",
      "gridPos": { "x": 12, "y": 0, "w": 12, "h": 4 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] },
      "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "" } },
      "targets": [
        { "refId": "A", "expr": "sum(rate(prometheus_tsdb_head_samples_appended_total[5m]))" }
      ]
    },
    {
      "type": "timeseries",
      "title": "Targets UP by job",
      "gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "none" }, "overrides": [] },
      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
      "targets": [
        { "refId": "A", "expr": "sum by(job)(up)" }
      ]
    },
    {
      "type": "timeseries",
      "title": "Scrape duration (s) by job",
      "gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
      "targets": [
        { "refId": "A", "expr": "avg by(job)(scrape_duration_seconds)" }
      ]
    },
    {
      "type": "timeseries",
      "title": "Rule group eval duration (s)",
      "gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
      "targets": [
        { "refId": "A", "expr": "max by(rule_group) (prometheus_rule_group_last_duration_seconds)" }
      ]
    },
    {
      "type": "timeseries",
      "title": "TSDB head chunks",
      "gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
      "datasource": "prometheus",
      "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
      "targets": [
        { "refId": "A", "expr": "prometheus_tsdb_head_chunks" }
      ]
    }
  ]
}

View File

@ -0,0 +1,65 @@
{
  "title": "Logs Ops (Errors & System)",
  "uid": "ops-logs-errors",
  "tags": ["ops", "logs", "loki"],
  "time": { "from": "now-2h", "to": "now" },
  "schemaVersion": 42,
  "templating": {
    "list": [
      {
        "name": "job",
        "label": "Job",
        "type": "query",
        "datasource": "loki",
        "query": "label_values(job)",
        "includeAll": true,
        "multi": true,
        "refresh": 2
      },
      {
        "name": "instance",
        "label": "Instance",
        "type": "query",
        "datasource": "loki",
        "query": "label_values({job=~\"$job\"}, instance)",
        "includeAll": true,
        "multi": true,
        "refresh": 2
      }
    ]
  },
  "panels": [
    {
      "type": "timeseries",
      "title": "Error rate by instance (logs/min)",
      "description": "Matching log lines per minute: the per-second LogQL rate is multiplied by 60, so the unit is per-minute rather than per-second.",
      "gridPos": { "x": 0, "y": 0, "w": 24, "h": 8 },
      "datasource": "loki",
      "fieldConfig": { "defaults": { "unit": "opm" }, "overrides": [] },
      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
      "targets": [
        {
          "refId": "A",
          "expr": "sum by (instance)(rate({job=~\"$job\", instance=~\"$instance\"} |~ \"(?i)(critical|error|err)\"[5m])) * 60"
        }
      ]
    },
    {
      "type": "logs",
      "title": "Recent critical & errors $job / $instance",
      "gridPos": { "x": 0, "y": 8, "w": 24, "h": 14 },
      "datasource": "loki",
      "options": {
        "showLabels": true,
        "showTime": true,
        "wrapLogMessage": true,
        "prettifyLogMessage": true
      },
      "targets": [
        {
          "refId": "A",
          "expr": "{job=~\"$job\", instance=~\"$instance\"} |~ \"(?i)(critical|error|err|panic|oom)\""
        }
      ]
    }
  ]
}

View File

@ -0,0 +1,7 @@
# Folder definitions: each list entry pairs a stable uid with a display title.
# NOTE(review): confirm which component consumes this file and that it
# understands a top-level `folders` key.
apiVersion: 1
folders:
  - uid: ops-folder
    title: "Ops Infrastructure & Plateforme"
  - uid: dev-folder
    title: "Dev Application & Qualité"

View File

@ -0,0 +1,12 @@
# File-based dashboard provider for the Dev dashboards.
apiVersion: 1
providers:
  - name: "dev-dashboards"
    orgId: 1
    # Every dashboard found under `path` is placed in this folder. The folder
    # name is required for folderUid to take effect.
    folder: "Dev Application & Qualité"
    folderUid: "dev-folder"
    type: file
    disableDeletion: false
    editable: true
    # Interval, in seconds, between scans of `path` for changed files.
    updateIntervalSeconds: 30
    options:
      path: /var/lib/grafana/dashboards/dev
      # Kept false on purpose: deriving folders from the directory layout
      # requires folder/folderUid to be unset, which would defeat the pinned
      # folder above.
      foldersFromFilesStructure: false

View File

@ -0,0 +1,12 @@
# File-based dashboard provider for the Ops dashboards.
apiVersion: 1
providers:
  - name: "ops-dashboards"
    orgId: 1
    # Every dashboard found under `path` is placed in this folder. The folder
    # name is required for folderUid to take effect.
    folder: "Ops Infrastructure & Plateforme"
    folderUid: "ops-folder"
    type: file
    disableDeletion: false
    editable: true
    # Interval, in seconds, between scans of `path` for changed files.
    updateIntervalSeconds: 30
    options:
      path: /var/lib/grafana/dashboards/ops
      # Kept false on purpose: deriving folders from the directory layout
      # requires folder/folderUid to be unset, which would defeat the pinned
      # folder above.
      foldersFromFilesStructure: false

View File

@ -0,0 +1,8 @@
# Provisioned datasources. Dashboards refer to a datasource by the `name`
# declared here.
# NOTE(review): only `prometheus` is defined in this file; dashboards that set
# "datasource": "loki" need a Loki datasource provisioned as well — confirm it
# is defined elsewhere.
apiVersion: 1
datasources:
  - name: prometheus
    type: prometheus
    # Queries are proxied through the Grafana backend rather than sent from the browser.
    access: proxy
    url: http://prometheus:9090
    isDefault: true
    editable: true