diff --git a/centre_observabilite/docker-compose.yml b/centre_observabilite/docker-compose.yml
new file mode 100644
index 0000000..da5412e
--- /dev/null
+++ b/centre_observabilite/docker-compose.yml
@@ -0,0 +1,93 @@
+# Observability stack: Prometheus, Grafana, Loki, Promtail and node_exporter.
+# Secrets are read from the environment (or an untracked .env file beside this
+# file); Compose aborts with the given message when a ":?" variable is unset.
+services:
+  prometheus:
+    # NOTE(review): ":latest" tags are not reproducible; consider pinning.
+    image: prom/prometheus:latest
+    container_name: prometheus-observability
+    volumes:
+      - ./prometheus.yml:/etc/prometheus/prometheus.yml
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yml'
+    ports:
+      - "9090:9090"
+    networks:
+      - observability
+  grafana:
+    image: grafana/grafana:latest
+    container_name: grafana-observability
+    ports:
+      - "3000:3000"
+    volumes:
+      - grafana-data:/var/lib/grafana
+      - ./observability/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
+      - ./observability/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
+      - ./observability/grafana/dashboards:/var/lib/grafana/dashboards:ro
+    environment:
+      # Credentials must never be committed; the SMTP app password that was
+      # previously hard-coded here has to be revoked and regenerated.
+      - "GF_SECURITY_ADMIN_USER=${GF_SECURITY_ADMIN_USER:-admin}"
+      - "GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:?set GF_SECURITY_ADMIN_PASSWORD}"
+      - GF_SMTP_ENABLED=true
+      - GF_SMTP_HOST=smtp.gmail.com:587
+      - "GF_SMTP_USER=${GF_SMTP_USER:?set GF_SMTP_USER}"
+      - "GF_SMTP_PASSWORD=${GF_SMTP_PASSWORD:?set GF_SMTP_PASSWORD}"
+      - "GF_SMTP_FROM_ADDRESS=${GF_SMTP_USER:?set GF_SMTP_USER}"
+      - GF_SMTP_FROM_NAME=Grafana Alerts
+      # Keep TLS certificate verification on: the SMTP credentials travel over
+      # this connection.
+      - GF_SMTP_SKIP_VERIFY=false
+    networks:
+      - observability
+    depends_on:
+      - loki
+  loki:
+    image: grafana/loki:2.8.2
+    container_name: loki-observability
+    ports:
+      - "3100:3100"
+    command: -config.file=/etc/loki/local-config.yaml
+    volumes:
+      - ./loki-config.yaml:/etc/loki/local-config.yaml
+      - ./loki-wal:/wal
+      - ./loki-chunks:/loki/chunks
+      - ./loki-index:/loki/index
+    networks:
+      - observability
+  promtail:
+    image: grafana/promtail:2.8.2
+    container_name: promtail-observability
+    volumes:
+      - ./promtail-config.yaml:/etc/promtail/config.yaml
+      - /var/lib/docker/containers:/var/lib/docker/containers:ro
+      - /var/log:/var/log:ro
+    command:
+      - -config.file=/etc/promtail/config.yaml
+    #depends_on:
+    #  - loki
+    networks:
+      - observability
+  node_exporter:
+    image: prom/node-exporter:latest
+    container_name: node-exporter
+    restart: unless-stopped
+    pid: "host"
+    network_mode: "host"
+    volumes:
+      - /proc:/host/proc:ro
+      - /sys:/host/sys:ro
+      - /:/rootfs:ro
+    command:
+      - '--path.procfs=/host/proc'
+      - '--path.sysfs=/host/sys'
+      - '--path.rootfs=/rootfs'
+      # No inner quotes: arguments are passed without a shell, so quote
+      # characters would become part of the regex. "$$" is Compose's escape for
+      # a literal "$".
+      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
+volumes:
+  grafana-data:
+networks:
+  observability:
+    driver: bridge
diff --git a/centre_observabilite/loki-chunks/loki_cluster_seed.json b/centre_observabilite/loki-chunks/loki_cluster_seed.json
new file mode 100644
index 0000000..09fa3af
--- /dev/null
+++ b/centre_observabilite/loki-chunks/loki_cluster_seed.json
@@ -0,0 +1 @@
+{"UID":"714e0dc1-bca9-44e1-aca6-110f8b49de5c","created_at":"2025-09-29T13:46:47.834317171Z","version":{"version":"2.8.2","revision":"9f809eda7","branch":"HEAD","buildUser":"root@e401cfcb874f","buildDate":"2023-05-03T11:07:54Z","goVersion":"go1.20.4"}}
\ No newline at end of file
diff --git a/centre_observabilite/loki-config.yaml b/centre_observabilite/loki-config.yaml
new file mode 100644
index 0000000..63d45cb
--- /dev/null
+++ b/centre_observabilite/loki-config.yaml
@@ -0,0 +1,63 @@
+# Single-node Loki configuration: filesystem chunk store, boltdb-shipper index,
+# in-memory ring, authentication disabled.
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+  grpc_listen_port: 9096
+
+common:
+  path_prefix: /loki
+  storage:
+    filesystem:
+      chunks_directory: /loki/chunks
+      rules_directory: /loki/rules
+  replication_factor: 1
+  ring:
+    instance_addr: 127.0.0.1
+    kvstore:
+      store: inmemory
+
+ingester:
+  wal:
+    enabled: true
+    dir: /wal
+    flush_on_shutdown: true
+  chunk_idle_period: 5m
+  chunk_retain_period: 30s
+  max_chunk_age: 1h
+  lifecycler:
+    ring:
+      replication_factor: 1
+
+schema_config:
+  configs:
+    - from: 2020-10-24
+      store: boltdb-shipper
+      object_store: filesystem
+      schema: v11
+      index:
+        prefix: index_
+        period: 24h
+
+storage_config:
+  boltdb_shipper:
+    active_index_directory: /loki/index
+    # Keep the query-time download cache apart from the active index directory
+    # so downloaded and locally written index files cannot collide.
+    cache_location: /loki/boltdb-shipper-cache
+    shared_store: filesystem
+  filesystem:
+    directory: /loki/chunks
+
+limits_config:
+  enforce_metric_name: false
+  reject_old_samples: true
+  reject_old_samples_max_age: 168h
+  ingestion_rate_mb: 10
+  ingestion_burst_size_mb: 20
+
+compactor:
+  working_directory: /loki/compactor
+  shared_store: filesystem
+  compaction_interval: 10m
diff --git a/centre_observabilite/loki-index/loki_cluster_seed.json b/centre_observabilite/loki-index/loki_cluster_seed.json
new file mode 100644
index 0000000..09fa3af
--- /dev/null
+++ b/centre_observabilite/loki-index/loki_cluster_seed.json
@@ -0,0 +1 @@
+{"UID":"714e0dc1-bca9-44e1-aca6-110f8b49de5c","created_at":"2025-09-29T13:46:47.834317171Z","version":{"version":"2.8.2","revision":"9f809eda7","branch":"HEAD","buildUser":"root@e401cfcb874f","buildDate":"2023-05-03T11:07:54Z","goVersion":"go1.20.4"}}
\ No newline at end of file
diff --git a/centre_observabilite/loki-wal/00000004 b/centre_observabilite/loki-wal/00000004
new file mode 100644
index 0000000..e69de29
diff --git a/centre_observabilite/loki-wal/checkpoint.000003/00000000 b/centre_observabilite/loki-wal/checkpoint.000003/00000000
new file mode 100644
index 0000000..e69de29
diff --git a/centre_observabilite/observability/grafana/dashboards/dev/00_app_runtime_overview.json
b/centre_observabilite/observability/grafana/dashboards/dev/00_app_runtime_overview.json
new file mode 100644
index 0000000..11c78fd
--- /dev/null
+++ b/centre_observabilite/observability/grafana/dashboards/dev/00_app_runtime_overview.json
@@ -0,0 +1,40 @@
+{
+  "title": "App Runtime Overview (Dev)",
+  "uid": "dev-app-runtime-overview",
+  "tags": ["dev","application"],
+  "time": { "from": "now-1h", "to": "now" },
+  "schemaVersion": 42,
+  "panels": [
+    {
+      "type": "timeseries",
+      "title": "Requests per Second (RPS)",
+      "gridPos": {"x":0,"y":0,"w":12,"h":8},
+      "datasource": "prometheus",
+      "targets": [
+        { "refId": "A", "expr": "sum(rate(http_requests_total[2m]))" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Error Rate (%)",
+      "gridPos": {"x":12,"y":0,"w":12,"h":8},
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "percent" }, "overrides": [] },
+      "targets": [
+        { "refId": "A", "expr": "sum(rate(http_requests_total{status=~\"5..\"}[2m])) / sum(rate(http_requests_total[2m])) * 100" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Latency P95 (s)",
+      "gridPos": {"x":0,"y":8,"w":24,"h":8},
+      "datasource": "prometheus",
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[2m])))"
+        }
+      ]
+    }
+  ]
+}
diff --git a/centre_observabilite/observability/grafana/dashboards/dev/01_logs_by_service.json b/centre_observabilite/observability/grafana/dashboards/dev/01_logs_by_service.json
new file mode 100644
index 0000000..ce2f50c
--- /dev/null
+++ b/centre_observabilite/observability/grafana/dashboards/dev/01_logs_by_service.json
@@ -0,0 +1,58 @@
+{
+  "title": "Logs by Service (Dev)",
+  "uid": "dev-logs-by-service",
+  "tags": ["dev","logs","loki"],
+  "time": { "from": "now-1h", "to": "now" },
+  "schemaVersion": 42,
+  "templating": {
+    "list": [
+      {
+        "name": "service",
+        "label": "Service (label app)",
+        "type": "query",
+        "datasource": "loki",
+        "query": "label_values({app!=\"\"}, app)",
+        "includeAll": true,
+        "multi": true,
+        "refresh": 2,
+        "current": {}
+      }
+    ]
+  },
+  "panels": [
+    {
+      "type": "logs",
+      "title": "Logs – $service",
+      "gridPos": { "x": 0, "y": 0, "w": 24, "h": 12 },
+      "datasource": "loki",
+      "options": {
+        "showLabels": true,
+        "showTime": true,
+        "wrapLogMessage": true,
+        "prettifyLogMessage": true
+      },
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "{app=~\"$service\"}"
+        }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Error rate (logs/min) – $service",
+      "gridPos": { "x": 0, "y": 12, "w": 24, "h": 8 },
+      "datasource": "loki",
+      "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] },
+      "options": {
+        "legend": { "showLegend": true, "placement": "bottom" }
+      },
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "sum by (app)(rate({app=~\"$service\"} |~ \"(?i)(error|exception|fail|timeout)\"[5m])) * 60"
+        }
+      ]
+    }
+  ]
+}
diff --git a/centre_observabilite/observability/grafana/dashboards/ops/00_infra_overview.json b/centre_observabilite/observability/grafana/dashboards/ops/00_infra_overview.json
new file mode 100644
index 0000000..018caaf
--- /dev/null
+++ b/centre_observabilite/observability/grafana/dashboards/ops/00_infra_overview.json
@@ -0,0 +1,49 @@
+{
+  "title": "Infra Overview (Ops)",
+  "uid": "ops-infra-overview",
+  "tags": ["ops","infrastructure"],
+  "time": { "from": "now-1h", "to": "now" },
+  "schemaVersion": 42,
+  "panels": [
+    {
+      "type": "timeseries",
+      "title": "CPU Utilization (%)",
+      "gridPos": {"x":0,"y":0,"w":12,"h":8},
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "percent" }, "overrides": [] },
+      "targets": [
+        { "refId": "A", "expr": "100 - (avg by(instance)(rate(node_cpu_seconds_total{mode=\"idle\"}[2m]))*100)" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Memory Utilization (%)",
+      "gridPos": {"x":12,"y":0,"w":12,"h":8},
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "percent" }, "overrides": [] },
+      "targets": [
+        { "refId": "A", "expr": "((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes) * 100" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Disk Space Used (bytes)",
+      "gridPos": {"x":0,"y":8,"w":12,"h":8},
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "bytes" }, "overrides": [] },
+      "targets": [
+        { "refId": "A", "expr": "node_filesystem_size_bytes{fstype!~\"tmpfs|overlay\"} - node_filesystem_avail_bytes{fstype!~\"tmpfs|overlay\"}" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Host Power (Watts) - Scaphandre",
+      "gridPos": {"x":12,"y":8,"w":12,"h":8},
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "watt" }, "overrides": [] },
+      "targets": [
+        { "refId": "A", "expr": "scaph_host_power_microwatts / 1e6" }
+      ]
+    }
+  ]
+}
diff --git a/centre_observabilite/observability/grafana/dashboards/ops/03_prometheus_health.json b/centre_observabilite/observability/grafana/dashboards/ops/03_prometheus_health.json
new file mode 100644
index 0000000..4299647
--- /dev/null
+++ b/centre_observabilite/observability/grafana/dashboards/ops/03_prometheus_health.json
@@ -0,0 +1,86 @@
+{
+  "title": "Prometheus Health (Ops)",
+  "uid": "ops-prom-health",
+  "tags": ["ops","prometheus","health"],
+  "time": { "from": "now-1h", "to": "now" },
+  "schemaVersion": 42,
+  "panels": [
+    {
+      "type": "stat",
+      "title": "Targets DOWN (total)",
+      "gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 },
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "none" }, "overrides": [] },
+      "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "" } },
+      "targets": [
+        { "refId": "A", "expr": "sum(1 - up)" }
+      ]
+    },
+    {
+      "type": "stat",
+      "title": "Alerts firing",
+      "gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 },
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "none" }, "overrides": [] },
+      "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "" } },
+      "targets": [
+        { "refId": "A", "expr": "count(ALERTS{alertstate=\"firing\"}) or vector(0)" }
+      ]
+    },
+    {
+      "type": "stat",
+      "title": "Ingest rate (samples/s)",
+      "gridPos": { "x": 12, "y": 0, "w": 12, "h": 4 },
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] },
+      "options": { "reduceOptions": { "calcs": ["lastNotNull"] } },
+      "targets": [
+        { "refId": "A", "expr": "rate(prometheus_tsdb_head_samples_appended_total[5m])" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Targets UP by job",
+      "gridPos": { "x": 0, "y": 4, "w": 12, "h": 8 },
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "none" }, "overrides": [] },
+      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
+      "targets": [
+        { "refId": "A", "expr": "sum by(job)(up)" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Scrape duration (s) by job",
+      "gridPos": { "x": 12, "y": 4, "w": 12, "h": 8 },
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
+      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
+      "targets": [
+        { "refId": "A", "expr": "avg by(job)(scrape_duration_seconds)" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "Rule group eval duration (s)",
+      "gridPos": { "x": 0, "y": 12, "w": 12, "h": 8 },
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] },
+      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
+      "targets": [
+        { "refId": "A", "expr": "max by(rule_group) (prometheus_rule_group_last_duration_seconds)" }
+      ]
+    },
+    {
+      "type": "timeseries",
+      "title": "TSDB head chunks",
+      "gridPos": { "x": 12, "y": 12, "w": 12, "h": 8 },
+      "datasource": "prometheus",
+      "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] },
+      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
+      "targets": [
+        { "refId": "A", "expr": "prometheus_tsdb_head_chunks" }
+      ]
+    }
+  ]
+}
diff --git a/centre_observabilite/observability/grafana/dashboards/ops/04_logs_loki_ops.json b/centre_observabilite/observability/grafana/dashboards/ops/04_logs_loki_ops.json
new file mode 100644
index 0000000..41dc7d4
--- /dev/null
+++ b/centre_observabilite/observability/grafana/dashboards/ops/04_logs_loki_ops.json
@@ -0,0 +1,65 @@
+{
+  "title": "Logs – Ops (Errors & System)",
+  "uid": "ops-logs-errors",
+  "tags": ["ops","logs","loki"],
+  "time": { "from": "now-2h", "to": "now" },
+  "schemaVersion": 42,
+  "templating": {
+    "list": [
+      {
+        "name": "job",
+        "label": "Job",
+        "type": "query",
+        "datasource": "loki",
+        "query": "label_values(job)",
+        "includeAll": true,
+        "multi": true,
+        "refresh": 2
+      },
+      {
+        "name": "instance",
+        "label": "Instance",
+        "type": "query",
+        "datasource": "loki",
+        "query": "label_values({job=~\"$job\"}, instance)",
+        "includeAll": true,
+        "multi": true,
+        "refresh": 2
+      }
+    ]
+  },
+  "panels": [
+    {
+      "type": "timeseries",
+      "title": "Error rate by instance (logs/min)",
+      "gridPos": { "x": 0, "y": 0, "w": 24, "h": 8 },
+      "datasource": "loki",
+      "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] },
+      "options": { "legend": { "showLegend": true, "placement": "bottom" } },
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "sum by (instance)(rate({job=~\"$job\", instance=~\"$instance\"} |~ \"(?i)(critical|error|err)\"[5m])) * 60"
+        }
+      ]
+    },
+    {
+      "type": "logs",
+      "title": "Recent critical & errors – $job / $instance",
+      "gridPos": { "x": 0, "y": 8, "w": 24, "h": 14 },
+      "datasource": "loki",
+      "options": {
+        "showLabels": true,
+        "showTime": true,
+        "wrapLogMessage": true,
+        "prettifyLogMessage": true
+      },
+      "targets": [
+        {
+          "refId": "A",
+          "expr": "{job=~\"$job\", instance=~\"$instance\"} |~ \"(?i)(critical|error|err|panic|oom)\""
+        }
+      ]
+    }
+  ]
+}
diff --git a/centre_observabilite/observability/grafana/provisioning/dashboards/folders.yaml b/centre_observabilite/observability/grafana/provisioning/dashboards/folders.yaml
new file mode
100644
index 0000000..4901783
--- /dev/null
+++ b/centre_observabilite/observability/grafana/provisioning/dashboards/folders.yaml
@@ -0,0 +1,10 @@
+# NOTE(review): Grafana's dashboard provisioning only documents a `providers:`
+# list; confirm that something actually consumes this `folders:` section. The
+# provider files declare the folder title and UID themselves.
+apiVersion: 1
+
+folders:
+  - uid: ops-folder
+    title: "Ops – Infrastructure & Plateforme"
+  - uid: dev-folder
+    title: "Dev – Application & Qualité"
diff --git a/centre_observabilite/observability/grafana/provisioning/dashboards/provider-dev.yaml b/centre_observabilite/observability/grafana/provisioning/dashboards/provider-dev.yaml
new file mode 100644
index 0000000..a997263
--- /dev/null
+++ b/centre_observabilite/observability/grafana/provisioning/dashboards/provider-dev.yaml
@@ -0,0 +1,16 @@
+apiVersion: 1
+providers:
+  - name: "dev-dashboards"
+    orgId: 1
+    # Target folder; Grafana creates it with this title and UID if missing.
+    folder: "Dev – Application & Qualité"
+    folderUid: "dev-folder"
+    type: file
+    disableDeletion: false
+    editable: true
+    updateIntervalSeconds: 30
+    options:
+      path: /var/lib/grafana/dashboards/dev
+      # Must stay false while folder/folderUid are set: the two mechanisms are
+      # mutually exclusive.
+      foldersFromFilesStructure: false
diff --git a/centre_observabilite/observability/grafana/provisioning/dashboards/provider-ops.yaml b/centre_observabilite/observability/grafana/provisioning/dashboards/provider-ops.yaml
new file mode 100644
index 0000000..091b380
--- /dev/null
+++ b/centre_observabilite/observability/grafana/provisioning/dashboards/provider-ops.yaml
@@ -0,0 +1,16 @@
+apiVersion: 1
+providers:
+  - name: "ops-dashboards"
+    orgId: 1
+    # Target folder; Grafana creates it with this title and UID if missing.
+    folder: "Ops – Infrastructure & Plateforme"
+    folderUid: "ops-folder"
+    type: file
+    disableDeletion: false
+    editable: true
+    updateIntervalSeconds: 30
+    options:
+      path: /var/lib/grafana/dashboards/ops
+      # Must stay false while folder/folderUid are set: the two mechanisms are
+      # mutually exclusive.
+      foldersFromFilesStructure: false
diff --git a/centre_observabilite/observability/grafana/provisioning/datasources/prometheus.yaml b/centre_observabilite/observability/grafana/provisioning/datasources/prometheus.yaml
new file mode 100644
index 0000000..3c25da3
--- /dev/null
+++ b/centre_observabilite/observability/grafana/provisioning/datasources/prometheus.yaml
@@ -0,0 +1,14 @@
+apiVersion: 1
+datasources:
+  - name: prometheus
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: true
+  # The log dashboards reference this data source by its name, "loki".
+  - name: loki
+    type: loki
+    access: proxy
+    url: http://loki:3100
+    editable: true
diff --git a/centre_observabilite/prometheus.yml b/centre_observabilite/prometheus.yml
new file mode 100644
index 0000000..b86e1c5
--- /dev/null
+++ b/centre_observabilite/prometheus.yml
@@ -0,0 +1,30 @@
+global:
+  scrape_interval: 15s
+
+scrape_configs:
+  - job_name: 'prometheus'
+    static_configs:
+      - targets: ['localhost:9090']
+
+  - job_name: 'observabilite'
+    static_configs:
+      - targets: ['192.168.4.4:9100']  # IP of the observability host
+
+  - job_name: 'scaphandre'
+    static_configs:
+      - targets: ['192.168.4.4:8080']
+    fallback_scrape_protocol: "PrometheusText1.0.0"
+
+  - job_name: 'apache_vmservices'
+    static_configs:
+      - targets: ['192.168.56.17:9117']  # IP of vmServices
+
+  - job_name: 'vms'
+    static_configs:
+      - targets: ['192.168.56.18:9100']  # IP of vmHardware
+      - targets: ['192.168.56.17:9100']  # IP of vmServices
+      - targets: ['192.168.56.15:9100']  # IP of vmApplicatifs
+
+  - job_name: 'tomcat'
+    static_configs:
+      - targets: ['192.168.56.15:9082']  # IP of vmApplicatifs
diff --git a/centre_observabilite/promtail-config.yaml b/centre_observabilite/promtail-config.yaml
new file mode 100644
index 0000000..f8cc655
--- /dev/null
+++ b/centre_observabilite/promtail-config.yaml
@@ -0,0 +1,32 @@
+server:
+  http_listen_port: 9080
+  grpc_listen_port: 0
+
+positions:
+  # NOTE(review): /tmp is not backed by a volume in docker-compose.yml, so read
+  # offsets are lost whenever the container is recreated.
+  filename: /tmp/positions.yaml
+
+clients:
+  - url: http://loki:3100/loki/api/v1/push
+
+scrape_configs:
+  - job_name: docker_logs
+    # Docker's json-file driver wraps every line in a JSON envelope; the docker
+    # stage unwraps it and uses the embedded timestamp.
+    pipeline_stages:
+      - docker: {}
+    static_configs:
+      - targets:
+          - localhost
+        labels:
+          job: docker
+          __path__: /var/lib/docker/containers/*/*.log
+
+  - job_name: system_logs
+    static_configs:
+      - targets:
+          - localhost
+        labels:
+          job: syslog
+          __path__: /var/log/*.log
diff --git a/note/commande_docker.md b/note/commande_docker.md
new file mode 100644
index 0000000..a0ae365
--- /dev/null
+++ b/note/commande_docker.md
@@ -0,0 +1,27 @@
+# Méthode 1
+# Arrêter tous les containers
+docker stop $(docker ps -aq)
+
+# Supprimer tous les containers
+docker rm -f $(docker ps -aq)
+
+#
Supprimer toutes les images
+docker rmi -f $(docker images -q)
+
+# Supprimer tous les volumes
+docker volume rm $(docker volume ls -q)
+
+# Supprimer tous les réseaux personnalisés
+docker network rm $(docker network ls --filter type=custom -q)
+
+# Vérifier que tout est propre
+docker system df
+
+
+
+# Méthode 2
+docker system prune -a --volumes --force
+
+# Pour construire
+
+docker compose up --build
\ No newline at end of file