diff --git a/apps/severed-blog-config.yaml b/apps/severed-blog-config.yaml
index 1faf3d1..b1aeecf 100644
--- a/apps/severed-blog-config.yaml
+++ b/apps/severed-blog-config.yaml
@@ -44,4 +44,13 @@ data:
       # logging / lb config
       real_ip_header X-Forwarded-For;
       set_real_ip_from 10.0.0.0/8;
+
+      # metrics endpoint for Alloy/Prometheus
+      location /metrics {
+        stub_status on;
+        access_log off;
+        allow 127.0.0.1;
+        allow 10.0.0.0/8;   # Allow internal cluster pods
+        deny all;
+      }
     }
diff --git a/apps/severed-blog-hpa.yaml b/apps/severed-blog-hpa.yaml
new file mode 100644
index 0000000..97d2689
--- /dev/null
+++ b/apps/severed-blog-hpa.yaml
@@ -0,0 +1,41 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: severed-blog-hpa
+  namespace: severed-apps
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: severed-blog
+  minReplicas: 2                      # Never drop below 2 for HA
+  maxReplicas: 6                      # Maximum number of pods to prevent cluster exhaustion
+  metrics:
+  - type: Resource
+    resource:
+      name: cpu
+      target:
+        type: Utilization
+        averageUtilization: 70        # Scale up if CPU Usage exceeds 70%
+  - type: Resource
+    resource:
+      name: memory
+      target:
+        type: Utilization
+        averageUtilization: 80        # Scale up if RAM Usage exceeds 80%
+  - type: Pods
+    pods:
+      metric:
+        name: http_requests_per_second
+      target:
+        type: AverageValue
+        averageValue: 10              # Scale up if requests > 10 per second per pod
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 300 # Wait 5 minutes before removing a pod
+      policies:
+      - type: Percent
+        value: 100
+        periodSeconds: 15
+    scaleUp:
+      stabilizationWindowSeconds: 0   # Scale up immediately when busy
diff --git a/apps/severed-blog.yaml b/apps/severed-blog.yaml
index a25dd6f..caeb168 100644
--- a/apps/severed-blog.yaml
+++ b/apps/severed-blog.yaml
@@ -19,12 +19,17 @@ spec:
         imagePullPolicy: Never
         ports:
         - containerPort: 80
-
+        resources:
+          requests:
+            cpu: "50m"        # 0.05 cores (Guaranteed to the pod)
+            memory: "64Mi"    # 64 Megabytes
+          limits:
+            cpu: "200m"       # Max allowed (Prevents one pod from eating the Mac's CPU)
+            memory: "128Mi"   # Max allowed
         volumeMounts:
         - name: nginx-config-vol
           mountPath: /etc/nginx/conf.d/default.conf
           subPath: default.conf
-
       volumes:
       - name: nginx-config-vol
         configMap:
diff --git a/infra/alloy-setup.yaml b/infra/alloy-setup.yaml
index 9acb786..f3e65ab 100644
--- a/infra/alloy-setup.yaml
+++ b/infra/alloy-setup.yaml
@@ -47,34 +47,13 @@ metadata:
   namespace: monitoring
 data:
   config.alloy: |
-    // 1. discovery (Shared by Logs and Metrics)
+    // 1. discovery
     discovery.kubernetes "k8s_pods" {
       role = "pod"
     }
-
-    // 2. metrics pipeline
-    // A. read host hardware stats (CPU/RAM)
-    prometheus.exporter.unix "host" {
-      rootfs_path = "/host/root"
-      sysfs_path  = "/host/sys"
-      procfs_path = "/host/proc"
-    }
-    // B. scrape those stats
-    prometheus.scrape "host_scraper" {
-      targets    = prometheus.exporter.unix.host.targets
-      forward_to = [prometheus.remote_write.metrics_service.receiver]
-    }
-
-    // C. send to Prometheus
-    prometheus.remote_write "metrics_service" {
-      endpoint {
-        url = sys.env("PROM_URL")
-      }
-    }
-
-    // 3. logs pipeline (With Relabeling Fix)
-    // A. relabeling: Promote hidden K8s tags to real labels
+    // 2. Relabeling (MUST BE DEFINED BEFORE USE)
+    // This adds 'app', 'namespace', and 'pod' labels to the targets
     discovery.relabel "k8s_labels" {
       targets = discovery.kubernetes.k8s_pods.targets
@@ -83,38 +62,65 @@ data:
       rule {
         action        = "replace"
         source_labels = ["__meta_kubernetes_pod_label_app"]
         target_label  = "app"
       }
-
       rule {
         action        = "replace"
         source_labels = ["__meta_kubernetes_namespace"]
         target_label  = "namespace"
       }
-
       rule {
         action        = "replace"
         source_labels = ["__meta_kubernetes_pod_name"]
         target_label  = "pod"
       }
-
+    }
+
+    // 3. Filter for Blog Metrics
+    // Only pass targets where the label 'app' is 'severed-blog'
+    discovery.relabel "blog_filter" {
+      targets = discovery.relabel.k8s_labels.output
       rule {
-        action        = "replace"
-        source_labels = ["__meta_kubernetes_pod_container_name"]
-        target_label  = "container"
+        source_labels = ["app"]
+        regex         = "severed-blog"
+        action        = "keep"
       }
     }
-    // B. tail logs: using the relabeled targets
+    // 4. Metrics Pipeline
+    prometheus.scrape "nginx_scraper" {
+      targets    = discovery.relabel.blog_filter.output
+      forward_to = [prometheus.remote_write.metrics_service.receiver]
+      job_name   = "integrations/nginx"
+    }
+
+    prometheus.exporter.unix "host" {
+      rootfs_path = "/host/root"
+      sysfs_path  = "/host/sys"
+      procfs_path = "/host/proc"
+    }
+
+    prometheus.scrape "host_scraper" {
+      targets    = prometheus.exporter.unix.host.targets
+      forward_to = [prometheus.remote_write.metrics_service.receiver]
+    }
+
+    prometheus.remote_write "metrics_service" {
+      endpoint {
+        url = sys.env("PROM_URL")
+      }
+    }
+
+    // 5. Logs Pipeline
     loki.source.kubernetes "pod_logs" {
       targets    = discovery.relabel.k8s_labels.output
      forward_to = [loki.write.default.receiver]
     }
-    // C. send to Loki
     loki.write "default" {
       endpoint {
         url = sys.env("LOKI_URL")
       }
     }
+
 ---
 # --- Agent Deployment (DaemonSet) ---
 # deploys one alloy agent per node to monitor the entire cluster.
diff --git a/infra/observer/adapter-values.yaml b/infra/observer/adapter-values.yaml
new file mode 100644
index 0000000..5edb5c1
--- /dev/null
+++ b/infra/observer/adapter-values.yaml
@@ -0,0 +1,16 @@
+prometheus:
+  url: http://prometheus.monitoring.svc.cluster.local
+  port: 9090
+
+rules:
+  default: true
+  custom:
+  - seriesQuery: '{__name__=~"nginx_status_requests",namespace!="",pod!=""}'
+    resources:
+      overrides:
+        namespace: {resource: "namespace"}
+        pod: {resource: "pod"}
+    name:
+      matches: "^nginx_status_requests"
+      as: "http_requests_per_second"
+    metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
diff --git a/infra/observer/prometheus.yaml b/infra/observer/prometheus.yaml
index 14e4660..abc3500 100644
--- a/infra/observer/prometheus.yaml
+++ b/infra/observer/prometheus.yaml
@@ -1,4 +1,4 @@
-# --- Configuration ---
+# Configuration
 apiVersion: v1
 kind: ConfigMap
 metadata:
@@ -14,7 +14,7 @@ data:
       out_of_order_time_window: 1m
 
 ---
-# --- Service ---
+# Service
 apiVersion: v1
 kind: Service
 metadata:
@@ -29,7 +29,7 @@ spec:
       targetPort: 9090
 
 ---
-# --- The Database (StatefulSet) ---
+# The Database (StatefulSet)
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
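A minimal way to sanity-check the custom-metrics path this patch wires up, assuming prometheus-adapter is installed from the prometheus-community Helm chart with the values file above; the chart repo, release name, and namespace here are assumptions, not part of the patch:

    # Register the chart repo and install/refresh the adapter with the new rules
    helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
    helm upgrade --install prometheus-adapter prometheus-community/prometheus-adapter \
      --namespace monitoring -f infra/observer/adapter-values.yaml

    # Confirm the custom.metrics.k8s.io API actually serves the metric the HPA consumes
    kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/severed-apps/pods/*/http_requests_per_second"

    # Watch the HPA react to the CPU, memory, and http_requests_per_second targets
    kubectl get hpa severed-blog-hpa -n severed-apps --watch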