added HPA (cpu, ram, traffic)

2025-12-28 20:19:12 -05:00
parent 40a463bfc4
commit ca3c590bdd
6 changed files with 114 additions and 37 deletions
--- a/apps/severed-blog-config.yaml
+++ b/apps/severed-blog-config.yaml
@@ -44,4 +44,13 @@ data:
      # logging / lb config
      real_ip_header   X-Forwarded-For;
      set_real_ip_from 10.0.0.0/8;
+    
+      # metrics endpoint for Alloy/Prometheus. NOTE(review): stub_status emits plain text, not Prometheus exposition format - confirm something translates it before scraping
+      location /metrics {
+        stub_status on;   # serve nginx's built-in basic status counters at this path
+        access_log off;   # keep scrape requests out of the access log
+        allow 127.0.0.1;  # permit requests from inside the container itself
+        allow 10.0.0.0/8; # Allow internal cluster pods (assumes the pod network sits inside 10.0.0.0/8 - TODO confirm)
+        deny all;         # reject every other client
+      }
    }
--- a/apps/severed-blog-hpa.yaml
+++ b/apps/severed-blog-hpa.yaml
@@ -0,0 +1,41 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: severed-blog-hpa
+  namespace: severed-apps
+spec:
+  scaleTargetRef:  # The workload whose replica count this autoscaler manages
+    apiVersion: apps/v1
+    kind: Deployment
+    name: severed-blog
+  minReplicas: 2  # Never drop below 2 for HA
+  maxReplicas: 6  # Upper bound on pods, to prevent cluster exhaustion
+  metrics:  # Three independent signals: CPU, memory, and request rate
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: 70      # Scale up when average CPU usage exceeds 70% of the pods' CPU *request* (not the limit)
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: 80      # Scale up when average memory usage exceeds 80% of the pods' memory *request*
+    - type: Pods
+      pods:
+        metric:
+          name: http_requests_per_second  # Custom metric name; matches the rename in infra/observer/adapter-values.yaml
+        target:
+          type: AverageValue
+          averageValue: 10            # Scale up when the per-pod average exceeds 10 requests per second
+  behavior:
+    scaleDown:
+      stabilizationWindowSeconds: 300 # Look back 5 minutes before acting on a lower replica recommendation
+      policies:
+        - type: Percent
+          value: 100        # Allow removing up to 100% of the current replicas...
+          periodSeconds: 15 # ...within any 15-second period (minReplicas still applies)
+    scaleUp:
+      stabilizationWindowSeconds: 0   # No look-back window: scale up immediately when busy
--- a/apps/severed-blog.yaml
+++ b/apps/severed-blog.yaml
@@ -19,12 +19,17 @@ spec:
          imagePullPolicy: Never
          ports:
            - containerPort: 80
-          
+          resources:
+            requests:
+              cpu: "50m"      # 0.05 cores requested; the HPA's CPU utilization % is measured against this
+              memory: "64Mi"  # 64 MiB requested; the HPA's memory utilization % is measured against this
+            limits:
+              cpu: "200m"     # Max allowed, 0.2 cores (Prevents one pod from eating the Mac's CPU)
+              memory: "128Mi" # Max allowed, 128 MiB
          volumeMounts:
            - name: nginx-config-vol
              mountPath: /etc/nginx/conf.d/default.conf
              subPath: default.conf
-      
      volumes:
        - name: nginx-config-vol
          configMap:
--- a/infra/alloy-setup.yaml
+++ b/infra/alloy-setup.yaml
@@ -47,34 +47,13 @@ metadata:
  namespace: monitoring
 data:
  config.alloy: |
-    // 1. discovery (Shared by Logs and Metrics)
+    // 1. discovery
    discovery.kubernetes "k8s_pods" {
      role = "pod"
    }
-    
-    // 2. metrics pipeline
-    // A. read host hardware stats (CPU/RAM)
-    prometheus.exporter.unix "host" {
-      rootfs_path = "/host/root"
-      sysfs_path  = "/host/sys"
-      procfs_path = "/host/proc"
-    }

-    // B. scrape those stats
-    prometheus.scrape "host_scraper" {
-      targets    = prometheus.exporter.unix.host.targets
-      forward_to = [prometheus.remote_write.metrics_service.receiver]
-    }
-
-    // C. send to Prometheus
-    prometheus.remote_write "metrics_service" {
-      endpoint {
-        url = sys.env("PROM_URL")
-      }
-    }
-
-    // 3. logs pipeline (With Relabeling Fix)
-    // A. relabeling: Promote hidden K8s tags to real labels
+    // 2. Relabeling (feeds both the blog metrics filter and the logs pipeline; Alloy blocks may appear in any order)
+    // This adds 'app', 'namespace', and 'pod' labels to the targets
    discovery.relabel "k8s_labels" {
      targets = discovery.kubernetes.k8s_pods.targets

@@ -83,38 +62,65 @@ data:
        source_labels = ["__meta_kubernetes_pod_label_app"]
        target_label  = "app"
      }
-    
      rule {
        action        = "replace"
        source_labels = ["__meta_kubernetes_namespace"]
        target_label  = "namespace"
      }
-    
      rule {
        action        = "replace"
        source_labels = ["__meta_kubernetes_pod_name"]
        target_label  = "pod"
      }
-    
+    }
+
+    // 3. Blog metrics filter
+    // Keeps only targets whose 'app' label is 'severed-blog'; every other pod is dropped from the output
+    discovery.relabel "blog_filter" {
+      targets = discovery.relabel.k8s_labels.output  // start from the already-relabeled pod targets
      rule {
-        action        = "replace"
-        source_labels = ["__meta_kubernetes_pod_container_name"]
-        target_label  = "container"
+        source_labels = ["app"]
+        regex         = "severed-blog"
+        action        = "keep"  // non-matching targets are discarded
      }
    }

-    // B. tail logs: using the relabeled targets
+    // 4. Metrics pipeline: scrape the filtered blog pods and the host exporter, then remote-write both to PROM_URL
+    prometheus.scrape "nginx_scraper" {  // NOTE(review): no scrape_interval is set here, so the component default applies - confirm it suits the adapter's rate() window
+      targets    = discovery.relabel.blog_filter.output  // only the severed-blog pods
+      forward_to = [prometheus.remote_write.metrics_service.receiver]
+      job_name   = "integrations/nginx"  // job name attached to the scraped series
+    }
+
+    prometheus.exporter.unix "host" {  // host-level stats, read through the /host/* paths below
+      rootfs_path = "/host/root"
+      sysfs_path  = "/host/sys"
+      procfs_path = "/host/proc"
+    }
+
+    prometheus.scrape "host_scraper" {  // scrapes the targets exposed by the unix exporter above
+      targets    = prometheus.exporter.unix.host.targets
+      forward_to = [prometheus.remote_write.metrics_service.receiver]
+    }
+
+    prometheus.remote_write "metrics_service" {  // single sink shared by both scrapers
+      endpoint {
+        url = sys.env("PROM_URL")  // endpoint URL comes from the PROM_URL environment variable
+      }
+    }
+
+    // 5. Logs pipeline: collect logs for every relabeled pod target (not only the blog) and push them to LOKI_URL
    loki.source.kubernetes "pod_logs" {
      targets    = discovery.relabel.k8s_labels.output
      forward_to = [loki.write.default.receiver]
    }

-    // C. send to Loki
    loki.write "default" {
      endpoint {
        url = sys.env("LOKI_URL")
      }
    }
+
 ---
 # --- Agent Deployment (DaemonSet) ---
 # deploys one alloy agent per node to monitor the entire cluster.
--- a/infra/observer/adapter-values.yaml
+++ b/infra/observer/adapter-values.yaml
@@ -0,0 +1,16 @@
+prometheus:  # Prometheus instance the adapter queries
+  url: http://prometheus.monitoring.svc.cluster.local
+  port: 9090
+
+rules:
+  default: true  # keep the chart's built-in rules alongside the custom one below
+  custom:
+    - seriesQuery: '{__name__=~"nginx_status_requests",namespace!="",pod!=""}'  # only series carrying namespace and pod labels
+      resources:
+        overrides:  # map Prometheus labels onto Kubernetes resources
+          namespace: {resource: "namespace"}
+          pod: {resource: "pod"}
+      name:
+        matches: "^nginx_status_requests"
+        as: "http_requests_per_second"  # metric name the HPA in apps/severed-blog-hpa.yaml asks for
+      metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[2m])) by (<<.GroupBy>>)'  # 2m window: rate() needs >= 2 samples and Alloy scrapes every 60s by default, so 1m usually returned nothing
--- a/infra/observer/prometheus.yaml
+++ b/infra/observer/prometheus.yaml
@@ -1,4 +1,4 @@
-# --- Configuration ---
+# Configuration
 apiVersion: v1
 kind: ConfigMap
 metadata:
@@ -14,7 +14,7 @@ data:
        out_of_order_time_window: 1m

 ---
-# --- Service ---
+# Service
 apiVersion: v1
 kind: Service
 metadata:
@@ -29,7 +29,7 @@ spec:
      targetPort: 9090

 ---
-# --- The Database (StatefulSet) ---
+# The Database (StatefulSet)
 apiVersion: apps/v1
 kind: StatefulSet
 metadata: