From f0738d7d7b3c2897b8f1999b1c50851a361b5309 Mon Sep 17 00:00:00 2001
From: wboughattas
Date: Sun, 28 Dec 2025 23:37:34 -0500
Subject: [PATCH] fixed nginx_http_requests_total, added sh scripts

---
 apps/severed-blog-config.yaml  |   1 +
 apps/severed-blog-hpa.yaml     |   2 +-
 apps/severed-blog.yaml         |  26 +++++++--
 infra/alloy-setup.yaml         |  65 +++++++--------------
 namespaces.yaml                |   8 ++-
 scripts/README.md              | 102 +++++++++++++++++++++++++++++++++
 scripts/access-hub.sh          |  24 ++++++++
 scripts/deploy-all.sh          |  50 ++++++++++++++++
 scripts/setup-grafana-creds.sh |  15 +++++
 9 files changed, 242 insertions(+), 51 deletions(-)
 create mode 100644 scripts/README.md
 create mode 100644 scripts/access-hub.sh
 create mode 100644 scripts/deploy-all.sh
 create mode 100644 scripts/setup-grafana-creds.sh

diff --git a/apps/severed-blog-config.yaml b/apps/severed-blog-config.yaml
index b1aeecf..663a2bc 100644
--- a/apps/severed-blog-config.yaml
+++ b/apps/severed-blog-config.yaml
@@ -51,6 +51,7 @@ data:
             access_log off;
             allow 127.0.0.1;
             allow 10.0.0.0/8;     # Allow internal cluster pods
+            allow 172.16.0.0/12;  # Allow K3d/Docker internal bridge network
             deny all;
         }
     }
diff --git a/apps/severed-blog-hpa.yaml b/apps/severed-blog-hpa.yaml
index 97d2689..7b3f36e 100644
--- a/apps/severed-blog-hpa.yaml
+++ b/apps/severed-blog-hpa.yaml
@@ -26,7 +26,7 @@ spec:
     - type: Pods
       pods:
         metric:
-          name: http_requests_per_second
+          name: nginx_http_requests_total
         target:
           type: AverageValue
           averageValue: 10  # Scale up if requests > 10 per second per pod
diff --git a/apps/severed-blog.yaml b/apps/severed-blog.yaml
index caeb168..92e16c0 100644
--- a/apps/severed-blog.yaml
+++ b/apps/severed-blog.yaml
@@ -21,15 +21,33 @@ spec:
         - containerPort: 80
         resources:
           requests:
-            cpu: "50m"      # 0.05 cores (Guaranteed to the pod)
-            memory: "64Mi"  # 64 Megabytes
+            cpu: "50m"
+            memory: "64Mi"
           limits:
-            cpu: "200m"     # Max allowed (Prevents one pod from eating the Mac's CPU)
-            memory: "128Mi" # Max allowed
+            cpu: "200m"
+            memory: "128Mi"
         volumeMounts:
         - name: nginx-config-vol
           mountPath: /etc/nginx/conf.d/default.conf
           subPath: default.conf
+
+      # --- ADD THE EXPORTER SIDECAR HERE ---
+      - name: exporter
+        image: nginx/nginx-prometheus-exporter:latest
+        args:
+        - -nginx.scrape-uri=http://localhost:80/metrics
+        ports:
+        - containerPort: 9113
+          name: metrics
+        resources:
+          requests:
+            cpu: "10m"
+            memory: "32Mi"
+          limits:
+            cpu: "50m"
+            memory: "64Mi"
+      # -------------------------------------
+
       volumes:
       - name: nginx-config-vol
         configMap:
diff --git a/infra/alloy-setup.yaml b/infra/alloy-setup.yaml
index f3e65ab..75b1ae6 100644
--- a/infra/alloy-setup.yaml
+++ b/infra/alloy-setup.yaml
@@ -1,6 +1,4 @@
 # --- RBAC configuration ---
-# creates a serviceaccount with permissions to discover pods and read logs.
-
 apiVersion: v1
 kind: ServiceAccount
 metadata:
@@ -13,11 +11,9 @@ kind: ClusterRole
 metadata:
   name: alloy-cluster-role
 rules:
-  # discovery permissions: allows alloy to find targets: Nodes, Pods, Services.
   - apiGroups: [ "" ]
     resources: [ "nodes", "nodes/proxy", "services", "endpoints", "pods" ]
     verbs: [ "get", "list", "watch" ]
-  # log access: required for 'loki.source.kubernetes' to tail logs.
   - apiGroups: [ "" ]
     resources: [ "pods/log" ]
     verbs: [ "get", "list", "watch" ]
@@ -38,8 +34,6 @@ subjects:
 ---
 # --- Alloy pipeline configuration ---
-# defines how telemetry data is collected, processed, and exported.
-
 apiVersion: v1
 kind: ConfigMap
 metadata:
@@ -47,51 +41,39 @@ metadata:
   namespace: monitoring
 data:
   config.alloy: |
-    // 1. discovery
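+    // Pipeline overview: stage 1 discovers every pod; stage 2 keeps only
+    // the blog pods and rewrites their scrape address to the exporter
+    // sidecar on :9113; stage 3 scrapes them; stage 5 ships the samples to
+    // Prometheus via remote_write; stage 6 tails the same pods' logs into Loki.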
+    // 1. Discovery: Find all pods
     discovery.kubernetes "k8s_pods" {
       role = "pod"
     }
 
-    // 2. Relabeling (MUST BE DEFINED BEFORE USE)
-    // This adds 'app', 'namespace', and 'pod' labels to the targets
-    discovery.relabel "k8s_labels" {
+    // 2. Relabeling: Filter for ONLY the blog pods
+    discovery.relabel "blog_pods" {
       targets = discovery.kubernetes.k8s_pods.targets
 
       rule {
-        action        = "replace"
+        action        = "keep"
         source_labels = ["__meta_kubernetes_pod_label_app"]
-        target_label  = "app"
+        regex         = "severed-blog"
       }
+
       rule {
-        action        = "replace"
-        source_labels = ["__meta_kubernetes_namespace"]
-        target_label  = "namespace"
-      }
-      rule {
-        action        = "replace"
-        source_labels = ["__meta_kubernetes_pod_name"]
-        target_label  = "pod"
+        action        = "replace"
+        source_labels = ["__address__"]
+        target_label  = "__address__"
+        regex         = "([^:]+)(?::\\d+)?"
+        replacement   = "$1:9113"
       }
     }
 
-    // 3. Filter for Blog Metrics
-    // Only pass targets where the label 'app' is 'severed-blog'
-    discovery.relabel "blog_filter" {
-      targets = discovery.relabel.k8s_labels.output
-      rule {
-        source_labels = ["app"]
-        regex         = "severed-blog"
-        action        = "keep"
-      }
-    }
-
-    // 4. Metrics Pipeline
+    // 3. Direct Nginx Scraper
     prometheus.scrape "nginx_scraper" {
-      targets    = discovery.relabel.blog_filter.output
+      targets    = discovery.relabel.blog_pods.output
+
       forward_to = [prometheus.remote_write.metrics_service.receiver]
       job_name   = "integrations/nginx"
     }
 
+    // 4. Host Metrics
     prometheus.exporter.unix "host" {
       rootfs_path = "/host/root"
       sysfs_path  = "/host/sys"
@@ -103,15 +85,16 @@ data:
       forward_to = [prometheus.remote_write.metrics_service.receiver]
     }
 
+    // 5. Remote Write: Send to Prometheus
     prometheus.remote_write "metrics_service" {
       endpoint {
         url = sys.env("PROM_URL")
       }
     }
 
-    // 5. Logs Pipeline
+    // 6. Logs Pipeline: Send to Loki
     loki.source.kubernetes "pod_logs" {
-      targets    = discovery.relabel.k8s_labels.output
+      targets    = discovery.relabel.blog_pods.output
       forward_to = [loki.write.default.receiver]
     }
 
     loki.write "default" {
       endpoint {
         url = sys.env("LOKI_URL")
       }
     }
-
 ---
 # --- Agent Deployment (DaemonSet) ---
-# deploys one alloy agent per node to monitor the entire cluster.
-
 apiVersion: apps/v1
 kind: DaemonSet
 metadata:
@@ -142,10 +122,7 @@ spec:
       serviceAccountName: alloy-sa
       hostNetwork: true
       hostPID: true
-
-      # Forces the pod to use K8s CoreDNS even when running on host network
       dnsPolicy: ClusterFirstWithHostNet
-
       containers:
       - name: alloy
         image: grafana/alloy:latest
         args:
         - run
         - --server.http.listen-addr=0.0.0.0:12345
         - /etc/alloy/config.alloy
-
         envFrom:
         - configMapRef:
             name: monitoring-env
             optional: false
-
         volumeMounts:
         - name: config
           mountPath: /etc/alloy
@@ -188,4 +163,4 @@ spec:
           path: /sys
       - name: root
         hostPath:
-          path: /
+          path: /
\ No newline at end of file
diff --git a/namespaces.yaml b/namespaces.yaml
index 67aeb94..6704996 100644
--- a/namespaces.yaml
+++ b/namespaces.yaml
@@ -13,4 +13,10 @@ metadata:
 apiVersion: v1
 kind: Namespace
 metadata:
-  name: kubernetes-dashboard
\ No newline at end of file
+  name: kubernetes-dashboard
+
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: openebs
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..c4f5471
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,102 @@
+# Severed-Infra: Health & Diagnostics Guide
+
+### 1. The Foundation: Node & Storage Stability
+
+Before troubleshooting apps, ensure the physical (Docker) layer is stable.
+
+* **Node Readiness:** All 3 nodes (1 server, 2 agents) must be `Ready`.
+
+```bash
+kubectl get nodes
+```
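+
+If a node is stuck `NotReady` or flapping, check its headroom first (this assumes metrics-server, which k3s/k3d ships by default):
+
+```bash
+kubectl top nodes
+```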
+
+* **Storage Binding:** Verify that the OpenEBS Persistent Volume Claims (PVCs) for Loki and Prometheus are `Bound`.
+
+```bash
+kubectl get pvc -n monitoring
+```
+
+* **Pod Health:** Every pod in the core namespaces should be `Running`.
+
+```bash
+kubectl get pods -n severed-apps
+kubectl get pods -n monitoring
+kubectl get pods -n kubernetes-dashboard
+kubectl get pods -n openebs
+```
+
+If Grafana is stuck on stale configuration, recycle it:
+
+```bash
+kubectl rollout restart deployment grafana -n monitoring
+```
+
+---
+
+### 2. The Telemetry Bridge: Alloy & Exporter
+
+Check that the exporter sidecar is translating raw Nginx status text into Prometheus numbers and that Alloy is scraping it.
+
+* **Error Scan:** Check the Alloy logs specifically for `scrape_uri` or `connection refused` errors.
+
+```bash
+kubectl logs -n monitoring -l name=alloy --tail=50
+```
+
+If the Alloy config changed, re-apply it and recycle the agents (and the blog pods, if the sidecar was added after the first deploy) before re-checking the logs:
+
+```bash
+kubectl apply -f infra/alloy-setup.yaml
+kubectl delete pods -n monitoring -l name=alloy
+kubectl get pods -n monitoring
+kubectl rollout restart deployment severed-blog -n severed-apps
+```
+
+* **Internal Handshake:** Run `scripts/access-hub.sh` and visit `http://localhost:12345`.
+  * Find the `prometheus.scrape.nginx_scraper` component.
+  * Ensure its health status is **Green/Up**.
+
+---
+
+### 3. The Database: Prometheus Query Test
+
+If the exporter is working, the metrics will appear in the Prometheus time-series database.
+
+* **Live Traffic Check:** Verify that `nginx_http_requests_total` returns a data vector (not an empty list `[]`).
+
+```bash
+kubectl exec -it prometheus-0 -n monitoring -- \
+  wget -qO- "http://localhost:9090/api/v1/query?query=nginx_http_requests_total"
+```
+
+* **Metric Discovery:** List all Nginx-related metrics currently being stored.
+
+```bash
+kubectl exec -it prometheus-0 -n monitoring -- \
+  wget -qO- "http://localhost:9090/api/v1/label/__name__/values" | grep nginx
+```
+
+---
+
+### 4. The "Brain": Horizontal Pod Autoscaler (HPA)
+
+The HPA is the final consumer of this data. If it is healthy, the cluster is auto-scaling correctly.
+
+* **Target Alignment:** The `TARGETS` column should show a real value (e.g., `0/10`) rather than `<unknown>`.
+
+```bash
+kubectl get hpa -n severed-apps
+```
+
+* **Adapter Check:** Ensure the Custom Metrics API is serving the translated Nginx metrics to the Kubernetes control plane.
+
+```bash
+kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/severed-apps/pods/*/nginx_http_requests_total"
+```
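+
+To watch the whole loop react end to end, generate some traffic and watch the HPA scale (the Service name below is an assumption for illustration; use whatever `apps/severed-blog-service.yaml` actually defines):
+
+```bash
+# Hammer the blog Service from inside the cluster; Ctrl-C to stop
+kubectl run loadgen --rm -it --restart=Never --image=busybox:1.36 -n severed-apps -- \
+  /bin/sh -c 'while true; do wget -qO- http://severed-blog-service > /dev/null; done'
+
+# In a second terminal, watch the replica count move
+kubectl get hpa -n severed-apps -w
+```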
+
+---
+
+### Cheat Sheet
+
+| Symptom                    | Probable Cause              | Fix                                          |
+|----------------------------|-----------------------------|----------------------------------------------|
+| `502 Bad Gateway`          | Node resource exhaustion    | Restart K3d or increase Docker RAM           |
+| `strconv.ParseFloat` error | Missing Nginx exporter      | Add the `nginx-prometheus-exporter` sidecar  |
+| HPA shows `<unknown>`      | Prometheus Adapter mismatch | Verify `adapter-values.yaml` metric names    |
+| `No nodes found`           | Corrupted cluster state     | Run `k3d cluster delete` and recreate        |
diff --git a/scripts/access-hub.sh b/scripts/access-hub.sh
new file mode 100644
index 0000000..5a76c99
--- /dev/null
+++ b/scripts/access-hub.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+set -e
+
+# Kill existing tunnels to prevent port conflicts ("|| true" keeps set -e
+# from aborting the script when there is nothing to kill)
+killall kubectl 2>/dev/null || true
+
+# Kubernetes Dashboard (Kong Proxy)
+kubectl -n kubernetes-dashboard port-forward svc/kubernetes-dashboard-kong-proxy 8443:443 &
+
+# Alloy UI (Internal Health)
+kubectl -n monitoring port-forward ds/alloy 12345:12345 &
+
+# Grafana
+kubectl -n monitoring port-forward svc/grafana-service 3000:3000 &
+
+echo "Dashboard:          https://localhost:8443"
+echo "Alloy UI:           http://localhost:12345"
+echo "Grafana:            http://localhost:3000"
+echo "Grafana (ingress):  http://grafana.localhost:8080"
+echo "Blog (ingress):     http://blog.localhost:8080"
+
+wait
diff --git a/scripts/deploy-all.sh b/scripts/deploy-all.sh
new file mode 100644
index 0000000..3251232
--- /dev/null
+++ b/scripts/deploy-all.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+set -e
+
+# Run from the repo root no matter where the script is invoked from
+cd "$(dirname "$0")/.."
+
+# 0. Environment Prep
+echo "Importing severed-blog:v0.3 into k3d..."
+k3d image import severed-blog:v0.3 -c severed-cluster
+
+# 1. Foundation
+kubectl apply -f namespaces.yaml
+
+echo "Installing OpenEBS LocalPV Provisioner..."
+kubectl apply -f https://openebs.github.io/charts/openebs-operator.yaml
+kubectl apply -f infra/storage/openebs-sc.yaml
+
+# 2. Monitoring Stack
+echo "Creating Grafana Secrets..."
+kubectl create secret generic grafana-secrets -n monitoring \
+  --from-literal=admin-user=admin \
+  --from-literal=admin-password=admin \
+  --dry-run=client -o yaml | kubectl apply -f -
+
+kubectl apply -f infra/observer/loki.yaml
+kubectl apply -f infra/observer/prometheus.yaml
+kubectl apply -f infra/alloy-env.yaml
+kubectl apply -f infra/alloy-setup.yaml
+
+# 3. Application Layer
+kubectl apply -f apps/severed-blog-config.yaml
+kubectl apply -f apps/severed-blog.yaml
+kubectl apply -f apps/severed-blog-service.yaml
+kubectl apply -f apps/severed-blog-hpa.yaml
+kubectl apply -f apps/severed-ingress.yaml
+
+# 4. Visualization and Scaling Bridge
+kubectl apply -f infra/observer/dashboard-json.yaml
+kubectl apply -f infra/observer/grafana.yaml
+
+echo "Installing Prometheus Adapter..."
+# Ensure the chart repo exists (no-op if it was already added)
+helm repo add prometheus-community https://prometheus-community.github.io/helm-charts 2>/dev/null || true
+helm repo update >/dev/null
+helm upgrade --install prometheus-adapter prometheus-community/prometheus-adapter \
+  -n monitoring \
+  -f infra/observer/adapter-values.yaml
+
+# 5. Dashboard Setup
+kubectl apply -f infra/dashboard/dashboard-admin.yaml
+kubectl apply -f infra/dashboard/permanent-token.yaml
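+
+# 6. Post-deploy sanity check: wait for the rollouts kicked off above.
+# Resource names match those referenced in scripts/README.md and
+# scripts/access-hub.sh (deployment/severed-blog, daemonset/alloy).
+echo "Waiting for core workloads to become ready..."
+kubectl rollout status deployment/severed-blog -n severed-apps --timeout=180s
+kubectl rollout status daemonset/alloy -n monitoring --timeout=180s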
+
+echo "Deployment Complete. Retrieving Token..."
+kubectl -n kubernetes-dashboard get secret admin-user-token -o jsonpath='{.data.token}' | base64 -d
+echo
diff --git a/scripts/setup-grafana-creds.sh b/scripts/setup-grafana-creds.sh
new file mode 100644
index 0000000..a84b197
--- /dev/null
+++ b/scripts/setup-grafana-creds.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+set -e
+
+# Prompt silently so the password never lands in shell history or terminal logs
+read -r -s -p "New Grafana admin password: " NEW_PASS
+echo
+
+GRAFANA_POD=$(kubectl get pod -n monitoring -l app=grafana -o jsonpath='{.items[0].metadata.name}')
+
+if [ -z "$GRAFANA_POD" ]; then
+  echo "Error: Grafana pod not found."
+  exit 1
+fi
+
+echo "Setting Grafana admin password..."
+kubectl exec -n monitoring "$GRAFANA_POD" -- grafana-cli admin reset-admin-password "$NEW_PASS"
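+
+# Post-reset hint: grafana-cli writes to the database directly, so a pod
+# restart is sometimes needed before the UI accepts the new password
+# (same command as listed in scripts/README.md).
+echo "Done. If Grafana still rejects the new password, recycle it:"
+echo "  kubectl rollout restart deployment grafana -n monitoring"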