fixed nginx_http_requests_total, added sh scripts

This commit is contained in:
wboughattas
2025-12-28 23:37:34 -05:00
parent ca3c590bdd
commit f0738d7d7b
9 changed files with 242 additions and 51 deletions

View File

@@ -51,6 +51,7 @@ data:
access_log off;
allow 127.0.0.1;
allow 10.0.0.0/8; # Allow internal cluster pods
allow 172.16.0.0/12; # Allow K3d/Docker internal bridge network
deny all;
}
}
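The allow-list above only admits loopback plus the two internal CIDRs, so the quickest sanity check is a request from a pod IP inside 10.0.0.0/8. A minimal sketch, assuming the blog pods run in `severed-apps` with the label `app=severed-blog` and that this location is the `/metrics` stub_status page the exporter scrapes:

```bash
# Hit the guarded location from a throwaway in-cluster pod (allowed by 10.0.0.0/8).
POD_IP=$(kubectl get pod -n severed-apps -l app=severed-blog \
  -o jsonpath='{.items[0].status.podIP}')
kubectl run stub-check --rm -it --restart=Never --image=busybox -n severed-apps -- \
  wget -qO- "http://${POD_IP}/metrics"
# Expect stub_status text (Active connections, accepts/handled/requests), not a 403.
```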

View File

@@ -26,7 +26,7 @@ spec:
- type: Pods
pods:
metric:
name: http_requests_per_second
name: nginx_http_requests_total
target:
type: AverageValue
averageValue: 10 # Scale up if requests > 10 per second per pod
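If the adapter's renamed metric and the name above ever drift apart, the HPA reports `<unknown>` targets and its events say so explicitly. A quick check (the HPA object name is not shown in this hunk, so the commands stay generic):

```bash
# TARGETS should show a real ratio; FailedGetPodsMetric events point at a name mismatch.
kubectl get hpa -n severed-apps
kubectl describe hpa -n severed-apps | grep -A5 Events
```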

View File

@@ -21,15 +21,33 @@ spec:
- containerPort: 80
resources:
requests:
cpu: "50m" # 0.05 cores (Guaranteed to the pod)
memory: "64Mi" # 64 Megabytes
cpu: "50m"
memory: "64Mi"
limits:
cpu: "200m" # Max allowed (Prevents one pod from eating the Mac's CPU)
memory: "128Mi" # Max allowed
cpu: "200m"
memory: "128Mi"
volumeMounts:
- name: nginx-config-vol
mountPath: /etc/nginx/conf.d/default.conf
subPath: default.conf
# --- ADD THE EXPORTER SIDECAR HERE ---
- name: exporter
image: nginx/nginx-prometheus-exporter:latest
args:
- -nginx.scrape-uri=http://localhost:80/metrics
ports:
- containerPort: 9113
name: metrics
resources:
requests:
cpu: "10m"
memory: "32Mi"
limits:
cpu: "50m"
memory: "64Mi"
# -------------------------------------
volumes:
- name: nginx-config-vol
configMap:
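With the sidecar in place, every blog pod should report two containers. A quick structural check, assuming the deployment lives in `severed-apps`:

```bash
# READY should read 2/2: the nginx container plus the exporter sidecar.
kubectl get pods -n severed-apps -l app=severed-blog
# Confirm both container names landed in the pod template.
kubectl get deploy severed-blog -n severed-apps \
  -o jsonpath='{.spec.template.spec.containers[*].name}'
```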

View File

@@ -1,6 +1,4 @@
# --- RBAC configuration ---
# creates a serviceaccount with permissions to discover pods and read logs.
apiVersion: v1
kind: ServiceAccount
metadata:
@@ -13,11 +11,9 @@ kind: ClusterRole
metadata:
name: alloy-cluster-role
rules:
# discovery permissions: allows alloy to find targets: Nodes, Pods, Services.
- apiGroups: [ "" ]
resources: [ "nodes", "nodes/proxy", "services", "endpoints", "pods" ]
verbs: [ "get", "list", "watch" ]
# log access: required for 'loki.source.kubernetes' to tail logs.
- apiGroups: [ "" ]
resources: [ "pods/log" ]
verbs: [ "get", "list", "watch" ]
@@ -38,8 +34,6 @@ subjects:
---
# --- Alloy pipeline configuration ---
# defines how telemetry data is collected, processed, and exported.
apiVersion: v1
kind: ConfigMap
metadata:
@@ -47,51 +41,39 @@ metadata:
namespace: monitoring
data:
config.alloy: |
// 1. discovery
// 1. Discovery: Find all pods
discovery.kubernetes "k8s_pods" {
role = "pod"
}
// 2. Relabeling (MUST BE DEFINED BEFORE USE)
// This adds 'app', 'namespace', and 'pod' labels to the targets
discovery.relabel "k8s_labels" {
// 2. Relabeling: Filter for ONLY the blog pods
discovery.relabel "blog_pods" {
targets = discovery.kubernetes.k8s_pods.targets
rule {
action = "replace"
action = "keep"
source_labels = ["__meta_kubernetes_pod_label_app"]
target_label = "app"
regex = "severed-blog"
}
rule {
action = "replace"
source_labels = ["__meta_kubernetes_namespace"]
target_label = "namespace"
}
rule {
action = "replace"
source_labels = ["__meta_kubernetes_pod_name"]
target_label = "pod"
action = "replace"
source_labels = ["__address__"]
target_label = "__address__"
regex = "([^:]+)(?::\\d+)?"
replacement = "$1:9113"
}
}
// 3. Filter for Blog Metrics
// Only pass targets where the label 'app' is 'severed-blog'
discovery.relabel "blog_filter" {
targets = discovery.relabel.k8s_labels.output
rule {
source_labels = ["app"]
regex = "severed-blog"
action = "keep"
}
}
// 4. Metrics Pipeline
// 3. Direct Nginx Scraper
prometheus.scrape "nginx_scraper" {
targets = discovery.relabel.blog_filter.output
targets = discovery.relabel.blog_pods.output
forward_to = [prometheus.remote_write.metrics_service.receiver]
job_name = "integrations/nginx"
}
// 4. Host Metrics
prometheus.exporter.unix "host" {
rootfs_path = "/host/root"
sysfs_path = "/host/sys"
@@ -103,15 +85,16 @@ data:
forward_to = [prometheus.remote_write.metrics_service.receiver]
}
// 5. Remote Write: Send to Prometheus
prometheus.remote_write "metrics_service" {
endpoint {
url = sys.env("PROM_URL")
}
}
// 5. Logs Pipeline
// 6. Logs Pipeline: Send to Loki
loki.source.kubernetes "pod_logs" {
targets = discovery.relabel.k8s_labels.output
targets = discovery.relabel.blog_pods.output
forward_to = [loki.write.default.receiver]
}
@@ -120,11 +103,8 @@ data:
url = sys.env("LOKI_URL")
}
}
---
# --- Agent Deployment (DaemonSet) ---
# deploys one alloy agent per node to monitor the entire cluster.
apiVersion: apps/v1
kind: DaemonSet
metadata:
@@ -142,10 +122,7 @@ spec:
serviceAccountName: alloy-sa
hostNetwork: true
hostPID: true
# Forces the pod to use K8s CoreDNS even when running on host network
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: alloy
image: grafana/alloy:latest
@@ -153,12 +130,10 @@ spec:
- run
- --server.http.listen-addr=0.0.0.0:12345
- /etc/alloy/config.alloy
envFrom:
- configMapRef:
name: monitoring-env
optional: false
volumeMounts:
- name: config
mountPath: /etc/alloy
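Alloy's HTTP server (the `--server.http.listen-addr` flag above) is the fastest way to confirm the pipeline loaded. A liveness sketch, assuming the standard `/-/ready` and `/metrics` endpoints Alloy serves:

```bash
# Port-forward the DaemonSet and probe Alloy's own HTTP server.
kubectl -n monitoring port-forward ds/alloy 12345:12345 &
PF_PID=$!
sleep 2
curl -s http://localhost:12345/-/ready     # should answer with a ready message once the config loads
curl -s http://localhost:12345/metrics | grep alloy_build_info
kill $PF_PID
```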

View File

@@ -14,3 +14,9 @@ apiVersion: v1
kind: Namespace
metadata:
name: kubernetes-dashboard
---
apiVersion: v1
kind: Namespace
metadata:
name: openebs

scripts/README.md Normal file
View File

@@ -0,0 +1,102 @@
# Severed-Infra: Health & Diagnostics Guide
### 1. The Foundation: Node & Storage Stability
Before troubleshooting apps, ensure the physical (Docker) layer is stable.
* **Node Readiness:** All 3 nodes (1 server, 2 agents) must be `Ready`.
```bash
kubectl get nodes
```
* **Storage Binding:** Verify that the OpenEBS Persistent Volume Claims (PVCs) for Loki and Prometheus are `Bound`.
```bash
kubectl get pvc -n monitoring
```
* **Pod Health:** Confirm every workload pod is `Running` across the app, monitoring, dashboard, and OpenEBS namespaces; bounce Grafana if it gets stuck.
```bash
kubectl get pods -n severed-apps
kubectl get pods -n monitoring
kubectl get pods -n kubernetes-dashboard
kubectl get pods -n openebs
kubectl rollout restart deployment grafana -n monitoring
```
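* **Cluster-wide Sweep:** If anything above is stuck, list every pod that is not `Running`/`Succeeded` in one shot; a small sketch:
```bash
kubectl get pods -A --field-selector=status.phase!=Running,status.phase!=Succeeded
kubectl get events -A --sort-by=.lastTimestamp | tail -20
```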
---
### 2. The Telemetry Bridge: Alloy & Exporter
Check that the exporter sidecar is translating raw Nginx stub_status text into Prometheus metrics, and that Alloy is scraping them on port 9113.
* **Error Scan:** Check Alloy logs specifically for `scrape_uri` or `connection refused` errors.
```bash
kubectl logs -n monitoring -l name=alloy --tail=50
```
[//]: # (kubectl apply -f infra/alloy-setup.yaml)
[//]: # (kubectl delete pods -n monitoring -l name=alloy)
[//]: # (kubectl get pods -n monitoring)
[//]: # (kubectl describe pod alloy-dq2cd -n monitoring)
[//]: # (kubectl logs -n monitoring -l name=alloy --tail=50)
[//]: # (kubectl get pod -n monitoring -l app=grafana -o jsonpath='{.items[0].spec.containers[0].env}' | jq)
[//]: # (kubectl rollout restart deployment severed-blog -n severed-apps)
* **Internal Handshake:** Run `access-hub.sh` and open `http://localhost:12345` (the Alloy UI).
* Find the `prometheus.scrape.nginx_scraper` component defined in `alloy-setup.yaml`.
* Ensure it reports **Healthy** and lists the blog pod targets; a direct check of the exporter is sketched below.
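* **Exporter Direct Check:** If the UI is ambiguous, curl the sidecar itself; it should already expose the counter the HPA consumes. A sketch, assuming the blog pods carry the `app=severed-blog` label in `severed-apps`:
```bash
POD=$(kubectl get pod -n severed-apps -l app=severed-blog -o jsonpath='{.items[0].metadata.name}')
kubectl -n severed-apps port-forward "$POD" 9113:9113 &
PF_PID=$!
sleep 2
curl -s http://localhost:9113/metrics | grep nginx_http_requests_total
kill $PF_PID
```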
---
### 3. The Database: Prometheus Query Test
If the exporter is working, the metrics will appear in the Prometheus time-series database.
* **Live Traffic Check:** Verify that `nginx_http_requests_total` is returning a data vector (not an empty list `[]`).
```bash
kubectl exec -it prometheus-0 -n monitoring -- \
wget -qO- "http://localhost:9090/api/v1/query?query=nginx_http_requests_total"
```
* **Metric Discovery:** List all Nginx-related metrics currently being stored.
```bash
kubectl exec -it prometheus-0 -n monitoring -- \
wget -qO- "http://localhost:9090/api/v1/label/__name__/values" | grep nginx
```
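* **Rate Check:** The raw counter only proves ingestion; scaling reacts to its per-second rate, so query that too (the `[2m]` window is arbitrary, URL-encoded here for wget):
```bash
kubectl exec -it prometheus-0 -n monitoring -- \
wget -qO- "http://localhost:9090/api/v1/query?query=rate(nginx_http_requests_total%5B2m%5D)"
```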
---
### 4. The "Brain": Horizontal Pod Autoscaler (HPA)
The HPA is the final consumer of this data. If this is healthy, the cluster is auto-scaling correctly.
* **Target Alignment:** The `TARGETS` column should show a real value (e.g., `0/10`) rather than `<unknown>`.
```bash
kubectl get hpa -n severed-apps
```
* **Adapter Check:** Ensure the Custom Metrics API is serving the translated Nginx metrics to the Kubernetes master.
```bash
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/severed-apps/pods/*/nginx_http_requests_total"
```
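* **End-to-End Load Test:** Push synthetic traffic at the blog and watch the replica count react once the average crosses 10 req/s. A rough sketch; the Service name `severed-blog-service` is an assumption based on the manifest file names in `deploy-all.sh`:
```bash
kubectl run load-gen -n severed-apps --image=busybox --restart=Never -- \
/bin/sh -c 'while true; do wget -qO- http://severed-blog-service >/dev/null 2>&1; done'
kubectl get hpa -n severed-apps -w     # watch REPLICAS climb, then Ctrl-C
kubectl delete pod load-gen -n severed-apps
```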
### Cheat Sheet
| Symptom | Probable Cause | Fix |
|----------------------------|-----------------------------|-------------------------------------------|
| `502 Bad Gateway` | Node resource exhaustion | Restart K3d or increase Docker RAM |
| `strconv.ParseFloat` error | Scraping raw stub_status text (no exporter) | Add the nginx-prometheus-exporter sidecar and scrape `:9113` |
| HPA shows `<unknown>` | Prometheus Adapter mismatch | Verify `adapter-values.yaml` metric names |
| `No nodes found` | Corrupted cluster state | Run `k3d cluster delete` and recreate |

scripts/access-hub.sh Normal file
View File

@@ -0,0 +1,24 @@
#!/bin/bash
set -e
# Kill existing tunnels to prevent port conflicts
killall kubectl 2>/dev/null || true  # don't abort under 'set -e' when no tunnels exist
# Kubernetes Dashboard (Kong Proxy)
kubectl -n kubernetes-dashboard port-forward svc/kubernetes-dashboard-kong-proxy 8443:443 &
# Alloy UI (Internal Health)
kubectl -n monitoring port-forward ds/alloy 12345:12345 &
# Grafana
kubectl -n monitoring port-forward svc/grafana-service 3000:3000 &
echo "Dashboard: https://localhost:8443"
echo "Alloy UI: http://localhost:12345"
echo "Grafana: http://localhost:3000"
echo "Grafana: http://grafana.localhost:8080"
echo "Blog: http://blog.localhost:8080"
wait

scripts/deploy-all.sh Normal file
View File

@@ -0,0 +1,50 @@
#!/bin/bash
set -e
# Resolve the repo root relative to this script so it works from any cwd
cd "$(dirname "$0")/.."
# 0. Environment Prep
echo "Importing severed-blog:v0.3 into k3d..."
k3d image import severed-blog:v0.3 -c severed-cluster
# 1. Foundation
kubectl apply -f namespaces.yaml
echo "Installing OpenEBS LocalPV Provisioner..."
kubectl apply -f https://openebs.github.io/charts/openebs-operator.yaml
kubectl apply -f infra/storage/openebs-sc.yaml
# 2. Monitoring Stack
echo "Creating Grafana Secrets..."
kubectl create secret generic grafana-secrets -n monitoring \
--from-literal=admin-user=admin \
--from-literal=admin-password=admin \
--dry-run=client -o yaml | kubectl apply -f -
kubectl apply -f infra/observer/loki.yaml
kubectl apply -f infra/observer/prometheus.yaml
kubectl apply -f infra/alloy-env.yaml
kubectl apply -f infra/alloy-setup.yaml
# 3. Application Layer
kubectl apply -f apps/severed-blog-config.yaml
kubectl apply -f apps/severed-blog.yaml
kubectl apply -f apps/severed-blog-service.yaml
kubectl apply -f apps/severed-blog-hpa.yaml
kubectl apply -f apps/severed-ingress.yaml
# 4. Visualization and Scaling Bridge
kubectl apply -f infra/observer/dashboard-json.yaml
kubectl apply -f infra/observer/grafana.yaml
echo "Installing Prometheus Adapter..."
helm upgrade --install prometheus-adapter prometheus-community/prometheus-adapter \
-n monitoring \
-f infra/observer/adapter-values.yaml
# 5. Dashboard Setup
kubectl apply -f infra/dashboard/dashboard-admin.yaml
kubectl apply -f infra/dashboard/permanent-token.yaml
echo "Deployment Complete. Retrieving Token..."
kubectl -n kubernetes-dashboard get secret admin-user-token -o jsonpath='{.data.token}' | base64 -d
echo -e "\n"
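The script applies everything but returns before the pods are actually up; if it should block until the stack is ready, a readiness gate along these lines could be appended (a sketch, assuming each namespace already has pods scheduled):

```bash
# Optional post-deploy gate: wait for every pod in the core namespaces.
for ns in monitoring severed-apps kubernetes-dashboard openebs; do
  kubectl wait --for=condition=Ready pods --all -n "$ns" --timeout=300s
done
```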

View File

@@ -0,0 +1,15 @@
#!/bin/bash
set -e
# Resets the Grafana admin password inside the running pod via grafana-cli.
# Prompt silently so the new password is not echoed to the terminal.
read -r -s -p "New Grafana admin password: " NEW_PASS
echo
GRAFANA_POD=$(kubectl get pod -n monitoring -l app=grafana -o jsonpath='{.items[0].metadata.name}')
if [ -z "$GRAFANA_POD" ]; then
echo "Error: Grafana pod not found."
exit 1
fi
echo "Setting Grafana admin password..."
kubectl exec -it -n monitoring "$GRAFANA_POD" -- grafana-cli admin reset-admin-password "$NEW_PASS"