fixed nginx_http_requests_total, added sh scripts
@@ -51,6 +51,7 @@ data:
    access_log off;
    allow 127.0.0.1;
    allow 10.0.0.0/8;     # Allow internal cluster pods
    allow 172.16.0.0/12;  # Allow K3d/Docker internal bridge network
    deny all;
  }
}
@@ -26,7 +26,7 @@ spec:
  - type: Pods
    pods:
      metric:
        name: http_requests_per_second
        name: nginx_http_requests_total
      target:
        type: AverageValue
        averageValue: 10 # Scale up if requests > 10 per second per pod
@@ -21,15 +21,33 @@ spec:
        - containerPort: 80
      resources:
        requests:
          cpu: "50m" # 0.05 cores (Guaranteed to the pod)
          memory: "64Mi" # 64 Megabytes
          cpu: "50m"
          memory: "64Mi"
        limits:
          cpu: "200m" # Max allowed (Prevents one pod from eating the Mac's CPU)
          memory: "128Mi" # Max allowed
          cpu: "200m"
          memory: "128Mi"
      volumeMounts:
        - name: nginx-config-vol
          mountPath: /etc/nginx/conf.d/default.conf
          subPath: default.conf

    # --- ADD THE EXPORTER SIDECAR HERE ---
    - name: exporter
      image: nginx/nginx-prometheus-exporter:latest
      args:
        - -nginx.scrape-uri=http://localhost:80/metrics
      ports:
        - containerPort: 9113
          name: metrics
      resources:
        requests:
          cpu: "10m"
          memory: "32Mi"
        limits:
          cpu: "50m"
          memory: "64Mi"
    # -------------------------------------

  volumes:
    - name: nginx-config-vol
      configMap:
@@ -1,6 +1,4 @@
# --- RBAC configuration ---
# creates a serviceaccount with permissions to discover pods and read logs.

apiVersion: v1
kind: ServiceAccount
metadata:
@@ -13,11 +11,9 @@ kind: ClusterRole
metadata:
  name: alloy-cluster-role
rules:
  # discovery permissions: allows alloy to find targets: Nodes, Pods, Services.
  - apiGroups: [ "" ]
    resources: [ "nodes", "nodes/proxy", "services", "endpoints", "pods" ]
    verbs: [ "get", "list", "watch" ]
  # log access: required for 'loki.source.kubernetes' to tail logs.
  - apiGroups: [ "" ]
    resources: [ "pods/log" ]
    verbs: [ "get", "list", "watch" ]
@@ -38,8 +34,6 @@ subjects:

---
# --- Alloy pipeline configuration ---
# defines how telemetry data is collected, processed, and exported.

apiVersion: v1
kind: ConfigMap
metadata:
@@ -47,51 +41,39 @@ metadata:
  namespace: monitoring
data:
  config.alloy: |
    // 1. discovery
    // 1. Discovery: Find all pods
    discovery.kubernetes "k8s_pods" {
      role = "pod"
    }

    // 2. Relabeling (MUST BE DEFINED BEFORE USE)
    // This adds 'app', 'namespace', and 'pod' labels to the targets
    discovery.relabel "k8s_labels" {
    // 2. Relabeling: Filter for ONLY the blog pods
    discovery.relabel "blog_pods" {
      targets = discovery.kubernetes.k8s_pods.targets

      rule {
        action = "replace"
        source_labels = ["__meta_kubernetes_pod_label_app"]
        target_label = "app"
      }
      rule {
        action = "replace"
        source_labels = ["__meta_kubernetes_namespace"]
        target_label = "namespace"
      }
      rule {
        action = "replace"
        source_labels = ["__meta_kubernetes_pod_name"]
        target_label = "pod"
      }
    }

    // 3. Filter for Blog Metrics
    // Only pass targets where the label 'app' is 'severed-blog'
    discovery.relabel "blog_filter" {
      targets = discovery.relabel.k8s_labels.output
      rule {
        source_labels = ["app"]
        regex = "severed-blog"
        action = "keep"
        source_labels = ["__meta_kubernetes_pod_label_app"]
        regex = "severed-blog"
      }

      rule {
        action = "replace"
        source_labels = ["__address__"]
        target_label = "__address__"
        regex = "([^:]+)(?::\\d+)?"
        replacement = "$1:9113"
      }
    }

    // 4. Metrics Pipeline
    // 3. Direct Nginx Scraper
    prometheus.scrape "nginx_scraper" {
      targets = discovery.relabel.blog_filter.output
      targets = discovery.relabel.blog_pods.output

      forward_to = [prometheus.remote_write.metrics_service.receiver]
      job_name = "integrations/nginx"
    }

    // 4. Host Metrics
    prometheus.exporter.unix "host" {
      rootfs_path = "/host/root"
      sysfs_path = "/host/sys"
@@ -103,15 +85,16 @@ data:
      forward_to = [prometheus.remote_write.metrics_service.receiver]
    }

    // 5. Remote Write: Send to Prometheus
    prometheus.remote_write "metrics_service" {
      endpoint {
        url = sys.env("PROM_URL")
      }
    }

    // 5. Logs Pipeline
    // 6. Logs Pipeline: Send to Loki
    loki.source.kubernetes "pod_logs" {
      targets = discovery.relabel.k8s_labels.output
      targets = discovery.relabel.blog_pods.output
      forward_to = [loki.write.default.receiver]
    }
@@ -120,11 +103,8 @@ data:
        url = sys.env("LOKI_URL")
      }
    }

---
# --- Agent Deployment (DaemonSet) ---
# deploys one alloy agent per node to monitor the entire cluster.

apiVersion: apps/v1
kind: DaemonSet
metadata:
@@ -142,10 +122,7 @@ spec:
      serviceAccountName: alloy-sa
      hostNetwork: true
      hostPID: true

      # Forces the pod to use K8s CoreDNS even when running on host network
      dnsPolicy: ClusterFirstWithHostNet

      containers:
        - name: alloy
          image: grafana/alloy:latest
@@ -153,12 +130,10 @@ spec:
            - run
            - --server.http.listen-addr=0.0.0.0:12345
            - /etc/alloy/config.alloy

          envFrom:
            - configMapRef:
                name: monitoring-env
                optional: false

          volumeMounts:
            - name: config
              mountPath: /etc/alloy
@@ -14,3 +14,9 @@ apiVersion: v1
kind: Namespace
metadata:
  name: kubernetes-dashboard

---
apiVersion: v1
kind: Namespace
metadata:
  name: openebs
scripts/README.md (new file, 102 lines)
@@ -0,0 +1,102 @@
# Severed-Infra: Health & Diagnostics Guide

### 1. The Foundation: Node & Storage Stability

Before troubleshooting apps, ensure the physical (Docker) layer is stable.

* **Node Readiness:** All 3 nodes (1 server, 2 agents) must be `Ready`.

```bash
kubectl get nodes
```
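
Resource pressure on the nodes is the usual precursor to the `502 Bad Gateway` symptom in the cheat sheet below. A quick way to spot it is a minimal sketch like this, assuming the metrics-server that k3s bundles by default is running:

```bash
# Per-node CPU/memory usage; spikes here usually precede 502s and evictions
kubectl top nodes

# Per-pod usage inside the app namespace
kubectl top pods -n severed-apps
```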

* **Storage Binding:** Verify that the OpenEBS Persistent Volume Claims (PVCs) for Loki and Prometheus are `Bound`.

```bash
kubectl get pvc -n monitoring
```

[//]: # (todo add: kubectl get pods -n openebs)

* **Pod Health:** Confirm that every workload namespace is running cleanly.

```bash
kubectl get pods -n severed-apps
kubectl get pods -n monitoring
kubectl get pods -n kubernetes-dashboard
kubectl get pods -n openebs
```

* **Grafana Recovery:** If Grafana is stuck or misconfigured, restart its deployment.

```bash
kubectl rollout restart deployment grafana -n monitoring
```

---

### 2. The Telemetry Bridge: Alloy & Exporter

Check that the exporter sidecar is translating the raw Nginx status page into Prometheus metrics, and that Alloy is scraping and forwarding them.

* **Error Scan:** Check the Alloy logs specifically for `scrape_uri` or `connection refused` errors.

```bash
kubectl logs -n monitoring -l name=alloy --tail=50
```

[//]: # (kubectl apply -f infra/alloy-setup.yaml)
[//]: # (kubectl delete pods -n monitoring -l name=alloy)
[//]: # (kubectl get pods -n monitoring)
[//]: # (kubectl describe pod alloy-dq2cd -n monitoring)
[//]: # (kubectl logs -n monitoring -l name=alloy --tail=50)
[//]: # (kubectl get pod -n monitoring -l app=grafana -o jsonpath='{.items[0].spec.containers[0].env}' | jq)

[//]: # (kubectl rollout restart deployment severed-blog -n severed-apps)

* **Internal Handshake:** Run your `access-hub.sh` script and visit `localhost:12345` (a CLI alternative is sketched below).
  * Find the `prometheus.scrape.nginx_scraper` component.
  * Ensure the health status is **Green/Up**.
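
If the Alloy UI is unreachable, you can talk to the exporter sidecar directly. This is only a sketch: it assumes the blog deployment is named `severed-blog` in the `severed-apps` namespace (matching the rollout command above) and that `curl` is available on your machine.

```bash
# Tunnel the exporter sidecar's metrics port (9113) from the blog deployment
kubectl port-forward -n severed-apps deploy/severed-blog 9113:9113 &
PF_PID=$!
sleep 2

# A healthy exporter returns nginx_* series, including nginx_http_requests_total
curl -s http://localhost:9113/metrics | grep '^nginx_'

# Clean up the tunnel
kill "$PF_PID"
```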

---

### 3. The Database: Prometheus Query Test

If the exporter is working, the metrics will appear in the Prometheus time-series database.

* **Live Traffic Check:** Verify that `nginx_http_requests_total` is returning a data vector (not an empty list `[]`).

```bash
kubectl exec -it prometheus-0 -n monitoring -- \
  wget -qO- "http://localhost:9090/api/v1/query?query=nginx_http_requests_total"
```

* **Metric Discovery:** List all Nginx-related metrics currently being stored.

```bash
kubectl exec -it prometheus-0 -n monitoring -- \
  wget -qO- "http://localhost:9090/api/v1/label/__name__/values" | grep nginx
```
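
`nginx_http_requests_total` is a cumulative counter, so the number the HPA ultimately consumes is typically a per-second rate derived from it. You can preview that rate with a `rate()` query; this is a sketch where the 2-minute window is an arbitrary choice, the real window comes from `adapter-values.yaml`:

```bash
# %5B / %5D are the URL-encoded [ ] of the PromQL range selector rate(...[2m])
kubectl exec -it prometheus-0 -n monitoring -- \
  wget -qO- "http://localhost:9090/api/v1/query?query=rate(nginx_http_requests_total%5B2m%5D)"
```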

---

### 4. The "Brain": Horizontal Pod Autoscaler (HPA)

The HPA is the final consumer of this data. If it is healthy, the cluster is auto-scaling correctly.

* **Target Alignment:** The `TARGETS` column should show a real value (e.g., `0/10`) rather than `<unknown>`.

```bash
kubectl get hpa -n severed-apps
```

* **Adapter Check:** Ensure the Custom Metrics API is serving the translated Nginx metrics to the Kubernetes control plane.

```bash
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1/namespaces/severed-apps/pods/*/nginx_http_requests_total"
```
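
To watch the whole chain react end to end, generate a short burst of traffic and keep an eye on the HPA. This is a sketch: it assumes the blog is reachable at `http://blog.localhost:8080`, the URL printed by `access-hub.sh`, and that `curl` is installed locally.

```bash
# ~300 requests over ~30 seconds, then watch the TARGETS column climb
for i in $(seq 1 300); do
  curl -s -o /dev/null http://blog.localhost:8080
  sleep 0.1
done

kubectl get hpa -n severed-apps -w
```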

---

### Cheat Sheet

| Symptom                     | Probable Cause              | Fix                                                             |
|-----------------------------|-----------------------------|-----------------------------------------------------------------|
| `502 Bad Gateway`           | Node resource exhaustion    | Restart K3d or increase Docker RAM                              |
| `strconv.ParseFloat` error  | Missing Nginx Exporter      | Use `prometheus.exporter.nginx` in Alloy                        |
| HPA shows `<unknown>`       | Prometheus Adapter mismatch | Verify `adapter-values.yaml` metric names (see the check below) |
| `No nodes found`            | Corrupted cluster state     | Run `k3d cluster delete` and recreate                           |
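
For the adapter-mismatch row, the quickest check is to compare the metric name the HPA asks for with what the adapter actually exposes. A sketch, assuming the adapter was installed via the Helm release in `deploy-all.sh`:

```bash
# Metric name requested by the HPA spec
kubectl get hpa -n severed-apps -o jsonpath='{.items[*].spec.metrics[*].pods.metric.name}'; echo

# Metric names the Prometheus Adapter is currently serving
kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta1" | grep -o '"pods/[^"]*"'
```
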
scripts/access-hub.sh (new file, 24 lines)
@@ -0,0 +1,24 @@
#!/bin/bash

set -e

# Kill existing tunnels to prevent port conflicts
# (|| true keeps set -e from aborting when no kubectl processes are running)
killall kubectl 2>/dev/null || true

# Kubernetes Dashboard (Kong Proxy)
kubectl -n kubernetes-dashboard port-forward svc/kubernetes-dashboard-kong-proxy 8443:443 &

# Alloy UI (Internal Health)
kubectl -n monitoring port-forward ds/alloy 12345:12345 &

# Grafana
kubectl -n monitoring port-forward svc/grafana-service 3000:3000 &

echo "Dashboard: https://localhost:8443"
echo "Alloy UI:  http://localhost:12345"
echo "Grafana:   http://localhost:3000"

# Ingress routes (assumes the k3d load balancer maps port 8080; no tunnel needed)
echo "Grafana: http://grafana.localhost:8080"
echo "Blog: http://blog.localhost:8080"

wait
scripts/deploy-all.sh (new file, 50 lines)
@@ -0,0 +1,50 @@
#!/bin/bash
set -e

# Run from the repository root, regardless of where the script is invoked from
cd "$(dirname "$0")/.."

# 0. Environment Prep
echo "Importing severed-blog:v0.3 into k3d..."
k3d image import severed-blog:v0.3 -c severed-cluster

# 1. Foundation
kubectl apply -f namespaces.yaml

echo "Installing OpenEBS LocalPV Provisioner..."
kubectl apply -f https://openebs.github.io/charts/openebs-operator.yaml
kubectl apply -f infra/storage/openebs-sc.yaml

# 2. Monitoring Stack
echo "Creating Grafana Secrets..."
kubectl create secret generic grafana-secrets -n monitoring \
  --from-literal=admin-user=admin \
  --from-literal=admin-password=admin \
  --dry-run=client -o yaml | kubectl apply -f -

kubectl apply -f infra/observer/loki.yaml
kubectl apply -f infra/observer/prometheus.yaml
kubectl apply -f infra/alloy-env.yaml
kubectl apply -f infra/alloy-setup.yaml

# 3. Application Layer
kubectl apply -f apps/severed-blog-config.yaml
kubectl apply -f apps/severed-blog.yaml
kubectl apply -f apps/severed-blog-service.yaml
kubectl apply -f apps/severed-blog-hpa.yaml
kubectl apply -f apps/severed-ingress.yaml

# 4. Visualization and Scaling Bridge
kubectl apply -f infra/observer/dashboard-json.yaml
kubectl apply -f infra/observer/grafana.yaml

echo "Installing Prometheus Adapter..."
# Make sure the prometheus-community chart repo is available before installing
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts 2>/dev/null || true
helm repo update
helm upgrade --install prometheus-adapter prometheus-community/prometheus-adapter \
  -n monitoring \
  -f infra/observer/adapter-values.yaml

# 5. Dashboard Setup
kubectl apply -f infra/dashboard/dashboard-admin.yaml
kubectl apply -f infra/dashboard/permanent-token.yaml

echo "Deployment Complete. Retrieving Token..."
kubectl -n kubernetes-dashboard get secret admin-user-token -o jsonpath='{.data.token}' | base64 -d
echo -e "\n"
scripts/setup-grafana-creds.sh (new file, 15 lines)
@@ -0,0 +1,15 @@
#!/bin/bash

set -e

# Prompt for the new password without echoing it to the terminal
read -r -s -p "Enter new Grafana admin password: " NEW_PASS
echo

# || true keeps set -e from aborting when no Grafana pod exists yet
GRAFANA_POD=$(kubectl get pod -n monitoring -l app=grafana -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || true)

if [ -z "$GRAFANA_POD" ]; then
  echo "Error: Grafana pod not found."
  exit 1
fi

echo "Setting Grafana admin password..."
kubectl exec -it -n monitoring "$GRAFANA_POD" -- grafana-cli admin reset-admin-password "$NEW_PASS"