added HPA (cpu, ram, traffic)
This commit is contained in:
@@ -44,4 +44,13 @@ data:
|
||||
# logging / lb config
|
||||
real_ip_header X-Forwarded-For;
|
||||
set_real_ip_from 10.0.0.0/8;
|
||||
|
||||
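# nginx stub_status endpoint scraped by Alloy/Prometheus (NOTE(review): stub_status emits plain text, not Prometheus exposition format — confirm an exporter/converter sits in between)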
|
||||
location /metrics {
|
||||
stub_status on;
|
||||
access_log off;
|
||||
allow 127.0.0.1;
|
||||
allow 10.0.0.0/8; # Allow internal cluster pods
|
||||
deny all;
|
||||
}
|
||||
}
|
||||
|
||||
41
apps/severed-blog-hpa.yaml
Normal file
41
apps/severed-blog-hpa.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: severed-blog-hpa
|
||||
namespace: severed-apps
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: severed-blog
|
||||
minReplicas: 2 # Never drop below 2 for HA
|
||||
maxReplicas: 6 # Maximum number of pods to prevent cluster exhaustion
|
||||
metrics:
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 70 # Scale up when average CPU usage exceeds 70% of the pods' CPU request
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: 80 # Scale up when average RAM usage exceeds 80% of the pods' memory request
|
||||
- type: Pods
|
||||
pods:
|
||||
metric:
|
||||
name: http_requests_per_second
|
||||
target:
|
||||
type: AverageValue
|
||||
averageValue: 10 # Scale up if requests > 10 per second per pod
|
||||
behavior:
|
||||
scaleDown:
|
||||
stabilizationWindowSeconds: 300 # Wait 5 minutes before removing a pod
|
||||
policies:
|
||||
- type: Percent
|
||||
value: 100
|
||||
periodSeconds: 15
|
||||
scaleUp:
|
||||
stabilizationWindowSeconds: 0 # Scale up immediately when busy
|
||||
@@ -19,12 +19,17 @@ spec:
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 80
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: "50m" # 0.05 cores (Guaranteed to the pod)
|
||||
memory: "64Mi" # 64 mebibytes (MiB)
|
||||
limits:
|
||||
cpu: "200m" # Max allowed (Prevents one pod from eating the Mac's CPU)
|
||||
memory: "128Mi" # Max allowed
|
||||
volumeMounts:
|
||||
- name: nginx-config-vol
|
||||
mountPath: /etc/nginx/conf.d/default.conf
|
||||
subPath: default.conf
|
||||
|
||||
volumes:
|
||||
- name: nginx-config-vol
|
||||
configMap:
|
||||
|
||||
@@ -47,34 +47,13 @@ metadata:
|
||||
namespace: monitoring
|
||||
data:
|
||||
config.alloy: |
|
||||
// 1. discovery (Shared by Logs and Metrics)
|
||||
// 1. discovery
|
||||
discovery.kubernetes "k8s_pods" {
|
||||
role = "pod"
|
||||
}
|
||||
|
||||
// 2. metrics pipeline
|
||||
// A. read host hardware stats (CPU/RAM)
|
||||
prometheus.exporter.unix "host" {
|
||||
rootfs_path = "/host/root"
|
||||
sysfs_path = "/host/sys"
|
||||
procfs_path = "/host/proc"
|
||||
}
|
||||
|
||||
// B. scrape those stats
|
||||
prometheus.scrape "host_scraper" {
|
||||
targets = prometheus.exporter.unix.host.targets
|
||||
forward_to = [prometheus.remote_write.metrics_service.receiver]
|
||||
}
|
||||
|
||||
// C. send to Prometheus
|
||||
prometheus.remote_write "metrics_service" {
|
||||
endpoint {
|
||||
url = sys.env("PROM_URL")
|
||||
}
|
||||
}
|
||||
|
||||
// 3. logs pipeline (With Relabeling Fix)
|
||||
// A. relabeling: Promote hidden K8s tags to real labels
|
||||
// 2. Relabeling (output is consumed by the blog filter and the logs pipeline)
|
||||
// This adds 'app', 'namespace', and 'pod' labels to the targets
|
||||
discovery.relabel "k8s_labels" {
|
||||
targets = discovery.kubernetes.k8s_pods.targets
|
||||
|
||||
@@ -83,38 +62,65 @@ data:
|
||||
source_labels = ["__meta_kubernetes_pod_label_app"]
|
||||
target_label = "app"
|
||||
}
|
||||
|
||||
rule {
|
||||
action = "replace"
|
||||
source_labels = ["__meta_kubernetes_namespace"]
|
||||
target_label = "namespace"
|
||||
}
|
||||
|
||||
rule {
|
||||
action = "replace"
|
||||
source_labels = ["__meta_kubernetes_pod_name"]
|
||||
target_label = "pod"
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 3. Filter for Blog Metrics
|
||||
// Only pass targets where the label 'app' is 'severed-blog'
|
||||
discovery.relabel "blog_filter" {
|
||||
targets = discovery.relabel.k8s_labels.output
|
||||
rule {
|
||||
action = "replace"
|
||||
source_labels = ["__meta_kubernetes_pod_container_name"]
|
||||
target_label = "container"
|
||||
source_labels = ["app"]
|
||||
regex = "severed-blog"
|
||||
action = "keep"
|
||||
}
|
||||
}
|
||||
|
||||
// B. tail logs: using the relabeled targets
|
||||
// 4. Metrics Pipeline
|
||||
prometheus.scrape "nginx_scraper" {
|
||||
targets = discovery.relabel.blog_filter.output
|
||||
forward_to = [prometheus.remote_write.metrics_service.receiver]
|
||||
job_name = "integrations/nginx"
|
||||
}
|
||||
|
||||
prometheus.exporter.unix "host" {
|
||||
rootfs_path = "/host/root"
|
||||
sysfs_path = "/host/sys"
|
||||
procfs_path = "/host/proc"
|
||||
}
|
||||
|
||||
prometheus.scrape "host_scraper" {
|
||||
targets = prometheus.exporter.unix.host.targets
|
||||
forward_to = [prometheus.remote_write.metrics_service.receiver]
|
||||
}
|
||||
|
||||
prometheus.remote_write "metrics_service" {
|
||||
endpoint {
|
||||
url = sys.env("PROM_URL")
|
||||
}
|
||||
}
|
||||
|
||||
// 5. Logs Pipeline
|
||||
loki.source.kubernetes "pod_logs" {
|
||||
targets = discovery.relabel.k8s_labels.output
|
||||
forward_to = [loki.write.default.receiver]
|
||||
}
|
||||
|
||||
// C. send to Loki
|
||||
loki.write "default" {
|
||||
endpoint {
|
||||
url = sys.env("LOKI_URL")
|
||||
}
|
||||
}
|
||||
|
||||
---
|
||||
# --- Agent Deployment (DaemonSet) ---
|
||||
# deploys one alloy agent per node to monitor the entire cluster.
|
||||
|
||||
16
infra/observer/adapter-values.yaml
Normal file
16
infra/observer/adapter-values.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
prometheus:
|
||||
url: http://prometheus.monitoring.svc.cluster.local
|
||||
port: 9090
|
||||
|
||||
rules:
|
||||
default: true
|
||||
custom:
|
||||
- seriesQuery: '{__name__=~"nginx_status_requests",namespace!="",pod!=""}'
|
||||
resources:
|
||||
overrides:
|
||||
namespace: {resource: "namespace"}
|
||||
pod: {resource: "pod"}
|
||||
name:
|
||||
matches: "^nginx_status_requests"
|
||||
as: "http_requests_per_second"
|
||||
metricsQuery: 'sum(rate(<<.Series>>{<<.LabelMatchers>>}[1m])) by (<<.GroupBy>>)'
|
||||
@@ -1,4 +1,4 @@
|
||||
# --- Configuration ---
|
||||
# Configuration
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
@@ -14,7 +14,7 @@ data:
|
||||
out_of_order_time_window: 1m
|
||||
|
||||
---
|
||||
# --- Service ---
|
||||
# Service
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
@@ -29,7 +29,7 @@ spec:
|
||||
targetPort: 9090
|
||||
|
||||
---
|
||||
# --- The Database (StatefulSet) ---
|
||||
# The Database (StatefulSet)
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
|
||||
Reference in New Issue
Block a user