ops: add CI/CD pipeline, a/b rolling deploy, Gitea Actions workflow
Deploy to Production / deploy (push) Failing after 10s
Deploy to Production / deploy (push) Failing after 10s
- .gitea/workflows/deploy.yml — push-to-main triggers rolling deploy
- scripts/deploy-bluegreen.sh — a-stack then b-stack restart; Maven runs in Docker (no JDK needed on runner host); Caddy reload at end
- scripts/deploy-all.ps1 — emergency manual deploy from dev machine
- infra/docker-compose.yml — a/b pairs per service; wget health checks; Gitea service; Prometheus/Grafana/DB ports restricted to localhost
- infra/Caddyfile — dual upstreams with health-based routing
- infra/Dockerfile.* — one per service
- infra/prometheus.yml + grafana provisioning

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,249 @@
|
||||
{
|
||||
"uid": "apix-registry-perf",
|
||||
"title": "APIX Registry — Performance",
|
||||
"tags": ["apix", "registry"],
|
||||
"timezone": "browser",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-1h", "to": "now" },
|
||||
"templating": { "list": [] },
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Request rate",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "sum(rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
|
||||
"instant": true,
|
||||
"legendFormat": "req/s"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "reqps",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 50 },
|
||||
{ "color": "red", "value": 200 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "P50 latency",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.50, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"instant": true,
|
||||
"legendFormat": "P50 ms"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 100 },
|
||||
{ "color": "red", "value": 500 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "P95 latency",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"instant": true,
|
||||
"legendFormat": "P95 ms"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 200 },
|
||||
{ "color": "red", "value": 1000 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
}
|
||||
},
|
||||
{
  "id": 4,
  "title": "Error rate",
  "description": "Share of requests with outcome=SERVER_ERROR over the last 5 minutes. The numerator falls back to vector(0) so the panel shows 0% rather than \"No data\" when no server-error series exists.",
  "type": "stat",
  "gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 },
  "targets": [
    {
      "datasource": "Prometheus",
      "expr": "100 * (sum(rate(http_server_requests_seconds_count{job=\"apix-registry\",outcome=\"SERVER_ERROR\"}[5m])) or vector(0)) / sum(rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
      "instant": true,
      "legendFormat": "error %"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "color": { "mode": "thresholds" },
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "yellow", "value": 1 },
          { "color": "red", "value": 5 }
        ]
      }
    }
  },
  "options": {
    "reduceOptions": { "calcs": ["lastNotNull"] },
    "colorMode": "background",
    "graphMode": "none"
  }
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Latency by endpoint — P50 / P95 (ms)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 9 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.50, sum by (le, uri) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"legendFormat": "P50 {{uri}}"
|
||||
},
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.95, sum by (le, uri) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"legendFormat": "P95 {{uri}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "ms" },
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "Request rate by endpoint (req/s)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 9 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "sum by (method, uri) (rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
|
||||
"legendFormat": "{{method}} {{uri}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "reqps" },
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"title": "HTTP status code distribution",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 0, "y": 13, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "sum by (status) (rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
|
||||
"legendFormat": "HTTP {{status}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "reqps" },
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byRegexp", "options": "HTTP 4.." },
|
||||
"properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byRegexp", "options": "HTTP 5.." },
|
||||
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"title": "IoT replacements endpoint — P50 / P95 latency (ms)",
|
||||
"description": "Focused view on GET /services/{id}/replacements — the hot path for IoT device discovery.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 12, "y": 13, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\",uri=\"/services/{id}/replacements\"}[5m]))) * 1000",
|
||||
"legendFormat": "P95 /replacements"
|
||||
},
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.50, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\",uri=\"/services/{id}/replacements\"}[5m]))) * 1000",
|
||||
"legendFormat": "P50 /replacements"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "ms" },
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Grafana dashboard provisioning: registers a single file-based provider.
apiVersion: 1

providers:
  - name: APIX
    type: file
    # Dashboards loaded by this provider cannot be deleted from the Grafana UI.
    disableDeletion: true
    # Interval, in seconds, between scans of the directory for changed files.
    updateIntervalSeconds: 30
    options:
      # Directory scanned for dashboard JSON files.
      path: /etc/grafana/provisioning/dashboards
|
||||
Reference in New Issue
Block a user