ops: add CI/CD pipeline, a/b rolling deploy, Gitea Actions workflow
Deploy to Production / deploy (push) Failing after 10s
Deploy to Production / deploy (push) Failing after 10s
- .gitea/workflows/deploy.yml — push-to-main triggers rolling deploy - scripts/deploy-bluegreen.sh — a-stack then b-stack restart; Maven runs in Docker (no JDK needed on runner host); Caddy reload at end - scripts/deploy-all.ps1 — emergency manual deploy from dev machine - infra/docker-compose.yml — a/b pairs per service; wget health checks; Gitea service; Prometheus/Grafana/DB ports restricted to localhost - infra/Caddyfile — dual upstreams with health-based routing - infra/Dockerfile.* — one per service - infra/prometheus.yml + grafana provisioning Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
# Portal site: TLS-terminating reverse proxy in front of the portal a/b pair.
www.api-index.org {
	reverse_proxy portal-a:8081 portal-b:8081 {
		# Always pick the first upstream that is currently available.
		lb_policy first
		# Keep trying to select an available upstream for up to 5s when the
		# chosen one cannot be reached, so a request that arrives while one
		# instance is restarting is retried on the other instead of failing.
		lb_try_duration 5s
		# Active health check: poll the liveness endpoint every 5s.
		health_uri /q/health/live
		health_interval 5s
		# Passive health check: remember a failed request for 30s.
		fail_duration 30s
	}
	header Strict-Transport-Security "max-age=31536000; includeSubDomains"
}
|
||||
|
||||
# Registry API site: reverse proxy in front of the registry a/b pair.
api-index.org {
	reverse_proxy registry-a:8180 registry-b:8180 {
		# Always pick the first upstream that is currently available.
		lb_policy first
		# Keep trying to select an available upstream for up to 5s when the
		# chosen one cannot be reached, so a request that arrives while one
		# instance is restarting is retried on the other instead of failing.
		lb_try_duration 5s
		# Active health check: poll the liveness endpoint every 5s.
		health_uri /q/health/live
		health_interval 5s
		# Passive health check: remember a failed request for 30s.
		fail_duration 30s
	}

	# Security response headers; the leading "-" removes the Server header.
	header {
		Strict-Transport-Security "max-age=31536000; includeSubDomains"
		X-Content-Type-Options "nosniff"
		X-Frame-Options "DENY"
		-Server
	}

	# JSON access log written to a file.
	log {
		output file /var/log/caddy/api-index.log
		format json
	}
}
|
||||
|
||||
# Demo site: reverse proxy in front of the demo a/b pair.
demo.api-index.org {
	reverse_proxy demo-a:8083 demo-b:8083 {
		# Always pick the first upstream that is currently available.
		lb_policy first
		# Keep trying to select an available upstream for up to 5s when the
		# chosen one cannot be reached, so a request that arrives while one
		# instance is restarting is retried on the other instead of failing.
		lb_try_duration 5s
		# Active health check: poll the liveness endpoint every 5s.
		health_uri /q/health/live
		health_interval 5s
		# Passive health check: remember a failed request for 30s.
		fail_duration 30s
	}
	header Strict-Transport-Security "max-age=31536000; includeSubDomains"
	header X-Content-Type-Options "nosniff"
	# The leading "-" removes the Server header from responses.
	header -Server
}
|
||||
|
||||
# Gitea web UI: single upstream, no load balancing.
git.api-index.org {
	reverse_proxy gitea:3001

	# Same two header operations as before, grouped in one block:
	# set HSTS and remove the Server header.
	header {
		Strict-Transport-Security "max-age=31536000; includeSubDomains"
		-Server
	}
}
|
||||
|
||||
# grafana.api-index.org — access via SSH tunnel for now:
|
||||
# ssh -L 3000:localhost:3000 deploy@204.168.156.179
|
||||
# Uncomment when DNS record is added and bcrypt hash is generated:
|
||||
# caddy hash-password --plaintext <password>
|
||||
# grafana.api-index.org {
|
||||
# basic_auth {
|
||||
# admin $2a$14$REPLACE_WITH_BCRYPT_HASH
|
||||
# }
|
||||
# reverse_proxy grafana:3000
|
||||
# header Strict-Transport-Security "max-age=31536000; includeSubDomains"
|
||||
# }
|
||||
@@ -0,0 +1,8 @@
|
||||
# Runtime image for the demo service: JRE only, runs quarkus-app/quarkus-run.jar.
FROM eclipse-temurin:21-jre-alpine

WORKDIR /app

# Create the unprivileged apix user and give it the (still empty) work directory.
RUN addgroup -S apix && adduser -S apix -G apix && chown apix:apix /app

# Assign ownership while copying; a separate `chown -R` after COPY would
# store every application file a second time in an additional layer.
COPY --chown=apix:apix apix-demo/target/quarkus-app/ quarkus-app/

USER apix
EXPOSE 8083
ENTRYPOINT ["java", "-jar", "quarkus-app/quarkus-run.jar"]
|
||||
@@ -0,0 +1,8 @@
|
||||
# Runtime image for the portal service: JRE only, runs quarkus-app/quarkus-run.jar.
FROM eclipse-temurin:21-jre-alpine

WORKDIR /app

# Create the unprivileged apix user and give it the (still empty) work directory.
RUN addgroup -S apix && adduser -S apix -G apix && chown apix:apix /app

# Assign ownership while copying; a separate `chown -R` after COPY would
# store every application file a second time in an additional layer.
COPY --chown=apix:apix apix-portal/target/quarkus-app/ quarkus-app/

USER apix
EXPOSE 8081
ENTRYPOINT ["java", "-jar", "quarkus-app/quarkus-run.jar"]
|
||||
@@ -0,0 +1,8 @@
|
||||
# Runtime image for the registry service: JRE only, runs quarkus-app/quarkus-run.jar.
FROM eclipse-temurin:21-jre-alpine

WORKDIR /app

# Create the unprivileged apix user. Also pre-create /app/sanctions owned by
# apix: the compose file mounts the sanctions_cache volume there, and a newly
# created named volume takes its ownership from the image directory. Without
# this the mount point would be created root-owned and not writable by apix.
RUN addgroup -S apix && adduser -S apix -G apix \
    && mkdir -p /app/sanctions \
    && chown -R apix:apix /app

# Assign ownership while copying; a separate `chown -R` after COPY would
# store every application file a second time in an additional layer.
COPY --chown=apix:apix apix-registry/target/quarkus-app/ quarkus-app/

USER apix
EXPOSE 8180
ENTRYPOINT ["java", "-jar", "quarkus-app/quarkus-run.jar"]
|
||||
@@ -0,0 +1,8 @@
|
||||
# Runtime image for the spider service: JRE only, runs quarkus-app/quarkus-run.jar.
FROM eclipse-temurin:21-jre-alpine

WORKDIR /app

# Create the unprivileged apix user and give it the (still empty) work directory.
RUN addgroup -S apix && adduser -S apix -G apix && chown apix:apix /app

# Assign ownership while copying; a separate `chown -R` after COPY would
# store every application file a second time in an additional layer.
COPY --chown=apix:apix apix-spider/target/quarkus-app/ quarkus-app/

USER apix
EXPOSE 8082
ENTRYPOINT ["java", "-jar", "quarkus-app/quarkus-run.jar"]
|
||||
+183
-19
@@ -1,8 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
# Production service topology. For local JVM dev mode see docker-compose.override.yml (Block 5 / I-02).
|
||||
# Images are built and pushed by CI (Block 5 / I-21); Dockerfiles are Block 5-6 (I-04 to I-06).
|
||||
|
||||
services:
|
||||
|
||||
db:
|
||||
@@ -12,7 +7,7 @@ services:
|
||||
POSTGRES_PASSWORD: ${APIX_DB_PASSWORD:-apix}
|
||||
POSTGRES_DB: ${APIX_DB_NAME:-apix}
|
||||
ports:
|
||||
- "${APIX_DB_PORT:-5432}:5432"
|
||||
- "127.0.0.1:${APIX_DB_PORT:-5432}:5432"
|
||||
volumes:
|
||||
- db_data:/var/lib/postgresql/data
|
||||
healthcheck:
|
||||
@@ -22,7 +17,9 @@ services:
|
||||
retries: 5
|
||||
restart: unless-stopped
|
||||
|
||||
registry:
|
||||
# ── Registry (a/b for rolling zero-downtime deploys) ──────────────────────
|
||||
|
||||
registry-a:
|
||||
image: apix-registry:latest
|
||||
ports:
|
||||
- "8180:8180"
|
||||
@@ -31,8 +28,14 @@ services:
|
||||
QUARKUS_DATASOURCE_USERNAME: ${APIX_DB_USER:-apix}
|
||||
QUARKUS_DATASOURCE_PASSWORD: ${APIX_DB_PASSWORD:-apix}
|
||||
APIX_API_KEY: ${APIX_API_KEY}
|
||||
APIX_REGISTRY_BASE_URL: ${APIX_REGISTRY_BASE_URL:-https://api-index.org}
|
||||
APIX_REGISTRY_NAME: ${APIX_REGISTRY_NAME:-APIX Registry}
|
||||
GLEIF_API_URL: ${GLEIF_API_URL:-https://api.gleif.org/api/v1}
|
||||
OPENCORPORATES_API_KEY: ${OPENCORPORATES_API_KEY:-}
|
||||
APIX_MAIL_SIGNING_PRIVATE_KEY: ${APIX_MAIL_SIGNING_PRIVATE_KEY:-}
|
||||
APIX_MAIL_SIGNING_PUBLIC_KEY: ${APIX_MAIL_SIGNING_PUBLIC_KEY:-}
|
||||
APIX_MAIL_SIGNING_KID: ${APIX_MAIL_SIGNING_KID:-dev}
|
||||
APIX_PORTAL_BASE_URL: ${APIX_PORTAL_BASE_URL:-https://www.api-index.org}
|
||||
SANCTIONS_CACHE_PATH: /app/sanctions
|
||||
LOG_LEVEL: ${LOG_LEVEL:-INFO}
|
||||
volumes:
|
||||
@@ -41,13 +44,43 @@ services:
|
||||
db:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -sf http://localhost:8180/q/health/live || exit 1"]
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:8180/q/health/live || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
|
||||
# Second registry instance of the a/b pair. Internal only — no host port is published.
registry-b:
  image: apix-registry:latest
  restart: unless-stopped
  # Start only once the database reports healthy.
  depends_on:
    db:
      condition: service_healthy
  volumes:
    - sanctions_cache:/app/sanctions
  environment:
    # Database connection
    QUARKUS_DATASOURCE_JDBC_URL: 'jdbc:postgresql://db:5432/${APIX_DB_NAME:-apix}'
    QUARKUS_DATASOURCE_USERNAME: '${APIX_DB_USER:-apix}'
    QUARKUS_DATASOURCE_PASSWORD: '${APIX_DB_PASSWORD:-apix}'
    # Registry identity and public URLs
    APIX_API_KEY: '${APIX_API_KEY}'
    APIX_REGISTRY_BASE_URL: '${APIX_REGISTRY_BASE_URL:-https://api-index.org}'
    APIX_REGISTRY_NAME: '${APIX_REGISTRY_NAME:-APIX Registry}'
    APIX_PORTAL_BASE_URL: '${APIX_PORTAL_BASE_URL:-https://www.api-index.org}'
    # External data sources
    GLEIF_API_URL: '${GLEIF_API_URL:-https://api.gleif.org/api/v1}'
    OPENCORPORATES_API_KEY: '${OPENCORPORATES_API_KEY:-}'
    # Mail signing keys
    APIX_MAIL_SIGNING_PRIVATE_KEY: '${APIX_MAIL_SIGNING_PRIVATE_KEY:-}'
    APIX_MAIL_SIGNING_PUBLIC_KEY: '${APIX_MAIL_SIGNING_PUBLIC_KEY:-}'
    APIX_MAIL_SIGNING_KID: '${APIX_MAIL_SIGNING_KID:-dev}'
    # Matches the container path of the sanctions_cache volume above.
    SANCTIONS_CACHE_PATH: /app/sanctions
    LOG_LEVEL: '${LOG_LEVEL:-INFO}'
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:8180/q/health/live || exit 1'
    interval: 30s
    timeout: 10s
    retries: 3
|
||||
|
||||
# ── Spider (single — cron job, no in-flight request concern) ─────────────
|
||||
|
||||
spider:
|
||||
image: apix-spider:latest
|
||||
environment:
|
||||
@@ -60,28 +93,88 @@ services:
|
||||
db:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -sf http://localhost:8082/q/health/live || exit 1"]
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:8082/q/health/live || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
|
||||
portal:
|
||||
image: apix-portal:latest
|
||||
ports:
|
||||
- "8081:8081"
|
||||
# ── Demo (a/b) ────────────────────────────────────────────────────────────
|
||||
|
||||
demo-a:
|
||||
image: apix-demo:latest
|
||||
environment:
|
||||
REGISTRY_BASE_URL: http://registry:8180
|
||||
QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://db:5432/${APIX_DB_NAME:-apix}
|
||||
QUARKUS_DATASOURCE_USERNAME: ${APIX_DB_USER:-apix}
|
||||
QUARKUS_DATASOURCE_PASSWORD: ${APIX_DB_PASSWORD:-apix}
|
||||
APIX_REGISTRY_URL: http://registry-a:8180
|
||||
APIX_API_KEY: ${APIX_API_KEY}
|
||||
APIX_DEMO_BASE_URL: ${APIX_DEMO_BASE_URL:-https://demo.api-index.org}
|
||||
LOG_LEVEL: ${LOG_LEVEL:-INFO}
|
||||
depends_on:
|
||||
- registry
|
||||
registry-a:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "curl -sf http://localhost:8081/q/health/live || exit 1"]
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:8083/q/health/live || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
restart: unless-stopped
|
||||
|
||||
# Second demo instance of the a/b pair; talks to registry-b.
demo-b:
  image: apix-demo:latest
  restart: unless-stopped
  # Start only once registry-b reports healthy.
  depends_on:
    registry-b:
      condition: service_healthy
  environment:
    QUARKUS_DATASOURCE_JDBC_URL: 'jdbc:postgresql://db:5432/${APIX_DB_NAME:-apix}'
    QUARKUS_DATASOURCE_USERNAME: '${APIX_DB_USER:-apix}'
    QUARKUS_DATASOURCE_PASSWORD: '${APIX_DB_PASSWORD:-apix}'
    APIX_REGISTRY_URL: 'http://registry-b:8180'
    APIX_API_KEY: '${APIX_API_KEY}'
    APIX_DEMO_BASE_URL: '${APIX_DEMO_BASE_URL:-https://demo.api-index.org}'
    LOG_LEVEL: '${LOG_LEVEL:-INFO}'
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:8083/q/health/live || exit 1'
    interval: 30s
    timeout: 10s
    retries: 3
|
||||
|
||||
# ── Portal (a/b) ──────────────────────────────────────────────────────────
|
||||
|
||||
# First portal instance of the a/b pair; publishes port 8081 on the host.
portal-a:
  image: apix-portal:latest
  restart: unless-stopped
  ports:
    - '8081:8081'
  depends_on:
    - registry-a
  environment:
    APIX_REGISTRY_URL: 'http://registry-a:8180'
    LOG_LEVEL: '${LOG_LEVEL:-INFO}'
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:8081/q/health/live || exit 1'
    interval: 30s
    timeout: 10s
    retries: 3
|
||||
|
||||
# Second portal instance of the a/b pair; no host port is published.
portal-b:
  image: apix-portal:latest
  restart: unless-stopped
  depends_on:
    - registry-b
  environment:
    APIX_REGISTRY_URL: 'http://registry-b:8180'
    LOG_LEVEL: '${LOG_LEVEL:-INFO}'
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:8081/q/health/live || exit 1'
    interval: 30s
    timeout: 10s
    retries: 3
|
||||
|
||||
# ── Edge proxy ────────────────────────────────────────────────────────────
|
||||
|
||||
caddy:
|
||||
image: caddy:2-alpine
|
||||
ports:
|
||||
@@ -93,8 +186,76 @@ services:
|
||||
- caddy_data:/data
|
||||
- caddy_config:/config
|
||||
depends_on:
|
||||
- registry
|
||||
- portal
|
||||
- registry-a
|
||||
- portal-a
|
||||
restart: unless-stopped
|
||||
|
||||
# ── Source control & CI ───────────────────────────────────────────────────
|
||||
|
||||
# Self-hosted Gitea with Actions enabled, backed by SQLite in the gitea_data volume.
gitea:
  image: gitea/gitea:1
  restart: unless-stopped
  environment:
    USER_UID: "1000"
    USER_GID: "1000"
    GITEA__server__DOMAIN: git.api-index.org
    GITEA__server__ROOT_URL: https://git.api-index.org
    GITEA__server__HTTP_PORT: "3001"
    # Port advertised in SSH clone URLs; it does not change where sshd listens.
    GITEA__server__SSH_PORT: "2222"
    GITEA__server__SSH_DOMAIN: git.api-index.org
    GITEA__database__DB_TYPE: sqlite3
    GITEA__security__SECRET_KEY: ${GITEA_SECRET_KEY}
    GITEA__security__INTERNAL_TOKEN: ${GITEA_INTERNAL_TOKEN}
    GITEA__security__INSTALL_LOCK: "true"
    GITEA__service__DISABLE_REGISTRATION: "true"
    GITEA__service__REQUIRE_SIGNIN_VIEW: "false"
    GITEA__actions__ENABLED: "true"
    GITEA__log__LEVEL: warn
  ports:
    # Web UI is reachable from the host loopback only.
    - "127.0.0.1:3001:3001"
    # Host 2222 -> container 22. The image's bundled OpenSSH server listens on
    # 22 unless SSH_LISTEN_PORT is set, so the previous "2222:2222" mapping
    # pointed at a port nothing listens on.
    # NOTE(review): confirm against the deployed image before rollout.
    - "2222:22"
  volumes:
    - gitea_data:/data
|
||||
|
||||
# ── Observability ─────────────────────────────────────────────────────────
|
||||
|
||||
# Prometheus server; UI/API published on the host loopback only.
prometheus:
  image: prom/prometheus:v2.53.1
  restart: unless-stopped
  ports:
    - '127.0.0.1:9090:9090'
  volumes:
    # Scrape configuration is mounted read-only; TSDB data lives in a named volume.
    - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    - prometheus_data:/prometheus
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    - '--storage.tsdb.path=/prometheus'
    - '--storage.tsdb.retention.time=30d'
    - '--web.enable-lifecycle'
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:9090/-/healthy || exit 1'
    interval: 30s
    timeout: 10s
    retries: 3
|
||||
|
||||
# Grafana; UI published on the host loopback only.
grafana:
  image: grafana/grafana:11.1.3
  restart: unless-stopped
  depends_on:
    - prometheus
  ports:
    - '127.0.0.1:3000:3000'
  environment:
    # Falls back to "admin" when GRAFANA_ADMIN_PASSWORD is not set.
    GF_SECURITY_ADMIN_PASSWORD: '${GRAFANA_ADMIN_PASSWORD:-admin}'
    GF_USERS_ALLOW_SIGN_UP: 'false'
    GF_SERVER_ROOT_URL: '${GRAFANA_ROOT_URL:-http://localhost:3000}'
  volumes:
    - grafana_data:/var/lib/grafana
    # Provisioning files are mounted read-only.
    - ./grafana/provisioning:/etc/grafana/provisioning:ro
  healthcheck:
    test:
      - CMD-SHELL
      - 'wget -qO- http://localhost:3000/api/health || exit 1'
    interval: 30s
    timeout: 10s
    retries: 3
|
||||
|
||||
volumes:
|
||||
@@ -102,3 +263,6 @@ volumes:
|
||||
sanctions_cache:
|
||||
caddy_data:
|
||||
caddy_config:
|
||||
prometheus_data:
|
||||
grafana_data:
|
||||
gitea_data:
|
||||
|
||||
@@ -0,0 +1,249 @@
|
||||
{
|
||||
"uid": "apix-registry-perf",
|
||||
"title": "APIX Registry — Performance",
|
||||
"tags": ["apix", "registry"],
|
||||
"timezone": "browser",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"refresh": "30s",
|
||||
"time": { "from": "now-1h", "to": "now" },
|
||||
"templating": { "list": [] },
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Request rate",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 0, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "sum(rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
|
||||
"instant": true,
|
||||
"legendFormat": "req/s"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "reqps",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 50 },
|
||||
{ "color": "red", "value": 200 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "P50 latency",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 6, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.50, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"instant": true,
|
||||
"legendFormat": "P50 ms"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 100 },
|
||||
{ "color": "red", "value": 500 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "P95 latency",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 12, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"instant": true,
|
||||
"legendFormat": "P95 ms"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "ms",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 200 },
|
||||
{ "color": "red", "value": 1000 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Error rate",
|
||||
"type": "stat",
|
||||
"gridPos": { "x": 18, "y": 0, "w": 6, "h": 4 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "100 * sum(rate(http_server_requests_seconds_count{job=\"apix-registry\",outcome=\"SERVER_ERROR\"}[5m])) / sum(rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
|
||||
"instant": true,
|
||||
"legendFormat": "error %"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"color": { "mode": "thresholds" },
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 1 },
|
||||
{ "color": "red", "value": 5 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"options": {
|
||||
"reduceOptions": { "calcs": ["lastNotNull"] },
|
||||
"colorMode": "background",
|
||||
"graphMode": "none"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Latency by endpoint — P50 / P95 (ms)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 0, "y": 4, "w": 12, "h": 9 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.50, sum by (le, uri) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"legendFormat": "P50 {{uri}}"
|
||||
},
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.95, sum by (le, uri) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\"}[5m]))) * 1000",
|
||||
"legendFormat": "P95 {{uri}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "ms" },
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "Request rate by endpoint (req/s)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 12, "y": 4, "w": 12, "h": 9 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "sum by (method, uri) (rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
|
||||
"legendFormat": "{{method}} {{uri}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "reqps" },
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"title": "HTTP status code distribution",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 0, "y": 13, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "sum by (status) (rate(http_server_requests_seconds_count{job=\"apix-registry\"}[5m]))",
|
||||
"legendFormat": "HTTP {{status}}"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "reqps" },
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byRegexp", "options": "HTTP 4.." },
|
||||
"properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } }]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byRegexp", "options": "HTTP 5.." },
|
||||
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }]
|
||||
}
|
||||
]
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"title": "IoT replacements endpoint — P95 latency (ms)",
|
||||
"description": "Focused view on GET /services/{id}/replacements — the hot path for IoT device discovery.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "x": 12, "y": 13, "w": 12, "h": 8 },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.95, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\",uri=\"/services/{id}/replacements\"}[5m]))) * 1000",
|
||||
"legendFormat": "P95 /replacements"
|
||||
},
|
||||
{
|
||||
"datasource": "Prometheus",
|
||||
"expr": "histogram_quantile(0.50, sum by (le) (rate(http_server_requests_seconds_bucket{job=\"apix-registry\",uri=\"/services/{id}/replacements\"}[5m]))) * 1000",
|
||||
"legendFormat": "P50 /replacements"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": { "unit": "ms" },
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": { "calcs": ["mean", "max"], "displayMode": "table", "placement": "bottom" },
|
||||
"tooltip": { "mode": "multi" }
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
# Grafana dashboard provisioning: one file-based provider named APIX.
apiVersion: 1

providers:
  - name: 'APIX'
    type: 'file'
    # Dashboard files are read from this directory.
    options:
      path: '/etc/grafana/provisioning/dashboards'
    updateIntervalSeconds: 30
    disableDeletion: true
|
||||
@@ -0,0 +1,9 @@
|
||||
# Grafana datasource provisioning: a single default Prometheus datasource.
apiVersion: 1

datasources:
  - name: 'Prometheus'
    type: 'prometheus'
    url: 'http://prometheus:9090'
    access: 'proxy'
    isDefault: true
    editable: false
|
||||
@@ -0,0 +1,25 @@
|
||||
# Prometheus configuration: scrape the APIX services' /q/metrics endpoints.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # The registry runs as the registry-a / registry-b pair in docker-compose;
  # there is no service named "registry", so both instances are listed.
  - job_name: apix-registry
    metrics_path: /q/metrics
    static_configs:
      - targets:
          - 'registry-a:8180'
          - 'registry-b:8180'
        labels:
          service: registry

  # The spider runs as a single instance.
  - job_name: apix-spider
    metrics_path: /q/metrics
    static_configs:
      - targets:
          - 'spider:8082'
        labels:
          service: spider

  # The portal runs as the portal-a / portal-b pair in docker-compose;
  # there is no service named "portal", so both instances are listed.
  - job_name: apix-portal
    metrics_path: /q/metrics
    static_configs:
      - targets:
          - 'portal-a:8081'
          - 'portal-b:8081'
        labels:
          service: portal
|
||||
Reference in New Issue
Block a user