# Version marker for this file's layout.
# NOTE(review): presumably validated by whatever loads this file — confirm.
version: 1

# File-wide defaults: the shared app.kubernetes.io/* labels, the default
# app_version, and the organization/workspace slugs.
defaults:
  labels:
    app.kubernetes.io/part-of: alphaswarm
    app.kubernetes.io/managed-by: terraform
  app_version: latest
  workspace_slug: main
  organization_slug: wiley-tech

# Tooling settings: the local shell command (plus a Windows PATH prepend)
# and the Terraform binary setting, version floor, and data/terraform paths.
tooling:
  local_shell:
    command: bash
    windows_path_prepend: ["C:/Program Files/Git/bin"]
  terraform:
    # Kept as a quoted string so it is not parsed as the float 1.1.
    min_version: "1.10"
    binary_setting: ALPHASWARM_TERRAFORM_BINARY
    cli_config_file: data/terraform/terraform.tfrc
    provider_mirror_path: data/terraform/plugin-cache
    plugin_cache_path: data/terraform/plugin-cache-runtime

services:
  # Core API deployment; ``alphaswarm-api`` is an alias for the same entry.
  - id: alphaswarm-core
    label: AlphaSwarm Core API
    aliases:
      - alphaswarm-api
    role: api
    workload: deployment
    restartable: true
    logs_enabled: true
    app_label: alphaswarm-core
    container: api
    image_key: api
    port: 8000
    health_path: /readyz
  # Orchestration worker; no port or health_path is declared for it.
  - id: alphaswarm-worker
    label: AlphaSwarm Worker
    role: worker
    workload: deployment
    restartable: true
    logs_enabled: true
    app_label: alphaswarm-worker
    container: worker
    image_key: worker
  # Heavy-compute task executor from the Phase 4c worker/executor split.
  # It drains the backtest/training/ml/agents/factors/rag queues, while
  # ``alphaswarm-worker`` above keeps the light/coordination queues.
  - id: alphaswarm-executor
    label: AlphaSwarm Executor
    role: executor
    workload: deployment
    restartable: true
    logs_enabled: true
    app_label: alphaswarm-executor
    container: executor
    image_key: executor
  # Scheduler deployment (``role: scheduler``).
  - id: alphaswarm-beat
    label: AlphaSwarm Beat
    role: scheduler
    workload: deployment
    restartable: true
    logs_enabled: true
    app_label: alphaswarm-beat
    container: beat
    image_key: beat
  # Frontend deployment; uses the ``frontend`` image key.
  - id: alphaswarm-client
    label: AlphaSwarm Client
    aliases:
      - alphaswarm-frontend
    role: frontend
    workload: deployment
    restartable: true
    logs_enabled: true
    app_label: alphaswarm-client
    container: client
    image_key: frontend
    port: 80
    health_path: /
  # Cloud UI, deployed in its own ``alphaswarm-ui`` namespace.
  - id: alphaswarm-ui
    label: AlphaSwarm UI (Cloud)
    aliases:
      - alphaswarm-frontend-cloud
      - alphaswarm-paas
    role: frontend
    workload: deployment
    restartable: true
    logs_enabled: true
    namespace: alphaswarm-ui
    app_label: alphaswarm-ui
    container: ui
    image_key: ui
    port: 80
    health_path: /api/healthz
    endpoints:
      # The apex (alpha-swarm.ai) now serves the public site (Cloudflare
      # Pages, see alphaswarm-site below), so the operator dashboard is
      # published at app.* instead.
      public_url: https://app.alpha-swarm.ai
      url: http://alphaswarm-ui.alphaswarm-ui.svc.cluster.local:80
  # Hosted demo: alphaswarm_ui in read-only demo mode behind Cloudflare
  # Access (terraform/modules/cloudflare_access_demo; demo-edge stack).
  - id: alphaswarm-ui-demo
    label: AlphaSwarm UI (Demo)
    aliases:
      - alphaswarm-demo
    role: frontend
    workload: deployment
    restartable: true
    logs_enabled: true
    namespace: alphaswarm-ui
    app_label: alphaswarm-ui-demo
    container: ui
    image_key: ui
    port: 80
    health_path: /api/healthz
    endpoints:
      public_url: https://demo.alpha-swarm.ai
      url: http://alphaswarm-ui-demo.alphaswarm-ui.svc.cluster.local:80
  # Public site at the apex — marketing landing + /docs + /demo. An EDGE
  # property (Cloudflare Pages), NOT a cluster workload; stays up when the
  # cluster is degraded (terraform/modules/cloudflare_pages_docs; docs-edge stack).
  - id: alphaswarm-site
    label: AlphaSwarm Public Site (apex)
    role: edge
    workload: external
    # Declared explicitly, matching the other ``workload: external`` entries
    # (alphaswarm-docs, alphaswarm-docs-status, alphaswarm-docs-archive, hudi):
    # there is no cluster workload here to restart or pull logs from, so the
    # entry should not depend on whatever the loader's defaults happen to be.
    restartable: false
    logs_enabled: false
    endpoints:
      public_url: https://alpha-swarm.ai
      docs_url: https://alpha-swarm.ai/docs
      demo_url: https://alpha-swarm.ai/demo
  # Control plane deployment in the ``alphaswarm`` namespace, port 9000.
  - id: alphaswarm-cp
    label: AlphaSwarm Control Plane
    role: control-plane
    workload: deployment
    restartable: true
    logs_enabled: true
    namespace: alphaswarm
    app_label: alphaswarm-cp
    container: control-plane
    image_key: cp
    port: 9000
    health_path: /manage/readyz
    endpoints:
      # In-cluster base URL that every alphaswarm_* service uses to reach
      # the controller; follows the canonical
      # ``http://<svc>.<ns>.svc.cluster.local:<port>`` pattern.
      url: http://alphaswarm-cp.alphaswarm.svc.cluster.local:9000
      # Resolution target for ``cluster_mgmt_url`` /
      # ``AQPControlPlaneClient._resolve_base_url`` (see
      # ``URL_FALLBACK_FIELDS`` in ``alphaswarm/config/topology_fallback.py``).
      manage: http://alphaswarm-cp.alphaswarm.svc.cluster.local:9000/manage
      # Unified identity-broker surface from the launcher / auth-provider /
      # connection-manager refactor: introduced in Phase 1 (M2M + Agent
      # Identity + federated discovery), extended in Phase 3 (login /
      # callback / logout / refresh / me / stepup / device). One hostname
      # so the SPAs and CLI agree on the broker URL.
      auth: http://alphaswarm-cp.alphaswarm.svc.cluster.local:9000/auth
      # Reserved for the Phase 5 connection-proxy mesh. Declared now so
      # ``URL_FALLBACK_FIELDS`` can land early; the route ships in Phase 5.
      proxy: http://alphaswarm-cp.alphaswarm.svc.cluster.local:9000/proxy
      # External hostname behind the Cloudflare tunnel. Operators wire it
      # once their tunnel is up; the in-cluster URL above keeps serving
      # service-to-service traffic.
      public_url: https://manage.alpha-swarm.ai
  # Internal admin BFF + Next.js frontend: an audit-first surface for
  # managed services and company accounts. Human admins authenticate via
  # the AlphaSwarm staff Entra tenant; outbound M2M uses per-deployment
  # Entra Agent Identities (alphaswarm_admin_agent_identity Terraform
  # module). It NEVER imports alphaswarm.* — it talks HTTP-only to
  # alphaswarm_controller + the AlphaSwarm monolith. See
  # alphaswarm_admin/AGENTS.md.
  - id: alphaswarm-admin
    label: AlphaSwarm Admin (Internal)
    aliases:
      - alphaswarm-admin-bff
    role: admin
    workload: deployment
    restartable: true
    logs_enabled: true
    namespace: alphaswarm-admin
    app_label: alphaswarm-admin
    container: admin
    image_key: admin
    port: 8900
    health_path: /admin/healthz
    endpoints:
      public_url: https://admin.alpha-swarm.ai
      url: http://alphaswarm-admin.alphaswarm-admin.svc.cluster.local:8900
  # Dedicated MCP server that publishes the ``data.ml.*`` slice of the
  # DataMCP catalog under its own audience claim. The initial deployment
  # piggybacks on the API pod (FastAPI router at ``/mcp/ml``), which is why
  # app_label / container / image_key below point at the core API;
  # production rollouts can split it into its own sidecar / pod when GPU
  # isolation matters. See AGENTS rule 49.
  - id: alphaswarm-ml-mcp
    label: AlphaSwarm MLOps MCP Server
    role: mcp
    workload: deployment
    restartable: true
    logs_enabled: true
    app_label: alphaswarm-core
    container: api
    image_key: api
    port: 8000
    health_path: /mcp/ml/tools
    endpoints:
      http: http://alphaswarm-core.alphaswarm.svc.cluster.local:8000/mcp/ml
  # Edge property, NOT a cluster workload: hosted on Cloudflare Pages at
  # https://docs.alpha-swarm.ai. See AGENTS rule 47 +
  # alphaswarm_platform/terraform/modules/cloudflare_pages_docs/.
  - id: alphaswarm-docs
    label: AlphaSwarm Docs (Edge Property)
    aliases:
      - alphaswarm-docs-edge
      - docs-alpha-swarm-ai
    role: docs
    workload: external
    restartable: false
    logs_enabled: false
    endpoints:
      public_url: https://docs.alpha-swarm.ai
      url: https://docs.alpha-swarm.ai
      llms_txt: https://docs.alpha-swarm.ai/llms.txt
      llms_full_txt: https://docs.alpha-swarm.ai/llms-full.txt
      mcp_url: https://docs.alpha-swarm.ai/mcp
  # Status page on a separate Cloudflare zone (https://status.alpha-swarm.ai).
  # Kept standalone for incident-time resilience: it must stay up when the
  # rest of the stack is degraded.
  - id: alphaswarm-docs-status
    label: AlphaSwarm Status (Instatus)
    aliases:
      - status-alpha-swarm-ai
    role: status-page
    workload: external
    restartable: false
    logs_enabled: false
    endpoints:
      public_url: https://status.alpha-swarm.ai
      url: https://status.alpha-swarm.ai
  # Sunset Stripe-style API epochs are frozen on archive.alpha-swarm.ai.
  - id: alphaswarm-docs-archive
    label: AlphaSwarm Docs (Sunset Archive)
    aliases:
      - archive-alpha-swarm-ai
    role: docs-archive
    workload: external
    restartable: false
    logs_enabled: false
    endpoints:
      public_url: https://archive.alpha-swarm.ai
      url: https://archive.alpha-swarm.ai
  # Stateful stores declared without a namespace or endpoints; each is
  # marked non-restartable and carries its own storage size.
  - id: postgres
    label: PostgreSQL pgvector
    role: database
    workload: statefulset
    restartable: false
    logs_enabled: true
    app_label: postgres
    container: postgres
    port: 5432
    storage: 5Gi
  - id: redis
    label: Redis
    role: cache
    workload: statefulset
    restartable: false
    logs_enabled: true
    app_label: redis
    container: redis
    port: 6379
    storage: 2Gi
  - id: neo4j
    label: Neo4j
    role: graph
    workload: statefulset
    restartable: false
    logs_enabled: true
    app_label: neo4j
    container: neo4j
    port: 7474
    storage: 5Gi
  # NOTE(review): a second entry with ``id: chromadb`` appears later in this
  # list (the one with ``cluster: data-services`` and an ``endpoints.host``).
  # Which entry a loader keeps for a duplicated id is not visible from this
  # file — confirm, then merge the two or rename one.
  - id: chromadb
    label: ChromaDB
    role: vector-store
    workload: deployment
    app_label: chromadb
    container: chromadb
    port: 8000
    restartable: false
    logs_enabled: true
  # NOTE(review): ``id: mlflow`` is likewise declared again later in this list
  # (``cluster: mlops.tracking``). That entry sets ``restartable: true`` while
  # this one sets ``false`` — confirm which value is intended.
  - id: mlflow
    label: MLflow
    role: mlops
    workload: deployment
    app_label: mlflow
    container: mlflow
    port: 5000
    restartable: false
    logs_enabled: true
  # Telemetry deployments declared without a namespace or endpoints.
  - id: otel-collector
    label: OpenTelemetry Collector
    role: observability
    workload: deployment
    restartable: false
    logs_enabled: true
    app_label: otel-collector
    container: otel-collector
    port: 4317
  - id: jaeger
    label: Jaeger
    role: tracing
    workload: deployment
    restartable: false
    logs_enabled: true
    app_label: jaeger
    container: jaeger
    port: 16686

  # =========================================================================
  # Phase 1 of the AlphaSwarm infra-expansion plan: lift-and-shift declaration of
  # AlphaSwarm-owned shared cluster services. Workload manifests fill in during
  # Phase 2 (additive: redpanda, questdb, phoenix, hudi, kube-prom-stack,
  # opentelemetry-operator) and Phase 5 (legacy lift-and-shift from
  # rpi_kubernetes/kubernetes/{base-services,observability,mlops}/).
  # The namespace + endpoints fields make these the canonical addresses
  # that alphaswarm.config.topology_fallback resolves to when ALPHASWARM_* env vars are
  # unset on URL-typed Settings fields.
  # =========================================================================

  # --- Streaming: Strimzi and Redpanda side by side (plan question 2) -----
  # All five entries live in the ``alphaswarm-streaming`` namespace.
  - id: kafka
    label: Strimzi Kafka
    aliases:
      - kafka-strimzi
      - strimzi
    role: streaming
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: streaming.strimzi
    namespace: alphaswarm-streaming
    app_label: trading-kafka
    port: 9092
    storage: 50Gi
    protocols:
      kafka: 9092
      kafka_tls: 9093
      kafka_external: 9094
    endpoints:
      # Both keys currently point at the same bootstrap address.
      bootstrap: trading-kafka-kafka-bootstrap.alphaswarm-streaming.svc.cluster.local:9092
      admin_bootstrap: trading-kafka-kafka-bootstrap.alphaswarm-streaming.svc.cluster.local:9092
  - id: schema-registry
    label: Apicurio Schema Registry
    role: streaming
    workload: deployment
    restartable: false
    logs_enabled: true
    cluster: streaming.strimzi
    namespace: alphaswarm-streaming
    app_label: apicurio-registry
    port: 8080
    endpoints:
      ccompat: http://apicurio-registry.alphaswarm-streaming.svc.cluster.local:8080/apis/ccompat/v7
  - id: redpanda
    label: Redpanda
    role: streaming
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: streaming.redpanda
    namespace: alphaswarm-streaming
    app_label: redpanda
    port: 9092
    storage: 100Gi
    protocols:
      kafka: 9092
      admin: 9644
      schema_registry: 8081
      pandaproxy: 8082
    endpoints:
      bootstrap: redpanda.alphaswarm-streaming.svc.cluster.local:9092
      admin: http://redpanda.alphaswarm-streaming.svc.cluster.local:9644
      schema_registry: http://redpanda.alphaswarm-streaming.svc.cluster.local:8081
  - id: redpanda-connect
    label: Redpanda Connect
    role: streaming
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: streaming.redpanda
    namespace: alphaswarm-streaming
    app_label: redpanda-connect
    port: 4195
    endpoints:
      ui: http://redpanda-connect.alphaswarm-streaming.svc.cluster.local:4195
  - id: flink
    label: Apache Flink Session Cluster
    role: streaming
    workload: deployment
    restartable: false
    logs_enabled: true
    cluster: streaming.flink
    namespace: alphaswarm-streaming
    app_label: flink-trading
    port: 8081
    endpoints:
      rest: http://flink-trading-rest.alphaswarm-streaming.svc.cluster.local:8081

  # --- Time-series database (new in Phase 2) ------------------------------
  - id: questdb
    label: QuestDB
    role: timeseries
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: timeseries.questdb
    namespace: alphaswarm-timeseries
    app_label: questdb
    port: 9000
    storage: 200Gi
    protocols:
      http: 9000
      ilp_tcp: 9009
      pgwire: 8812
    endpoints:
      http: http://questdb.alphaswarm-timeseries.svc.cluster.local:9000
      ilp_tcp: questdb.alphaswarm-timeseries.svc.cluster.local:9009
      # NOTE(review): this URL embeds a user and password in plain text.
      # Confirm they are throwaway defaults, or move them to a secret
      # reference before this file is used for a non-local target.
      pgwire: postgresql://alphaswarm:alphaswarm@questdb.alphaswarm-timeseries.svc.cluster.local:8812/qdb

  # --- Lakehouse: Iceberg is canonical, Hudi is additive (plan section D) --
  - id: polaris
    label: Polaris Iceberg Catalog
    role: lakehouse
    workload: deployment
    restartable: false
    logs_enabled: true
    cluster: lakehouse.iceberg
    namespace: alphaswarm-lakehouse
    app_label: polaris
    port: 8181
    endpoints:
      rest: http://polaris.alphaswarm-lakehouse.svc.cluster.local:8181
      iceberg_rest: http://polaris.alphaswarm-lakehouse.svc.cluster.local:8181/api/catalog/v1
  # Declared as ``workload: external``: no port, only a warehouse location
  # and a metastore address.
  - id: hudi
    label: Apache Hudi (additive)
    role: lakehouse
    workload: external
    restartable: false
    logs_enabled: false
    cluster: lakehouse.hudi
    namespace: alphaswarm-lakehouse
    app_label: hudi
    endpoints:
      warehouse: s3://alphaswarm-lakehouse/hudi/
      metastore: thrift://polaris.alphaswarm-lakehouse.svc.cluster.local:9083
  # Grouped under the lakehouse.hudi cluster but deployed in the
  # ``alphaswarm-mlops`` namespace.
  - id: spark-operator
    label: Spark Operator (Hudi writer host)
    role: lakehouse
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: lakehouse.hudi
    namespace: alphaswarm-mlops
    app_label: spark-operator
    port: 8443

  # --- Observability: kube-prometheus-stack + OTel + Phoenix (Phase 2) ----
  # Every entry in this group lives in ``alphaswarm-observability``.
  - id: prometheus
    label: Prometheus (kube-prometheus-stack)
    role: observability
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: observability.metrics
    namespace: alphaswarm-observability
    app_label: prometheus
    port: 9090
    storage: 100Gi
    endpoints:
      query: http://prometheus-operated.alphaswarm-observability.svc.cluster.local:9090
      remote_write: http://prometheus-operated.alphaswarm-observability.svc.cluster.local:9090/api/v1/write
  - id: grafana
    label: Grafana
    role: observability
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: observability.metrics
    namespace: alphaswarm-observability
    app_label: grafana
    port: 3000
    endpoints:
      ui: http://grafana.alphaswarm-observability.svc.cluster.local:3000
  - id: tempo
    label: Grafana Tempo
    role: tracing
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: observability.tracing
    namespace: alphaswarm-observability
    app_label: tempo
    port: 3200
    storage: 100Gi
    protocols:
      http: 3200
      otlp_grpc: 4317
      otlp_http: 4318
    endpoints:
      query: http://tempo.alphaswarm-observability.svc.cluster.local:3200
      otlp_grpc: tempo.alphaswarm-observability.svc.cluster.local:4317
      otlp_http: http://tempo.alphaswarm-observability.svc.cluster.local:4318
  - id: loki
    label: Grafana Loki
    role: logging
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: observability.logging
    namespace: alphaswarm-observability
    app_label: loki
    port: 3100
    storage: 200Gi
    endpoints:
      push: http://loki.alphaswarm-observability.svc.cluster.local:3100/loki/api/v1/push
      query: http://loki.alphaswarm-observability.svc.cluster.local:3100
  # The UI and the OTLP/HTTP endpoint share port 6006; OTLP/gRPC is on 4317.
  - id: phoenix
    label: Arize Phoenix (LLM/agent/RAG observability)
    role: observability
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: observability.ai
    namespace: alphaswarm-observability
    app_label: phoenix
    port: 6006
    storage: 16Gi
    protocols:
      ui_http: 6006
      otlp_grpc: 4317
    endpoints:
      ui: http://phoenix.alphaswarm-observability.svc.cluster.local:6006
      otlp_http: http://phoenix.alphaswarm-observability.svc.cluster.local:6006
      otlp_grpc: phoenix.alphaswarm-observability.svc.cluster.local:4317
  - id: marquez
    label: Marquez (OpenLineage)
    role: observability
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: observability.lineage
    namespace: alphaswarm-observability
    app_label: marquez
    port: 5000
    storage: 4Gi
    protocols:
      http: 5000
    endpoints:
      http: http://marquez.alphaswarm-observability.svc.cluster.local:5000
      lineage: http://marquez.alphaswarm-observability.svc.cluster.local:5000/api/v1/lineage
  # Postgres backend for Phoenix (same observability.ai cluster group).
  - id: phoenix-postgres
    label: Phoenix Postgres backend
    role: database
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: observability.ai
    namespace: alphaswarm-observability
    app_label: phoenix-postgresql
    port: 5432
    storage: 16Gi

  # --- Data services (lifted and shifted; AlphaSwarm is now the canonical owner) ---
  - id: postgres-shared
    label: Shared PostgreSQL
    role: database
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: data-services
    namespace: alphaswarm-data-services
    app_label: postgresql
    port: 5432
    storage: 50Gi
    endpoints:
      # NOTE(review): both DSNs embed a user and password in plain text.
      # Confirm they are throwaway defaults, or move them to a secret
      # reference before this file is used for a non-local target.
      dsn: postgresql+psycopg2://alphaswarm:alphaswarm@postgresql.alphaswarm-data-services.svc.cluster.local:5432/alphaswarm
      async_dsn: postgresql+asyncpg://alphaswarm:alphaswarm@postgresql.alphaswarm-data-services.svc.cluster.local:5432/alphaswarm
  - id: redis-shared
    label: Shared Redis (cache + RAG)
    role: cache
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: data-services
    namespace: alphaswarm-data-services
    app_label: redis
    port: 6379
    storage: 8Gi
    endpoints:
      # One Redis instance, three logical databases: /0, /1 and /2.
      url: redis://redis.alphaswarm-data-services.svc.cluster.local:6379/0
      pubsub_url: redis://redis.alphaswarm-data-services.svc.cluster.local:6379/1
      cache_url: redis://redis.alphaswarm-data-services.svc.cluster.local:6379/2
  - id: minio
    label: MinIO (S3-compatible object storage)
    role: storage
    workload: statefulset
    restartable: false
    logs_enabled: true
    cluster: data-services
    namespace: alphaswarm-data-services
    app_label: minio
    port: 9000
    storage: 500Gi
    protocols:
      s3: 9000
      console: 9001
    endpoints:
      endpoint: http://minio.alphaswarm-data-services.svc.cluster.local:9000
      console: http://minio.alphaswarm-data-services.svc.cluster.local:9001

  # --- MLOps (lift-and-shift Phase 5) -------------------------------------
  # NOTE(review): ``id: mlflow`` is also declared earlier in this list (the
  # entry with ``container: mlflow`` and no namespace, ``restartable: false``).
  # Which of the two a loader keeps is not visible from this file — confirm,
  # then merge them or rename one.
  - id: mlflow
    label: MLflow Tracking Server
    role: mlops
    workload: deployment
    app_label: mlflow
    cluster: mlops.tracking
    namespace: alphaswarm-mlops
    port: 5000
    restartable: true
    logs_enabled: true
    endpoints:
      tracking: http://mlflow.alphaswarm-mlops.svc.cluster.local:5000
  # Dagster webserver; the endpoints omit the port (the service port is 80).
  - id: dagster
    label: Dagster Webserver
    role: orchestration
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: mlops.orchestration
    namespace: alphaswarm-mlops
    app_label: dagster-webserver
    port: 80
    endpoints:
      webserver: http://dagster-dagster-webserver.alphaswarm-mlops.svc.cluster.local
      graphql: http://dagster-dagster-webserver.alphaswarm-mlops.svc.cluster.local/graphql

  # --- ELT (lift-and-shift, Phase 5) --------------------------------------
  - id: airbyte
    label: Airbyte (visual builder + workspace)
    role: elt
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: elt.airbyte
    namespace: alphaswarm-elt
    app_label: airbyte-server
    port: 8001
    endpoints:
      ui: http://airbyte-server-svc.alphaswarm-elt.svc.cluster.local:8001
      api: http://airbyte-server-svc.alphaswarm-elt.svc.cluster.local:8001/api/v1

  # --- Vector + metadata stores (lift-and-shift Phase 5) ------------------
  # NOTE(review): ``id: chromadb`` is also declared earlier in this list (the
  # entry with ``container: chromadb`` and no namespace or endpoints). Which
  # of the two a loader keeps is not visible from this file — confirm, then
  # merge them or rename one.
  - id: chromadb
    label: ChromaDB (lightweight vector store)
    role: vector-store
    workload: deployment
    app_label: chromadb
    cluster: data-services
    namespace: alphaswarm-data-services
    port: 8000
    restartable: false
    logs_enabled: true
    endpoints:
      host: http://chromadb.alphaswarm-data-services.svc.cluster.local:8000
  # DataHub GMS (``role: metadata``) in the data-services cluster group.
  - id: datahub
    label: DataHub GMS
    role: metadata
    workload: deployment
    restartable: true
    logs_enabled: true
    cluster: data-services
    namespace: alphaswarm-data-services
    app_label: datahub-datahub-gms
    port: 8080
    endpoints:
      gms: http://datahub-datahub-gms.alphaswarm-data-services.svc.cluster.local:8080

  # ---------------------------------------------------------------------------
  # Phase G of the AWS hybrid rollout — AWS-native managed services that
  # are NOT cluster-resident. The nine entries below are referenced from
  # the ``aws`` target further down and the ``cell-shared-std-us-east-1a`` cell.
  # ---------------------------------------------------------------------------
  # AWS-managed entries. Endpoint values are ``hydrate://ssm/...`` references
  # carrying an ``${env}`` placeholder rather than literal addresses.
  # NOTE(review): these entries use ``component`` where the cluster services
  # above use ``role``, and ``protocols`` is a list here but a name-to-port
  # mapping there — confirm the loader accepts both shapes.
  - id: bedrock-agentcore-runtime
    label: Bedrock AgentCore Runtime
    component: agentic
    restartable: false
    logs_enabled: true
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      runtime_arn: hydrate://ssm/alphaswarm/${env}/agentcore_runtime_arn
    annotations:
      alphaswarm.io/managed-by: aws-bedrock
      alphaswarm.io/cpu-architecture: arm64
      alphaswarm.io/session_max_seconds: "28800"

  - id: bedrock-agentcore-gateway
    label: Bedrock AgentCore Gateway
    component: agentic
    restartable: false
    logs_enabled: true
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      gateway_arn: hydrate://ssm/alphaswarm/${env}/agentcore_gateway_arn
      tool_config_uri: hydrate://ssm/alphaswarm/${env}/agentcore_gateway_tool_config_uri

  - id: bedrock-agentcore-memory
    label: Bedrock AgentCore Memory
    component: agentic
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      memory_id: hydrate://ssm/alphaswarm/${env}/agentcore_memory_id

  - id: bedrock-kb-collection
    label: Bedrock Knowledge Base (OpenSearch Serverless)
    component: rag
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      collection_arn: hydrate://ssm/alphaswarm/${env}/kb_collection_arn
      kb_id: hydrate://ssm/alphaswarm/${env}/kb_knowledge_base_id
      source_bucket: hydrate://ssm/alphaswarm/${env}/kb_source_bucket

  - id: cognito-user-pool
    label: Cognito User Pool
    component: identity
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      user_pool_id: hydrate://ssm/alphaswarm/${env}/cognito_user_pool_id
      user_pool_endpoint: hydrate://ssm/alphaswarm/${env}/cognito_user_pool_endpoint
      user_pool_domain: hydrate://ssm/alphaswarm/${env}/cognito_user_pool_domain

  # The annotations hold comma-separated hostname lists as single strings.
  - id: cloudfront-distribution
    label: CloudFront (admin + AgentCore edge)
    component: edge
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      domain: hydrate://ssm/alphaswarm/${env}/cloudfront_domain
      distribution_id: hydrate://ssm/alphaswarm/${env}/cloudfront_distribution_id
    annotations:
      alphaswarm.io/aliases: admin.alpha-swarm.ai,agentcore.alpha-swarm.ai
      alphaswarm.io/cloudflare-edge: alpha-swarm.ai,api.alpha-swarm.ai,manage.alpha-swarm.ai

  - id: ecs-fargate-cluster
    label: ECS Fargate (admin BFF + AgentCore proxy)
    component: compute
    restartable: true
    logs_enabled: true
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      cluster_name: hydrate://ssm/alphaswarm/${env}/ecs_cluster_name
      cluster_arn: hydrate://ssm/alphaswarm/${env}/ecs_cluster_arn
      service_names: hydrate://ssm/alphaswarm/${env}/ecs_service_names

  - id: alb
    label: Application Load Balancer
    component: edge
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      default: hydrate://ssm/alphaswarm/${env}/alb_dns_name
      zone_id: hydrate://ssm/alphaswarm/${env}/alb_zone_id

  - id: eventbridge-stepfunctions
    label: EventBridge + Step Functions
    component: orchestration
    cluster: aws
    namespace: aws-managed
    protocols:
      - https
    endpoints:
      state_machine_arn: hydrate://ssm/alphaswarm/${env}/nightly_sfn_arn
      nightly_rule_arn: hydrate://ssm/alphaswarm/${env}/nightly_rule_arn

targets:
  # Local k3d target: images are built locally (``build_locally: true``)
  # and auth is not required (``auth.required: false``).
  local:
    id: local
    label: Local k3d
    kind: local
    environment: local
    cloud_provider: local
    namespace: alphaswarm-local
    adapter_preference:
      - in_cluster
      - local_compose
    terraform:
      stack_slug: alphaswarm-local
      spec_path: alphaswarm_platform/configs/terraform/local.yaml
      environment_dir: alphaswarm_platform/terraform/environments/local
      tfvars_path: alphaswarm_platform/terraform/environments/local/terraform.tfvars
      backend_state_path: data/terraform/state/local.tfstate
    cluster:
      name: alphaswarm-local
      kubeconfig_path: ~/.kube/alphaswarm-local.config
      k3d_image: rancher/k3s:v1.30.4-k3s1
      registry_name: alphaswarm-registry
      registry_port: 5001
      registry_host: alphaswarm-registry:5001
      registry_localhost: localhost:5001
      lb_http_port: 8000
      lb_https_port: 3001
      ingress_class: traefik
    endpoints:
      frontend: http://localhost:8000/
      api: http://localhost:8000/api
      registry: localhost:5001
    # Maps image keys (the ``image_key`` of a service entry) to image names.
    images:
      registry: alphaswarm-registry:5001
      app_version: latest
      build_locally: true
      services:
        api: alphaswarm-core
        worker: alphaswarm-worker
        executor: alphaswarm-executor
        beat: alphaswarm-beat
        paper: alphaswarm-paper
        serving: alphaswarm-serving
        ingester: alphaswarm-ingester
        frontend: alphaswarm-client
    auth:
      provider: local
      required: false
      scim_enabled: false
    services:
      - alphaswarm-core
      - alphaswarm-worker
      # The executor image is mapped under ``images.services`` above, and the
      # service registry states that the executor drains the heavy queues
      # while the worker keeps only the light/coordination ones, so the
      # executor is listed alongside the worker.
      - alphaswarm-executor
      - alphaswarm-beat
      - alphaswarm-client
      - postgres
      - redis
      - neo4j
      - chromadb
      - mlflow
      - otel-collector
      - jaeger
      # Phase 1 of the infra-expansion plan: AlphaSwarm-owned shared infra services
      # (declarations only - workload manifests fill in Phase 2 + Phase 5).
      - kafka
      - schema-registry
      - redpanda
      - redpanda-connect
      - flink
      - questdb
      - polaris
      - hudi
      - spark-operator
      - prometheus
      - grafana
      - tempo
      - loki
      - phoenix
      - phoenix-postgres
      - postgres-shared
      - redis-shared
      - minio
      - dagster
      - airbyte
      - datahub

  # Two-node tower+laptop Kubernetes target: images are pulled from a
  # remote registry (``build_locally: false``) and Auth0 auth is required.
  tower:
    id: tower
    label: AlphaSwarm two-node tower+laptop
    kind: kubernetes
    environment: tower
    cloud_provider: rpi_cluster
    namespace: alphaswarm
    adapter_preference:
      - in_cluster
      - rpi_cluster
    terraform:
      stack_slug: alphaswarm-tower
      spec_path: alphaswarm_platform/configs/terraform/tower.yaml
      environment_dir: alphaswarm_platform/terraform/environments/tower
      tfvars_path: alphaswarm_platform/terraform/environments/tower/terraform.tfvars
      backend_state_path: data/terraform/state/tower.tfstate
    cluster:
      name: alphaswarm-two-node
      kubeconfig_path: ~/.kube/alphaswarm-tower.config
      kube_context: ""
      ingress_class: nginx
      ingress_host: alpha-swarm.ai
    endpoints:
      frontend: https://alpha-swarm.ai
      api: https://api.alpha-swarm.ai
      manage: https://manage.alpha-swarm.ai
    # Maps image keys (the ``image_key`` of a service entry) to image names.
    images:
      registry: docker.io/julian0215
      app_version: dev
      build_locally: false
      services:
        api: alphaswarm-core
        worker: alphaswarm-worker
        executor: alphaswarm-executor
        beat: alphaswarm-beat
        frontend: alphaswarm-client
        cp: alphaswarm-cp
    auth:
      provider: auth0
      required: true
      oidc_issuer: https://alphaswarm-fund.us.auth0.com/
      audience: https://api.alphaswarm.internal/manage
      client_id: ZwJvVAYGRj6drndJhpKlvyLv18Jybavz
      scim_enabled: true
      # Secrets are referenced by name here rather than stored inline.
      secret_refs:
        client_secret: auth0-client-secret
        scim_bearer_token_hash: auth0-scim-bearer-token-hash
    services:
      - alphaswarm-core
      - alphaswarm-worker
      # The executor image is mapped under ``images.services`` above, and the
      # service registry states that the executor drains the heavy queues
      # while the worker keeps only the light/coordination ones, so the
      # executor is listed alongside the worker.
      - alphaswarm-executor
      - alphaswarm-beat
      - alphaswarm-client
      - alphaswarm-cp
      - postgres
      - redis
      - neo4j
      - chromadb
      - mlflow
      - otel-collector
      - jaeger
      - kafka
      - schema-registry
      - redpanda
      - redpanda-connect
      - flink
      - questdb
      - polaris
      - hudi
      - spark-operator
      - prometheus
      - grafana
      - tempo
      - loki
      - phoenix
      - phoenix-postgres
      - postgres-shared
      - redis-shared
      - minio
      - dagster
      - airbyte
      - datahub

  # rpi_kubernetes target: images are pulled from a remote registry
  # (``build_locally: false``) and Auth0 auth is required.
  rpi:
    id: rpi
    label: rpi_kubernetes
    kind: rpi_cluster
    environment: rpi
    cloud_provider: rpi_cluster
    namespace: alphaswarm
    adapter_preference:
      - rpi_cluster
    terraform:
      stack_slug: alphaswarm-rpi-kubernetes
      spec_path: alphaswarm_platform/configs/terraform/rpi.yaml
      environment_dir: alphaswarm_platform/terraform/environments/rpi
      tfvars_path: alphaswarm_platform/terraform/environments/rpi/terraform.tfvars
      backend_state_path: data/terraform/state/rpi.tfstate
    cluster:
      name: rpi_kubernetes
      kubeconfig_path: ~/.kube/config
      kube_context: ""
      ingress_class: nginx
      ingress_host: alpha-swarm.ai
    endpoints:
      frontend: https://alpha-swarm.ai
      api: https://api.alpha-swarm.ai
      manage: https://manage.alpha-swarm.ai
    # Maps image keys (the ``image_key`` of a service entry) to image names.
    images:
      registry: docker.io/julian0215
      # NOTE(review): the value reads as a placeholder — presumably it must
      # be replaced with a real immutable tag before deploying; confirm.
      app_version: replace-with-immutable-tag
      build_locally: false
      services:
        api: alphaswarm-core
        worker: alphaswarm-worker
        executor: alphaswarm-executor
        beat: alphaswarm-beat
        frontend: alphaswarm-client
        cp: alphaswarm-cp
    auth:
      provider: auth0
      required: true
      oidc_issuer: https://alphaswarm-fund.us.auth0.com/
      audience: https://api.alphaswarm.internal/manage
      client_id: ZwJvVAYGRj6drndJhpKlvyLv18Jybavz
      scim_enabled: true
      # Secrets are referenced by name here rather than stored inline.
      secret_refs:
        client_secret: auth0-client-secret
        scim_bearer_token_hash: auth0-scim-bearer-token-hash
    services:
      - alphaswarm-core
      - alphaswarm-worker
      # The executor image is mapped under ``images.services`` above, and the
      # service registry states that the executor drains the heavy queues
      # while the worker keeps only the light/coordination ones, so the
      # executor is listed alongside the worker.
      - alphaswarm-executor
      - alphaswarm-beat
      - alphaswarm-client
      - alphaswarm-cp
      - postgres
      - redis
      - neo4j
      - chromadb
      - mlflow
      - otel-collector
      - jaeger
      # Phase 1 of the infra-expansion plan: AlphaSwarm-owned shared infra services
      # (declarations only - workload manifests fill in Phase 2 + Phase 5).
      - kafka
      - schema-registry
      - redpanda
      - redpanda-connect
      - flink
      - questdb
      - polaris
      - hudi
      - spark-operator
      - prometheus
      - grafana
      - tempo
      - loki
      - phoenix
      - phoenix-postgres
      - postgres-shared
      - redis-shared
      - minio
      - dagster
      - airbyte
      - datahub

  # ---------------------------------------------------------------------------
  # Single-account minimum deployment target (~$140/mo fixed). Skips
  # EKS / MSK / AgentCore / KB / CloudFront / EventBridge SFN. Use as a
  # stepping-stone before promoting to the full ``aws`` target below.
  # ---------------------------------------------------------------------------
  # Values of the form ``hydrate://ssm/...`` are references rather than
  # literals; presumably resolved from AWS SSM parameters at load time —
  # TODO confirm against the hydrator.
  aws-minimum:
    id: aws-minimum
    label: AWS single-account minimum (~$140/mo)
    kind: aws
    environment: minimum
    cloud_provider: aws
    namespace: alphaswarm
    adapter_preference:
      - aws_eks      # only here so the operator can still attach an EKS
                     # cluster later without retargeting; the minimum env
                     # itself doesn't provision one
      - in_cluster
    terraform:
      stack_slug: alphaswarm-minimum
      spec_path: alphaswarm_platform/configs/terraform/minimum.yaml
      environment_dir: alphaswarm_platform/terraform/environments/minimum
      tfvars_path: alphaswarm_platform/terraform/environments/minimum/terraform.tfvars
      backend_state_path: hydrate://ssm/alphaswarm/minimum/tfstate_bucket_name
    # No ``kubeconfig_path`` / ``kube_context`` here, unlike the ``rpi`` and
    # ``aws`` targets: this environment does not provision an EKS cluster.
    cluster:
      name: alphaswarm-min
      ingress_class: alb
      ingress_host: hydrate://ssm/alphaswarm/minimum/alb_dns_name
    # Only an ``admin`` endpoint is declared; it hydrates from the same
    # ALB DNS name parameter as ``cluster.ingress_host``.
    endpoints:
      admin: hydrate://ssm/alphaswarm/minimum/alb_dns_name
    images:
      registry: hydrate://ssm/alphaswarm/minimum/ecr_registry
      # Placeholder value: must be swapped for an immutable image tag
      # before this target is applied.
      app_version: replace-with-immutable-tag
      build_locally: false
      services:
        admin: alphaswarm-admin
        admin_frontend: alphaswarm-admin-frontend
        api: alphaswarm-core
    auth:
      provider: aws_cognito
      required: true
      oidc_issuer: hydrate://ssm/alphaswarm/minimum/cognito_user_pool_endpoint
      # NOTE(review): the audience is empty here, while the full ``aws``
      # target hydrates it from a Cognito client id — confirm the token
      # validator accepts an empty audience.
      audience: ""
      scim_enabled: false
    # Only infrastructure-level entries are listed; none of the
    # ``alphaswarm-*`` workloads named in ``images.services`` appear here.
    # Presumably they run as tasks on ``ecs-fargate-cluster`` — verify.
    services:
      - alb
      - ecs-fargate-cluster
      - cognito-user-pool
      - postgres
      - redis

  # ---------------------------------------------------------------------------
  # Phase G of the AWS hybrid rollout — AWS-native deployment target.
  # Composes the EKS Karpenter quant runtime (heritage AlphaSwarm composition) with
  # the new ECS Fargate slice (admin BFF + AgentCore proxy) + the Bedrock
  # AgentCore + Knowledge Base + Cognito + CloudFront + EventBridge SFN
  # surfaces. The matching TerraformStackSpec lives at
  # ``alphaswarm_platform/configs/terraform/aws.yaml`` and the rendered HCL bundle
  # at ``alphaswarm_platform/terraform/environments/live/``.
  # ---------------------------------------------------------------------------
  # ``${env}`` inside the ``hydrate://`` paths below is presumably
  # substituted with this target's ``environment`` (``live``) — TODO confirm
  # against the hydrator. ``aws-minimum`` hard-codes ``minimum`` instead.
  aws:
    id: aws
    label: AWS hybrid (EKS Karpenter + ECS Fargate + Bedrock)
    kind: aws
    environment: live
    cloud_provider: aws
    namespace: alphaswarm
    adapter_preference:
      - aws_eks      # primary — EKS Karpenter quant runtime
      - in_cluster   # fallback inside the EKS pod
    terraform:
      stack_slug: alphaswarm-aws-live
      spec_path: alphaswarm_platform/configs/terraform/aws.yaml
      environment_dir: alphaswarm_platform/terraform/environments/live
      tfvars_path: alphaswarm_platform/terraform/environments/live/terraform.tfvars
      backend_state_path: hydrate://ssm/alphaswarm/${env}/tfstate_bucket_name
    cluster:
      name: hydrate://ssm/alphaswarm/${env}/eks_cluster_name
      kubeconfig_path: ~/.kube/config
      # Empty string — presumably "use the kubeconfig's current context";
      # TODO confirm against the adapter.
      kube_context: ""
      ingress_class: nginx
      ingress_host: alpha-swarm.ai
    endpoints:
      # Public marketing site + tenant API stay on the Cloudflare tunnel
      # (heritage edge) — admin + AgentCore land on CloudFront.
      frontend: https://alpha-swarm.ai
      api: https://api.alpha-swarm.ai
      manage: https://manage.alpha-swarm.ai
      admin: https://admin.alpha-swarm.ai
      agentcore: https://agentcore.alpha-swarm.ai
    images:
      registry: hydrate://ssm/alphaswarm/${env}/ecr_registry
      # Placeholder value: must be swapped for an immutable image tag
      # before this target is applied.
      app_version: replace-with-immutable-tag
      build_locally: false
      # Image key -> repository name. The first six keys match the ``rpi``
      # target; the remaining five are declared only for AWS targets.
      services:
        api: alphaswarm-core
        worker: alphaswarm-worker
        executor: alphaswarm-executor
        beat: alphaswarm-beat
        frontend: alphaswarm-client
        cp: alphaswarm-cp
        agent: alphaswarm-agent       # ARM64-only AgentCore runtime image
        admin: alphaswarm-admin
        admin_frontend: alphaswarm-admin-frontend
        ingester: alphaswarm-ingester
        ml: alphaswarm-ml
    auth:
      provider: aws_cognito
      required: true
      # Issuer, audience and client secret are all hydrated references
      # rather than literals.
      oidc_issuer: hydrate://ssm/alphaswarm/${env}/cognito_user_pool_endpoint
      audience: hydrate://ssm/alphaswarm/${env}/cognito_shared_client_id
      scim_enabled: false
      secret_refs:
        cognito_client_secret: hydrate://ssm/alphaswarm/${env}/cognito_shared_client_secret
    services:
      # EKS Karpenter — quant runtime tier. Mirrors the ``rpi`` target's
      # list except that ``redpanda-connect`` and ``phoenix-postgres`` are
      # absent here.
      # NOTE(review): confirm those two omissions are intentional
      # (``phoenix`` is still listed). ``alphaswarm-executor`` is also not
      # listed although ``images.services`` declares an ``executor`` image.
      - alphaswarm-core
      - alphaswarm-worker
      - alphaswarm-beat
      - alphaswarm-client
      - alphaswarm-cp
      - postgres
      - redis
      - neo4j
      - chromadb
      - mlflow
      - otel-collector
      - jaeger
      - kafka
      - schema-registry
      - redpanda
      - flink
      - questdb
      - polaris
      - hudi
      - spark-operator
      - prometheus
      - grafana
      - tempo
      - loki
      - phoenix
      - postgres-shared
      - redis-shared
      - minio
      - dagster
      - airbyte
      - datahub
      # ECS Fargate + Bedrock + Edge — new Phase G entries.
      - alb
      - ecs-fargate-cluster
      - cloudfront-distribution
      - cognito-user-pool
      - bedrock-agentcore-runtime
      - bedrock-agentcore-gateway
      - bedrock-agentcore-memory
      - bedrock-kb-collection
      - eventbridge-stepfunctions

# =============================================================================
# Phase 3 §6.2 (RESTRUCTURING_PLAN.md) — Cell registry bootstrap seed.
#
# A "cell" is the deployment-layer unit that composes with the
# application-layer ``TenancyStrategy``. The mapping (from §6.1):
#
#   shared-std    -> shared_schema_rls         (one ns, many tenants, RLS)
#   shared-prem   -> schema_per_tenant         (one ns, one schema/tenant)
#   silo-reg      -> database_per_enterprise   (one ns, one tenant, own DB)
#   silo-custom   -> hybrid                    (per-contract)
#
# This list is the BOOTSTRAP SEED. Once the control plane has the
# ``cells`` table populated via Alembic 0082, live updates flow
# through the ``/manage/cells/*`` routes. Reseeding the YAML is the
# disaster-recovery path; the ORM table is the source of truth in
# normal operation.
#
# Cells in ``provisioning`` state are visible to the cell-router for
# health checks but not for tenant placement. ``active`` cells accept
# new tenants up to ``capacity_max_tenants``. ``draining`` cells stop
# accepting new tenants and migrate existing ones to a sibling cell.
# =============================================================================
cells:
  # Default development cell — runs the existing single-namespace
  # shared deployment as the canonical ``cell-shared-std-local`` so
  # everything you build today is already cell-addressable.
  - id: cell-shared-std-local
    # ``shared-std`` pairs with ``shared_schema_rls`` (one namespace, many
    # tenants, RLS) per the tier mapping in the section header.
    tier: shared-std
    tenancy_strategy: shared_schema_rls
    region: local
    availability_zone: local-1
    k8s_namespace: alphaswarm
    capacity_max_tenants: 100
    # ``active`` cells accept new tenants up to ``capacity_max_tenants``.
    state: active
    # Port 8000 matches the ``alphaswarm-core`` service port declared at the
    # top of this file. No ``data_plane`` or ``target`` key is set on this
    # cell.
    routes:
      api: http://localhost:8000
      ws: ws://localhost:8000/ws
    labels:
      alphaswarm.io/cell-tier: shared-std
      alphaswarm.io/cell-region: local
    annotations:
      alphaswarm.internal/description: "Default local-dev cell mapped to the existing 'alphaswarm' namespace."

  # Phase 3 §6.5 worked example: AWS us-east-1a shared-std cell.
  # Originally defined so the
  # ``alphaswarm_platform/deployments/kubernetes/cells/shared-std-us-east-1a/``
  # overlay tree has a referent; since promoted to ``active`` in Phase G
  # (see the ``state`` note below).
  - id: cell-shared-std-us-east-1a
    # ``shared-std`` pairs with ``shared_schema_rls`` (one namespace, many
    # tenants, RLS) per the tier mapping in the section header.
    tier: shared-std
    tenancy_strategy: shared_schema_rls
    region: us-east-1
    availability_zone: us-east-1a
    k8s_namespace: cell-shared-std-us-east-1a
    capacity_max_tenants: 5000
    # Phase G of the AWS hybrid rollout — promoted from
    # ``provisioning`` to ``active`` once the matching
    # ``alphaswarm_platform/terraform/environments/live/`` apply landed and
    # the EKS cluster + ECS Fargate + Bedrock AgentCore stacks reported
    # healthy.
    # NOTE(review): no commit-tracking annotation exists under
    # ``annotations`` yet — add one or record the commit here.
    state: active
    target: aws  # binds the cell to the ``targets.aws`` entry
    routes:
      api: https://us-east-1a.shared-std.alpha-swarm.ai
      ws: wss://us-east-1a.shared-std.alpha-swarm.ai/ws
    # Phase 6 §9 — per-cell data plane. The cell-data-plane Helm chart
    # stamps these endpoints; the application reads them via
    # RequestContext.cell_id.
    data_plane:
      postgres_dsn_secret: secret/alphaswarm/cells/cell-shared-std-us-east-1a/postgres
      redis_url: redis://alphaswarm-cell-redis.cell-shared-std-us-east-1a.svc.cluster.local:6379/0
      minio_endpoint: http://alphaswarm-cell-minio.cell-shared-std-us-east-1a.svc.cluster.local:9000
      minio_bucket_prefix: alphaswarm-cell-shared-std-us-east-1a
      mlflow_tracking_uri: http://alphaswarm-cell-mlflow.cell-shared-std-us-east-1a.svc.cluster.local:5000
      iceberg_rest_uri: http://alphaswarm-cell-iceberg-rest.cell-shared-std-us-east-1a.svc.cluster.local:8181
      iceberg_warehouse_uri: s3://alphaswarm-cell-shared-std-us-east-1a-warehouse/
      vault_transit_key: alphaswarm-cell-shared-std-us-east-1a
    labels:
      alphaswarm.io/cell-tier: shared-std
      alphaswarm.io/cell-region: us-east-1
    annotations:
      # Description kept in step with ``state`` above: the cell is active,
      # so it must not advertise itself as "not yet active".
      alphaswarm.internal/description: "AWS us-east-1a shared-std cell (Phase 3 worked example; active since Phase G)."

  # AWS us-east-1a shared-prem cell. ``shared-prem`` pairs with
  # ``schema_per_tenant`` (one namespace, one schema per tenant) per the
  # tier mapping in the section header.
  - id: cell-shared-prem-us-east-1a
    tier: shared-prem
    tenancy_strategy: schema_per_tenant
    region: us-east-1
    availability_zone: us-east-1a
    k8s_namespace: cell-shared-prem-us-east-1a
    capacity_max_tenants: 200
    # ``provisioning`` cells are visible to the cell-router for health
    # checks but not for tenant placement. No ``target`` binding is set.
    state: provisioning
    routes:
      api: https://us-east-1a.shared-prem.alpha-swarm.ai
      ws: wss://us-east-1a.shared-prem.alpha-swarm.ai/ws
    # Per-cell data plane; every endpoint and name below embeds the cell's
    # ``k8s_namespace`` / id.
    data_plane:
      postgres_dsn_secret: secret/alphaswarm/cells/cell-shared-prem-us-east-1a/postgres
      redis_url: redis://alphaswarm-cell-redis.cell-shared-prem-us-east-1a.svc.cluster.local:6379/0
      minio_endpoint: http://alphaswarm-cell-minio.cell-shared-prem-us-east-1a.svc.cluster.local:9000
      minio_bucket_prefix: alphaswarm-cell-shared-prem-us-east-1a
      mlflow_tracking_uri: http://alphaswarm-cell-mlflow.cell-shared-prem-us-east-1a.svc.cluster.local:5000
      iceberg_rest_uri: http://alphaswarm-cell-iceberg-rest.cell-shared-prem-us-east-1a.svc.cluster.local:8181
      iceberg_warehouse_uri: s3://alphaswarm-cell-shared-prem-us-east-1a-warehouse/
      vault_transit_key: alphaswarm-cell-shared-prem-us-east-1a
    labels:
      alphaswarm.io/cell-tier: shared-prem
      alphaswarm.io/cell-region: us-east-1
    annotations:
      alphaswarm.internal/description: "AWS us-east-1a shared-prem cell (Phase 3 worked example)."

  # Dedicated silo-reg cell for a single tenant. ``silo-reg`` pairs with
  # ``database_per_enterprise`` (one namespace, one tenant, own DB) per the
  # tier mapping in the section header.
  - id: cell-silo-reg-acme
    tier: silo-reg
    tenancy_strategy: database_per_enterprise
    region: us-east-1
    availability_zone: us-east-1a
    k8s_namespace: cell-silo-reg-acme
    # Capacity of 1 matches the single entry in ``pinned_tenants`` below.
    capacity_max_tenants: 1
    # ``provisioning`` cells are visible to the cell-router for health
    # checks but not for tenant placement. No ``target`` binding is set.
    state: provisioning
    # Presumably restricts placement on this cell to the listed tenant ids —
    # verify against the cell-router.
    pinned_tenants:
      - tenant_acme
    routes:
      api: https://acme.silo-reg.alpha-swarm.ai
      ws: wss://acme.silo-reg.alpha-swarm.ai/ws
    # silo-reg cells MUST set vault_transit_key per Phase 6 §9.7 — the
    # cryptographic data-plane separation is the FINRA / ISO 27001 lever.
    data_plane:
      postgres_dsn_secret: secret/alphaswarm/cells/cell-silo-reg-acme/postgres
      redis_url: redis://alphaswarm-cell-redis.cell-silo-reg-acme.svc.cluster.local:6379/0
      minio_endpoint: http://alphaswarm-cell-minio.cell-silo-reg-acme.svc.cluster.local:9000
      minio_bucket_prefix: alphaswarm-cell-silo-reg-acme
      mlflow_tracking_uri: http://alphaswarm-cell-mlflow.cell-silo-reg-acme.svc.cluster.local:5000
      iceberg_rest_uri: http://alphaswarm-cell-iceberg-rest.cell-silo-reg-acme.svc.cluster.local:8181
      iceberg_warehouse_uri: s3://alphaswarm-cell-silo-reg-acme-warehouse/
      vault_transit_key: alphaswarm-cell-silo-reg-acme
    labels:
      alphaswarm.io/cell-tier: silo-reg
      alphaswarm.io/cell-region: us-east-1
    annotations:
      alphaswarm.internal/description: "Dedicated silo-reg cell for the Acme tenant (Phase 3 worked example; FINRA / ISO 27001 posture)."
