#!/bin/bash
# To be run by user llm to create the pod and container with
# PyTorch + HTTP API, to pull the ColNomic embedding model if missing, and
# to create the systemd services.

set -e

# Environment variables
POD_NAME='pytorch_pod'
CTR_NAME='pytorch_ctr'
# NVIDIA NGC PyTorch container with CUDA 13.0 (25.08 release)
BASE_IMAGE='nvcr.io/nvidia/pytorch:25.08-py3'
CUSTOM_IMAGE='localhost/pytorch-api:25.08-cuda13.0'
HF_MODEL_ID='nomic-ai/colnomic-embed-multimodal-7b'
HF_MODEL_URL='https://huggingface.co/nomic-ai/colnomic-embed-multimodal-7b'
HOST_LOCAL_IP='127.0.0.1'
PYTORCH_HOST_PORT='8086'
PYTORCH_CONTAINER_PORT='8000'
BIND_DIR="$HOME/.local/share/$POD_NAME"
AI_MODELS_DIR="$BIND_DIR/ai-models"
PYTHON_APPS_DIR="$BIND_DIR/python-apps"
USER_SYSTEMD_DIR="$HOME/.config/systemd/user"
CONTAINERFILE="$BIND_DIR/containerfile"
PY_APP="$PYTHON_APPS_DIR/embed-multimodal-7b.py"
echo "Python app: $PY_APP"

# Prepare directories
mkdir -p "$AI_MODELS_DIR" "$PYTHON_APPS_DIR" "$USER_SYSTEMD_DIR"
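
# The container below runs "python /python-apps/embed-multimodal-7b.py", but
# nothing so far creates that file. What follows is a minimal illustrative
# sketch, generated only if $PY_APP is missing, never overwriting a real app.
# It assumes the ColQwen2_5 / ColQwen2_5_Processor classes that the
# colnomic-embed-multimodal-7b model card documents for colpali-engine, and
# it implements exactly the /health and /embed-texts routes that the smoke
# tests below expect. Replace it with the real application as needed.
if [ ! -f "$PY_APP" ]; then
cat >"$PY_APP" <<'EOF'
"""Minimal FastAPI sketch serving ColNomic text embeddings.

ASSUMPTION: colpali_engine exposes ColQwen2_5 / ColQwen2_5_Processor as
shown on the model card. Loading the model at startup also downloads it
into the HF cache (HF_HOME -> /models/hf) if it is missing.
"""
import os

import torch
import uvicorn
from colpali_engine.models import ColQwen2_5, ColQwen2_5_Processor
from fastapi import FastAPI
from pydantic import BaseModel

MODEL_ID = os.environ.get("HF_MODEL_ID", "nomic-ai/colnomic-embed-multimodal-7b")
PORT = int(os.environ.get("PYTORCH_CONTAINER_PORT", "8000"))

# Load once at startup; pulls the model into the bind-mounted cache if absent.
model = ColQwen2_5.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="cuda:0",
    attn_implementation="flash_attention_2",
).eval()
processor = ColQwen2_5_Processor.from_pretrained(MODEL_ID)

app = FastAPI()


class TextsRequest(BaseModel):
    texts: list[str]


@app.get("/health")
def health():
    return {"status": "ok", "model": MODEL_ID}


@app.post("/embed-texts")
def embed_texts(req: TextsRequest):
    # Late-interaction (multi-vector) embeddings: one matrix per input text.
    batch = processor.process_queries(req.texts).to(model.device)
    with torch.no_grad():
        embeddings = model(**batch)
    return {"results": [e.tolist() for e in embeddings.cpu().float()]}


if __name__ == "__main__":
    # Bind 0.0.0.0 so the pod's published port mapping can reach the app.
    uvicorn.run(app, host="0.0.0.0", port=PORT)
EOF
fi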

# Generate containerfile
cat >"$CONTAINERFILE" <<'EOF'
# Containerfile for PyTorch + FastAPI + ColPali (ColNomic embed model support)
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

# Hugging Face caches and Python apps directory (bind-mounted at runtime)
ENV HF_HOME=/models/hf \
    TRANSFORMERS_CACHE=/models/hf/transformers \
    PYTHON_APPS_DIR=/python-apps

# Ensure directories exist
RUN mkdir -p /models/hf/transformers /python-apps

# Install git (needed to pip-install colpali from GitHub) and clean apt lists
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# Upgrade pip and install runtime dependencies:
# - fastapi, uvicorn for the HTTP API
# - transformers, accelerate, peft for the HF + ColPali ecosystem
# - flash-attn to provide FlashAttention-2 kernels
# - colpali pinned to a specific commit, installed WITHOUT deps to avoid
#   overriding the PyTorch provided by the base image.
RUN python -m pip install --upgrade pip && \
    python -m pip install --no-cache-dir \
        fastapi \
        "uvicorn[standard]" \
        transformers \
        accelerate \
        peft && \
    python -m pip install --no-cache-dir flash-attn --no-build-isolation && \
    python -m pip install --no-cache-dir --no-deps \
        "git+https://github.com/illuin-tech/colpali.git@97e389a" && \
    python -m pip cache purge

# Make /python-apps importable by default
ENV PYTHONPATH=/python-apps:${PYTHONPATH}

WORKDIR /workspace

# Default command can be overridden by podman run.
CMD ["bash"]
EOF

# Build custom container image
podman build \
    --build-arg BASE_IMAGE="$BASE_IMAGE" \
    -t "$CUSTOM_IMAGE" \
    -f "$CONTAINERFILE" \
    "$(dirname "$CONTAINERFILE")"

# Create pod if not yet existing
if ! podman pod exists "$POD_NAME"; then
    podman pod create -n "$POD_NAME" \
        -p "$HOST_LOCAL_IP:$PYTORCH_HOST_PORT:$PYTORCH_CONTAINER_PORT"
    echo "Pod '$POD_NAME' created."
else
    echo "Pod '$POD_NAME' already exists."
fi

# PyTorch + HTTP API container
# Remove old container (--ignore: do not fail if it does not exist)
podman rm -f --ignore "$CTR_NAME"

# New container
podman run -d --name "$CTR_NAME" --pod "$POD_NAME" \
    --device nvidia.com/gpu=all \
    -e HF_MODEL_ID="$HF_MODEL_ID" \
    -e HF_MODEL_URL="$HF_MODEL_URL" \
    -e PYTORCH_CONTAINER_PORT="$PYTORCH_CONTAINER_PORT" \
    -v "$AI_MODELS_DIR":/models \
    -v "$PYTHON_APPS_DIR":/python-apps \
    "$CUSTOM_IMAGE" \
    python /python-apps/embed-multimodal-7b.py

# Wait for API readiness (/health). The first run may have to download the
# model (several GB) before the app can serve, so allow a generous timeout.
HEALTH_URL="http://$HOST_LOCAL_IP:$PYTORCH_HOST_PORT/health"
MAX_ATTEMPTS=90
echo -n "Waiting for PyTorch API at $HEALTH_URL ..."
for attempt in $(seq 1 "$MAX_ATTEMPTS"); do
    if curl -fsS "$HEALTH_URL" >/dev/null 2>&1; then
        echo " ready."
        break
    fi
    if [ "$attempt" -eq "$MAX_ATTEMPTS" ]; then
        echo " timeout error." >&2
        echo "Container logs:" >&2
        podman logs "$CTR_NAME" >&2
        exit 1
    fi
    sleep 10
done

# Smoke tests

# GPU availability ("|| GPU_RC=$?" keeps set -e from aborting on rc != 0)
GPU_RC=0
GPU_JSON="$(
    podman exec "$CTR_NAME" python -c '
import json, sys

try:
    import torch
except Exception as e:
    # Exit code 1 -> internal error (import torch failed, etc.)
    print(json.dumps({"error": f"import torch failed: {e}"}))
    sys.exit(1)

data = {
    "cuda_available": bool(torch.cuda.is_available()),
    "device_count": int(torch.cuda.device_count()),
}
print(json.dumps(data))

# Exit code 0 -> cuda_available is True
# Exit code 2 -> cuda_available is False
sys.exit(0 if data["cuda_available"] else 2)
'
)" || GPU_RC=$?

if [ "$GPU_RC" -eq 0 ]; then
    echo "GPU is available in container $CTR_NAME (cuda_available == true)."
elif [ "$GPU_RC" -eq 2 ]; then
    echo "ERROR: CUDA GPU is NOT available inside the container." >&2
    echo "Details: $GPU_JSON" >&2
    echo "This may be due to missing NVIDIA CDI configuration or SELinux labeling." >&2
    exit 1
else
    echo "ERROR: podman exec GPU test failed (exit code $GPU_RC)." >&2
    echo "Details: $GPU_JSON" >&2
    echo "Container logs for debugging:" >&2
    podman logs "$CTR_NAME" || true
    exit 1
fi

# Python API /health
HEALTH_JSON="$(curl -fsS "$HEALTH_URL")"
echo "$HEALTH_JSON"
if ! printf '%s' "$HEALTH_JSON" | grep -q '"status":"ok"'; then
    echo "ERROR: /health endpoint did not report status \"ok\"." >&2
    exit 1
fi

# Python API /embed-texts
EMBED_URL="http://$HOST_LOCAL_IP:$PYTORCH_HOST_PORT/embed-texts"
EMBED_JSON="$(curl -fsS -X POST "$EMBED_URL" \
    -H "Content-Type: application/json" \
    -d '{"texts":["hello world from colnomic"]}')"
echo "$EMBED_JSON"
if ! printf '%s' "$EMBED_JSON" | grep -q '"results"'; then
    echo "ERROR: /embed-texts endpoint did not return embeddings as expected." >&2
    exit 1
fi

# Generate systemd service files (podman generate systemd is deprecated in
# favor of Quadlet, but still functional)
cd "$USER_SYSTEMD_DIR"
podman generate systemd --name --new --files "$POD_NAME"
echo "Generated systemd service files."

# Stop & remove the live pod and containers; systemd manages them from here on
podman pod stop --ignore --time 15 "$POD_NAME"
podman pod rm -f --ignore "$POD_NAME"
if podman pod exists "$POD_NAME"; then
    echo "ERROR: Pod $POD_NAME still exists." >&2
    exit 1
else
    echo "Stopped & removed live pod $POD_NAME and its containers."
fi

# Enable systemd services
systemctl --user daemon-reload

systemctl --user enable --now "pod-${POD_NAME}.service"
systemctl --user is-enabled "pod-${POD_NAME}.service"
systemctl --user is-active "pod-${POD_NAME}.service"
echo "Enabled systemd service pod-${POD_NAME}.service"
echo "To view status: systemctl --user status pod-${POD_NAME}.service"
echo "To view logs:   journalctl --user -u pod-${POD_NAME}.service -f"

systemctl --user enable --now "container-${CTR_NAME}.service"
systemctl --user is-enabled "container-${CTR_NAME}.service"
systemctl --user is-active "container-${CTR_NAME}.service"
echo "Enabled systemd service container-${CTR_NAME}.service"
echo "To view status: systemctl --user status container-${CTR_NAME}.service"
echo "To view logs:   journalctl --user -u container-${CTR_NAME}.service -f"

echo "PyTorch API is reachable at http://$HOST_LOCAL_IP:$PYTORCH_HOST_PORT"
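
# Example follow-up request (illustrative only; the exact response shape
# depends on the app serving /embed-texts):
#   curl -s -X POST "http://127.0.0.1:8086/embed-texts" \
#        -H "Content-Type: application/json" \
#        -d '{"texts":["first document","second document"]}' | head -c 300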