# Deployment: Cloud Run, Vertex AI, GKE

## Development Modes

```bash
# Serve the agent through `adk web` on port 8080
adk web samples/agents/my_agent.py:agent --port 8080

# Run the agent once against a single prompt, with --streaming enabled
adk run samples/agents/my_agent.py:agent "What is 2+2?" --streaming

# Serve the agent through `adk api_server` on port 8000
adk api_server samples/agents/my_agent.py:agent --port 8000
```

Endpoints: `/chat`, `/stream`, `/health`

## Cloud Run

```dockerfile
FROM python:3.11-slim
WORKDIR /app

# Copy the uv binary out of its published image instead of installing it with pip
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Project metadata, lockfile and sources
COPY pyproject.toml uv.lock ./
COPY src/ ./src/

# Install exactly what uv.lock pins; --no-cache keeps the download cache out of the image
RUN uv sync --frozen --no-cache

EXPOSE 8080

# Bind to 0.0.0.0 so the server is reachable from outside the container
CMD ["uv", "run", "adk", "api_server", "src/my_agent.py:agent", "--host", "0.0.0.0", "--port", "8080"]
```

```bash
export PROJECT_ID=my-project REGION=us-central1

# Build the image with Cloud Build and push it under the given tag
gcloud builds submit --tag "gcr.io/$PROJECT_ID/my-agent"

# Deploy, passing the API key as a plain environment variable
gcloud run deploy my-agent \
  --image "gcr.io/$PROJECT_ID/my-agent" \
  --region "$REGION" \
  --set-env-vars "GOOGLE_API_KEY=$GOOGLE_API_KEY"

# Secret Manager
# Store the key as a secret ("key" is a placeholder for the real value) ...
echo -n "key" | gcloud secrets create google-api-key --data-file=-

# ... and deploy with the secret mounted as GOOGLE_API_KEY instead of --set-env-vars.
# --image and --region are repeated so the command is complete on its own.
gcloud run deploy my-agent \
  --image "gcr.io/$PROJECT_ID/my-agent" \
  --region "$REGION" \
  --set-secrets GOOGLE_API_KEY=google-api-key:latest
```

## Vertex AI

```bash
adk deploy --target vertex --agent my_agent.py:agent --project my-project
```

```yaml
# NOTE(review): nesting reconstructed on the assumption that every key
# belongs under `agent:` - confirm against the deploy tool's config schema.
agent:
  name: my-agent
  model: gemini-2.5-flash
  region: us-central1
  scaling: {min_instances: 1, max_instances: 10}
  resources: {cpu: 2, memory: 4Gi}
```

```python
from google.cloud import aiplatform

# Set the project and location used by the SDK calls below.
aiplatform.init(project='my-project', location='us-central1')

# Reference an existing endpoint by its full resource name.
endpoint = aiplatform.Endpoint('projects/123/locations/us-central1/endpoints/456')

# Send a single-instance prediction request.
response = endpoint.predict(instances=[{'prompt': 'What is 2+2?'}])
```

## GKE

```yaml
# Deployment: runs the agent container.
apiVersion: apps/v1
kind: Deployment
metadata: {name: my-agent}
spec:
  replicas: 3
  # apps/v1 requires a selector, and it must match the pod template labels.
  selector:
    matchLabels: {app: my-agent}
  template:
    metadata:
      labels: {app: my-agent}
    spec:
      containers:
        - name: agent
          image: gcr.io/my-project/my-agent:latest
          ports: [{containerPort: 8080}]
          env:
            # Read the API key from the `google-api-key` Secret (data key `key`).
            - name: GOOGLE_API_KEY
              valueFrom: {secretKeyRef: {name: google-api-key, key: key}}
          resources:
            requests: {memory: "2Gi", cpu: "1"}
            limits: {memory: "4Gi", cpu: "2"}
---
# Service: exposes the pods on port 80 through a load balancer.
apiVersion: v1
kind: Service
metadata: {name: my-agent}
spec:
  type: LoadBalancer
  # Without a selector the Service has no endpoints to route to.
  selector: {app: my-agent}
  ports: [{port: 80, targetPort: 8080}]
---
# HorizontalPodAutoscaler: scales the Deployment on average CPU utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata: {name: my-agent-hpa}
spec:
  scaleTargetRef: {apiVersion: apps/v1, kind: Deployment, name: my-agent}
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource: {name: cpu, target: {type: Utilization, averageUtilization: 70}}
```

```bash
# Create a regional cluster. With --region, --num-nodes is the node count
# per zone, so the cluster ends up with more than 3 nodes in total.
gcloud container clusters create my-cluster --region us-central1 --num-nodes 3

# Fetch credentials so kubectl talks to the new cluster
gcloud container clusters get-credentials my-cluster --region us-central1

# Create the Secret referenced by the Deployment (data key must be `key`)
kubectl create secret generic google-api-key --from-literal=key="$GOOGLE_API_KEY"

kubectl apply -f deployment.yaml
```

## Best Practices

```python
# config.py
import os
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Config:
    """Runtime settings read from environment variables.

    Each default is resolved when a ``Config`` is instantiated rather than
    when the class is defined, so variables set after import are honoured.
    Any field can still be overridden by passing it to the constructor.

    Attributes:
        model_id: Value of ``MODEL_ID``; ``'gemini-2.5-flash'`` when unset.
        api_key: Value of ``GOOGLE_API_KEY``; ``None`` when unset.
        log_level: Value of ``LOG_LEVEL``; ``'INFO'`` when unset.
    """

    model_id: str = field(
        default_factory=lambda: os.getenv('MODEL_ID', 'gemini-2.5-flash')
    )
    # os.getenv returns None for a missing variable, hence Optional.
    api_key: Optional[str] = field(
        default_factory=lambda: os.getenv('GOOGLE_API_KEY')
    )
    log_level: str = field(
        default_factory=lambda: os.getenv('LOG_LEVEL', 'INFO')
    )

# Health checks
@app.get('/health')
async def health():
    """Return a fixed payload reporting the service as healthy."""
    return {'status': 'healthy'}

# Logging
# Import by qualified name: `from google.cloud import logging` would shadow
# the standard-library `logging` module in this file.
import google.cloud.logging

client = google.cloud.logging.Client()
# Install the Cloud Logging handler for the standard logging module.
client.setup_logging()

# Rate limiting
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

# Track each client's quota by its remote address.
limiter = Limiter(key_func=get_remote_address)

# slowapi expects the limiter on app.state, plus a handler that turns
# RateLimitExceeded into an HTTP 429 response.
# NOTE(review): assumes `app` is a FastAPI/Starlette application - confirm.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


# The route decorator must stay above the limit decorator, and the endpoint
# must accept `request` explicitly for slowapi to hook in.
@app.post('/chat')
@limiter.limit('10/minute')
async def chat(request: Request, prompt: str):
    """Run the agent on ``prompt``, limited to 10 calls per minute per client.

    Args:
        request: Incoming request; required by slowapi to identify the client.
        prompt: Text forwarded to ``agent.run``.

    Returns:
        A dict whose ``'response'`` key holds the ``text`` of the agent result.
    """
    return {'response': (await agent.run(prompt)).text}
```