init
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
# Deployment: Cloud Run, Vertex AI, GKE
|
||||
|
||||
## Development Modes
|
||||
|
||||
```bash
|
||||
adk web samples/agents/my_agent.py:agent --port 8080
|
||||
adk run samples/agents/my_agent.py:agent "What is 2+2?" --streaming
|
||||
adk api_server samples/agents/my_agent.py:agent --port 8000
|
||||
```
|
||||
|
||||
Endpoints: `/chat`, `/stream`, `/health`
|
||||
|
||||
## Cloud Run
|
||||
|
||||
```dockerfile
|
||||
FROM python:3.11-slim
|
||||
WORKDIR /app
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
|
||||
COPY pyproject.toml uv.lock ./
|
||||
COPY src/ ./src/
|
||||
RUN uv sync --frozen --no-cache
|
||||
EXPOSE 8080
|
||||
CMD ["uv", "run", "adk", "api_server", "src/my_agent.py:agent", "--host", "0.0.0.0", "--port", "8080"]
|
||||
```
|
||||
|
||||
```bash
|
||||
export PROJECT_ID=my-project REGION=us-central1

# Build the container image with Cloud Build and push it to the registry.
gcloud builds submit --tag "gcr.io/$PROJECT_ID/my-agent"

# Option A (quick start): pass the API key as a plain environment variable.
# The value is visible in the service configuration, so prefer Option B
# for anything beyond a throwaway test.
gcloud run deploy my-agent \
  --image "gcr.io/$PROJECT_ID/my-agent" \
  --region "$REGION" \
  --set-env-vars "GOOGLE_API_KEY=$GOOGLE_API_KEY"

# Option B (recommended): Secret Manager.
# Use this INSTEAD of --set-env-vars above. NOTE(review): if the variable was
# already set as a plain env var on the service, it may have to be removed
# before it can be re-bound to a secret -- confirm against the gcloud docs.
# printf avoids the non-portable `echo -n` and stores the real key rather
# than a placeholder string.
printf '%s' "$GOOGLE_API_KEY" | gcloud secrets create google-api-key --data-file=-
gcloud run deploy my-agent \
  --image "gcr.io/$PROJECT_ID/my-agent" \
  --region "$REGION" \
  --set-secrets GOOGLE_API_KEY=google-api-key:latest
|
||||
```
|
||||
|
||||
## Vertex AI
|
||||
|
||||
```bash
|
||||
adk deploy --target vertex --agent my_agent.py:agent --project my-project
|
||||
```
|
||||
|
||||
```yaml
|
||||
agent:
|
||||
name: my-agent
|
||||
model: gemini-2.5-flash
|
||||
region: us-central1
|
||||
scaling: {min_instances: 1, max_instances: 10}
|
||||
resources: {cpu: 2, memory: 4Gi}
|
||||
```
|
||||
|
||||
```python
|
||||
from google.cloud import aiplatform
|
||||
aiplatform.init(project='my-project', location='us-central1')
|
||||
endpoint = aiplatform.Endpoint('projects/123/locations/us-central1/endpoints/456')
|
||||
response = endpoint.predict(instances=[{'prompt': 'What is 2+2?'}])
|
||||
```
|
||||
|
||||
## GKE
|
||||
|
||||
```yaml
|
||||
# Deployment: runs the agent container. Pods carry the `app: my-agent` label,
# which both the selector below and the Service match on.
apiVersion: apps/v1
kind: Deployment
metadata: {name: my-agent}
spec:
  replicas: 3
  # apps/v1 requires spec.selector, and it must match the pod template labels.
  selector:
    matchLabels: {app: my-agent}
  template:
    metadata:
      labels: {app: my-agent}
    spec:
      containers:
        - name: agent
          image: gcr.io/my-project/my-agent:latest
          ports: [{containerPort: 8080}]
          env:
            # Read from the `google-api-key` Secret, entry `key`.
            - name: GOOGLE_API_KEY
              valueFrom: {secretKeyRef: {name: google-api-key, key: key}}
          resources:
            requests: {memory: "2Gi", cpu: "1"}
            limits: {memory: "4Gi", cpu: "2"}
---
# Service: exposes the pods on port 80 through a load balancer.
apiVersion: v1
kind: Service
metadata: {name: my-agent}
spec:
  type: LoadBalancer
  # Without a selector the Service has no endpoints and routes to nothing.
  selector: {app: my-agent}
  ports: [{port: 80, targetPort: 8080}]
---
# HorizontalPodAutoscaler: scales the Deployment on average CPU utilization.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata: {name: my-agent-hpa}
spec:
  # apiVersion lets the autoscaler resolve which API group "Deployment" is in.
  scaleTargetRef: {apiVersion: apps/v1, kind: Deployment, name: my-agent}
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource: {name: cpu, target: {type: Utilization, averageUtilization: 70}}
|
||||
```
|
||||
|
||||
```bash
|
||||
gcloud container clusters create my-cluster --region us-central1 --num-nodes 3
|
||||
gcloud container clusters get-credentials my-cluster --region us-central1
|
||||
kubectl create secret generic google-api-key --from-literal=key=$GOOGLE_API_KEY
|
||||
kubectl apply -f deployment.yaml
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
```python
|
||||
# config.py
|
||||
import os
from dataclasses import dataclass, field
|
||||
|
||||
@dataclass
|
||||
class Config:
|
||||
model_id: str = os.getenv('MODEL_ID', 'gemini-2.5-flash')
|
||||
api_key: str = os.getenv('GOOGLE_API_KEY')
|
||||
log_level: str = os.getenv('LOG_LEVEL', 'INFO')
|
||||
|
||||
# Health checks
|
||||
@app.get('/health')
async def health():
    """Health-check endpoint: always respond with a static healthy status."""
    return {'status': 'healthy'}
|
||||
|
||||
# Logging
|
||||
# Imported under an alias so it does not shadow the standard-library
# `logging` module, which application code keeps using for its log calls.
from google.cloud import logging as cloud_logging

client = cloud_logging.Client()
# Attach the Cloud Logging handler to the standard-library root logger.
client.setup_logging()
|
||||
|
||||
# Rate limiting
|
||||
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

# Rate-limit per client, keyed on the remote address of the request.
limiter = Limiter(key_func=get_remote_address)
# slowapi looks the limiter up on the app state, and needs a handler
# registered to turn RateLimitExceeded into an HTTP 429 response.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
||||
|
||||
@app.post('/chat')
@limiter.limit('10/minute')
async def chat(request: Request, prompt: str):
    """Run the agent on ``prompt`` and return its text reply.

    ``request`` is not used in the body; the rate-limit decorator presumably
    needs it in the signature (slowapi documents this requirement).
    """
    result = await agent.run(prompt)
    return {'response': result.text}
|
||||
```
|
||||
Reference in New Issue
Block a user