init

2026-04-12 01:06:31 +07:00
commit 10d660cbcb
1066 changed files with 228596 additions and 0 deletions
--- a/.opencode/skills/ai-multimodal/.env.example
+++ b/.opencode/skills/ai-multimodal/.env.example
@@ -0,0 +1,230 @@
+# Google Gemini API Configuration
+
+# ============================================================================
+# OPTION 1: Google AI Studio (Default - Recommended for most users)
+# ============================================================================
+# Get your API key: https://aistudio.google.com/apikey
+GEMINI_API_KEY=your_api_key_here
+
+# ============================================================================
+# API Key Rotation (Optional - For high-volume usage)
+# ============================================================================
+# Add multiple API keys for automatic rotation on rate limit errors.
+# Free tier accounts are heavily rate-limited; rotation helps distribute load.
+#
+# Format: GEMINI_API_KEY_N where N is 2, 3, 4, etc.
+# The primary GEMINI_API_KEY is always used first.
+#
+# GEMINI_API_KEY_2=your_second_api_key
+# GEMINI_API_KEY_3=your_third_api_key
+# GEMINI_API_KEY_4=your_fourth_api_key
+#
+# Features:
+# - Auto-rotates on RESOURCE_EXHAUSTED / 429 errors
+# - 60-second cooldown per key after rate limit
+# - Logs rotation events when the --verbose flag is set
+# - Backward compatible: single key still works
+
+# ============================================================================
+# OPTION 2: Vertex AI (Google Cloud Platform)
+# ============================================================================
+# Uncomment these lines to use Vertex AI instead of Google AI Studio
+# GEMINI_USE_VERTEX=true
+# VERTEX_PROJECT_ID=your-gcp-project-id
+# VERTEX_LOCATION=us-central1
+
+# ============================================================================
+# Model Selection (Optional)
+# ============================================================================
+# Override default models for specific capabilities
+# If not set, intelligent defaults are used based on task type
+
+# --- Image Generation ---
+# Used by: --task generate (image)
+# Default: gemini-2.5-flash-image (Nano Banana Flash - fast, cost-effective)
+# Alternative: imagen-4.0-generate-001 (production quality)
+# NOTE: All image generation requires billing - there is no free tier (free-tier quota is 0)
+# Options:
+#   gemini-2.5-flash-image            - Nano Banana Flash: fast, ~$1/1M tokens (DEFAULT)
+#   gemini-3-pro-image-preview        - Nano Banana Pro: 4K text, reasoning (requires billing)
+#   imagen-4.0-generate-001           - Imagen 4 Standard: production quality (~$0.02/image)
+#   imagen-4.0-ultra-generate-001     - Imagen 4 Ultra: maximum quality (~$0.04/image)
+#   imagen-4.0-fast-generate-001      - Imagen 4 Fast: speed-optimized (~$0.01/image)
+# IMAGE_GEN_MODEL=gemini-2.5-flash-image
+
+# --- Video Generation ---
+# Used by: --task generate-video (new capability)
+# Default: veo-3.1-generate-preview
+# NOTE: Video generation requires billing - no free tier fallback available
+# Options:
+#   veo-3.1-generate-preview       - Latest, native audio, frame control (requires billing)
+#   veo-3.1-fast-generate-preview  - Speed-optimized for business (requires billing)
+#   veo-3.0-generate-001           - Stable, native audio, 8s videos (requires billing)
+#   veo-3.0-fast-generate-001      - Stable fast variant (requires billing)
+# VIDEO_GEN_MODEL=veo-3.1-generate-preview
+
+# --- Multimodal Analysis ---
+# Used by: --task analyze, transcribe, extract
+# Default: gemini-2.5-flash
+# Options:
+#   gemini-3-pro-preview           - Latest, agentic workflows, 1M context
+#   gemini-2.5-flash               - Best price/performance (recommended)
+#   gemini-2.5-pro                 - Highest quality
+# MULTIMODAL_MODEL=gemini-2.5-flash
+
+# --- Legacy Compatibility ---
+# Generic model override (use specific variables above instead)
+# GEMINI_MODEL=gemini-2.5-flash
+# GEMINI_IMAGE_GEN_MODEL=gemini-2.5-flash-image
+
+# ============================================================================
+# MiniMax API Configuration (Optional - for image/video/speech/music generation)
+# ============================================================================
+# Get your API key: https://platform.minimax.io/user-center/basic-information/interface-key
+# MINIMAX_API_KEY=your_minimax_api_key_here
+
+# --- MiniMax Image Generation ---
+# Models: image-01 (standard), image-01-live (enhanced)
+# Cost: ~$0.03/image | Rate: 10 RPM
+# MINIMAX_IMAGE_MODEL=image-01
+
+# --- MiniMax Video Generation (Hailuo) ---
+# Models: MiniMax-Hailuo-2.3, MiniMax-Hailuo-2.3-Fast, MiniMax-Hailuo-02, S2V-01
+# Cost: $0.25-0.52/video | Rate: 5 RPM
+# MINIMAX_VIDEO_MODEL=MiniMax-Hailuo-2.3
+
+# --- MiniMax Speech/TTS ---
+# Models: speech-2.8-hd (best), speech-2.8-turbo (fast)
+# Cost: $30-50/1M chars | Rate: 60 RPM | 300+ voices, 40+ languages
+# MINIMAX_SPEECH_MODEL=speech-2.8-hd
+
+# --- MiniMax Music Generation ---
+# Models: music-2.5 (4-minute songs with vocals)
+# Cost: $0.03-0.075/gen | Rate: 120 RPM
+# MINIMAX_MUSIC_MODEL=music-2.5
+
+# ============================================================================
+# Rate Limiting Configuration (Optional)
+# ============================================================================
+# Requests per minute limit (adjust based on your tier)
+# GEMINI_RPM_LIMIT=15
+
+# Tokens per minute limit
+# GEMINI_TPM_LIMIT=4000000
+
+# Requests per day limit
+# GEMINI_RPD_LIMIT=1500
+
+# ============================================================================
+# Video Generation Options (Optional)
+# ============================================================================
+# Video duration in seconds (8s only for now)
+# VEO_DURATION=8
+
+# Video resolution: 720p or 1080p
+# VEO_RESOLUTION=1080p
+
+# Aspect ratio: 16:9, 9:16, 1:1 (16:9 is default)
+# VEO_ASPECT_RATIO=16:9
+
+# Frame rate: 24fps (fixed for now)
+# VEO_FPS=24
+
+# Enable native audio generation
+# VEO_AUDIO=true
+
+# ============================================================================
+# Image Generation Options (Optional)
+# ============================================================================
+# Number of images to generate (1-4)
+# IMAGEN_NUM_IMAGES=1
+
+# Image size: 1K or 2K (Ultra/Standard only)
+# IMAGEN_SIZE=1K
+
+# Aspect ratio: 1:1, 16:9, 9:16, 4:3, 3:4
+# IMAGEN_ASPECT_RATIO=1:1
+
+# Enable person generation (restricted in EEA, CH, UK)
+# IMAGEN_PERSON_GENERATION=true
+
+# Add SynthID watermark (enabled by default)
+# IMAGEN_WATERMARK=true
+
+# ============================================================================
+# Processing Options (Optional)
+# ============================================================================
+# Video resolution mode: default or low-res
+# low-res uses ~100 tokens/second vs ~300 for default
+# GEMINI_VIDEO_RESOLUTION=default
+
+# Audio quality: default (16 Kbps mono, auto-downsampled)
+# GEMINI_AUDIO_QUALITY=default
+
+# PDF processing mode: auto (select by file size), inline (<20MB), or file-api (>20MB)
+# GEMINI_PDF_MODE=auto
+
+# ============================================================================
+# Retry Configuration (Optional)
+# ============================================================================
+# Maximum retry attempts for failed requests
+# GEMINI_MAX_RETRIES=3
+
+# Initial retry delay in seconds (uses exponential backoff)
+# GEMINI_RETRY_DELAY=1
+
+# ============================================================================
+# Output Configuration (Optional)
+# ============================================================================
+# Default output directory for generated images
+# OUTPUT_DIR=./output
+
+# Image output format (png or jpeg)
+# IMAGE_FORMAT=png
+
+# Image quality for JPEG (1-100)
+# IMAGE_QUALITY=95
+
+# ============================================================================
+# Context Caching (Optional)
+# ============================================================================
+# Enable context caching for repeated queries on same file
+# GEMINI_ENABLE_CACHING=true
+
+# Cache TTL in seconds (default: 1800 = 30 minutes)
+# GEMINI_CACHE_TTL=1800
+
+# ============================================================================
+# Logging (Optional)
+# ============================================================================
+# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
+# LOG_LEVEL=INFO
+
+# Log file path
+# LOG_FILE=./logs/gemini.log
+
+# ============================================================================
+# Pricing Reference (as of 2025-11)
+# ============================================================================
+# Gemini 2.5 Flash: $1.00/1M input, $0.10/1M output
+# Gemini 2.5 Pro: $3.00/1M input, $12.00/1M output
+# Gemini 3 Pro: $2.00/1M input (<200k), $4.00/1M input (>200k); $12.00/1M output (<200k), $18.00/1M output (>200k)
+# Imagen 4: ~$0.01-$0.04 per image (varies by variant)
+# Veo 3: TBD (preview pricing)
+# Prices change; check current pricing before relying on the figures above: https://ai.google.dev/pricing
+
+# ============================================================================
+# Notes
+# ============================================================================
+# 1. Never commit API keys to version control
+# 2. Add .env to .gitignore
+# 3. API keys can be restricted in Google Cloud Console
+# 4. Monitor usage at: https://aistudio.google.com/apikey
+# 5. Free tier limits: 15 RPM, 1M-4M TPM, 1,500 RPD
+# 6. Vertex AI requires GCP authentication via gcloud CLI
+# 7. Model defaults (Dec 2025):
+#    - Image gen: gemini-2.5-flash-image (Nano Banana Flash - default)
+#    - Image gen: imagen-4.0-generate-001 (alternative for production)
+#    - Video gen: veo-3.1-generate-preview
+#    - Analysis: gemini-2.5-flash
+# 8. Preview models (veo-3.1, gemini-3) may have API changes