init

.opencode/skills/agent-browser/SKILL.md (new file, 297 lines)

---
name: ck:agent-browser
description: AI-optimized browser automation CLI with context-efficient snapshots. Use for long autonomous sessions, self-verifying workflows, video recording, and cloud browser testing (Browserbase).
license: Apache-2.0
argument-hint: "[url or task]"
metadata:
  author: claudekit
  version: "1.0.0"
---

# agent-browser Skill

Browser automation CLI designed for AI agents. Uses a "snapshot + refs" paradigm for ~93% less context than Playwright MCP.

## Quick Start

```bash
# Install globally
npm install -g agent-browser

# Download Chromium (one-time)
agent-browser install

# Linux: include system deps
agent-browser install --with-deps

# Verify
agent-browser --version
```

## Core Workflow

The 4-step pattern for all browser automation:

```bash
# 1. Navigate
agent-browser open https://example.com

# 2. Snapshot (get interactive elements with refs)
agent-browser snapshot -i
# Output: button "Sign In" @e1, textbox "Email" @e2, ...

# 3. Interact using refs
agent-browser fill @e2 "user@example.com"
agent-browser click @e1

# 4. Re-snapshot after page changes
agent-browser snapshot -i
```

## When to Use (vs chrome-devtools)

| Use agent-browser | Use chrome-devtools |
|-------------------|---------------------|
| Long autonomous AI sessions | Quick one-off screenshots |
| Context-constrained workflows | Custom Puppeteer scripts needed |
| Video recording for debugging | WebSocket full-frame debugging |
| Cloud browsers (Browserbase) | Existing workflow integration |
| Multi-tab handling | Sharp auto-compression needed |
| Self-verifying build loops | Sessions with auth injection |

**Token efficiency:** ~280 chars per snapshot vs 8K+ for Playwright MCP.

## Command Reference

### Navigation
```bash
agent-browser open <url>         # Navigate to URL
agent-browser back               # Go back
agent-browser forward            # Go forward
agent-browser reload             # Reload page
agent-browser close              # Close browser
```

### Analysis (Snapshot)
```bash
agent-browser snapshot           # Full accessibility tree
agent-browser snapshot -i        # Interactive elements only (recommended)
agent-browser snapshot -c        # Compact output
agent-browser snapshot -d 3      # Limit depth
agent-browser snapshot -s "nav"  # Scope to CSS selector
```

### Interactions (use @refs from snapshot)
```bash
agent-browser click @e1           # Click element
agent-browser dblclick @e1        # Double-click
agent-browser fill @e2 "text"     # Clear and fill input
agent-browser type @e2 "text"     # Type without clearing
agent-browser press Enter         # Press key
agent-browser hover @e1           # Hover over element
agent-browser check @e3           # Check checkbox
agent-browser uncheck @e3         # Uncheck checkbox
agent-browser select @e4 "opt"    # Select dropdown option
agent-browser scroll @e1          # Scroll element into view
agent-browser scroll down 500     # Scroll page by pixels
agent-browser drag @e1 @e2        # Drag from e1 to e2
agent-browser upload @e5 file.pdf # Upload file
```

### Information Retrieval
```bash
agent-browser get text @e1        # Get text content
agent-browser get html @e1        # Get HTML
agent-browser get value @e2       # Get input value
agent-browser get attr @e1 href   # Get attribute
agent-browser get title           # Page title
agent-browser get url             # Current URL
agent-browser get count "li"      # Count elements
agent-browser get box @e1         # Bounding box
```

### State Checks
```bash
agent-browser is visible @e1      # Check visibility
agent-browser is enabled @e1      # Check if enabled
agent-browser is checked @e3      # Check if checked
```

### Media
```bash
agent-browser screenshot            # Capture viewport
agent-browser screenshot --full     # Full page
agent-browser screenshot -o ss.png  # Save to file
agent-browser pdf -o page.pdf       # Export PDF
agent-browser record start          # Start video recording
agent-browser record stop           # Stop and save video
agent-browser record restart        # Restart recording
```

### Wait Conditions
```bash
agent-browser wait @e1                        # Wait for element
agent-browser wait --text "Success"           # Wait for text to appear
agent-browser wait --url "/dashboard"         # Wait for URL pattern
agent-browser wait --load                     # Wait for page load
agent-browser wait --idle                     # Wait for network idle
agent-browser wait --fn "() => window.ready"  # Wait for JS condition
```

### Browser Configuration
```bash
agent-browser viewport 1920 1080            # Set viewport size
agent-browser device "iPhone 14"            # Emulate device
agent-browser geolocation 40.7 -74.0        # Set geolocation
agent-browser offline true                  # Enable offline mode
agent-browser headers '{"X-Custom":"val"}'  # Set headers
agent-browser credentials user pass         # HTTP auth
agent-browser color-scheme dark             # Set color scheme
```

### Storage Management
```bash
agent-browser cookies              # List cookies
agent-browser cookies set name=val # Set cookie
agent-browser cookies clear        # Clear cookies
agent-browser storage local        # Get localStorage
agent-browser storage session      # Get sessionStorage
agent-browser state save auth.json # Save browser state
agent-browser state load auth.json # Load browser state
```

### Network Control
```bash
agent-browser network route "**/*.jpg" --abort               # Block requests
agent-browser network route "**/api/*" --body '{"data":[]}'  # Mock response
agent-browser network unroute "**/*.jpg"                     # Remove specific route
agent-browser network requests                               # List intercepted requests
```

### Semantic Finding
```bash
agent-browser find role button          # Find by ARIA role
agent-browser find text "Submit"        # Find by text content
agent-browser find label "Email"        # Find by label
agent-browser find placeholder "Search" # Find by placeholder
agent-browser find testid "login-btn"   # Find by data-testid
agent-browser find first "button"       # First matching element
agent-browser find last "li"            # Last matching element
agent-browser find nth 2 "li"           # Nth element (0-indexed)
```

### Advanced
```bash
agent-browser tabs                   # List tabs
agent-browser tab new                # New tab
agent-browser tab 2                  # Switch to tab
agent-browser tab close              # Close current tab
agent-browser frame 0                # Switch to frame
agent-browser dialog accept          # Accept dialog
agent-browser dialog dismiss         # Dismiss dialog
agent-browser eval "document.title"  # Execute JS
agent-browser highlight @e1          # Highlight element visually
agent-browser mouse move 100 200     # Move mouse to coordinates
agent-browser mouse down             # Mouse button down
agent-browser mouse up               # Mouse button up
```

## Global Options

| Option | Description |
|--------|-------------|
| `--session <name>` | Named session for parallel testing |
| `--json` | JSON output for parsing |
| `--headed` | Show browser window |
| `--cdp <port>` | Connect via Chrome DevTools Protocol |
| `-p <provider>` | Cloud browser provider |
| `--proxy <url>` | Proxy server |
| `--headers <json>` | Custom HTTP headers |
| `--executable-path` | Custom browser binary |
| `--extension <path>` | Load browser extension |
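
For scripted use, `--json` makes command output machine-readable. A minimal sketch (assuming `jq` is installed; inspect the raw output first, since the exact JSON shape is not documented here):

```bash
agent-browser --session demo open https://example.com
agent-browser --session demo get url --json          # inspect the raw JSON shape
agent-browser --session demo get url --json | jq .   # pretty-print with jq
```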

## Environment Variables

| Variable | Description |
|----------|-------------|
| `AGENT_BROWSER_SESSION` | Default session name |
| `AGENT_BROWSER_PROVIDER` | Cloud provider (e.g., browserbase) |
| `AGENT_BROWSER_EXECUTABLE_PATH` | Browser binary location |
| `AGENT_BROWSER_EXTENSIONS` | Comma-separated extension paths |
| `AGENT_BROWSER_STREAM_PORT` | WebSocket streaming port |
| `AGENT_BROWSER_HOME` | Custom installation directory |
| `AGENT_BROWSER_PROFILE` | Browser profile directory |
| `BROWSERBASE_API_KEY` | Browserbase API key |
| `BROWSERBASE_PROJECT_ID` | Browserbase project ID |
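
These variables set defaults so the matching flags can be omitted per command; for example (placeholder values):

```bash
# Placeholder values: apply a default session and provider once per shell.
export AGENT_BROWSER_SESSION="ci-run"
export AGENT_BROWSER_PROVIDER="browserbase"
agent-browser open https://example.com   # runs in the ci-run session via Browserbase
```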

## Common Patterns

### Form Submission
```bash
agent-browser open https://example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3                 # Submit button
agent-browser wait --url "/dashboard"
```

### State Persistence (Auth)
```bash
# Save authenticated state
agent-browser open https://example.com/login
# ... login steps ...
agent-browser state save auth.json

# Reuse in future sessions
agent-browser state load auth.json
agent-browser open https://example.com/dashboard
```

### Video Recording (Debugging)
```bash
agent-browser open https://example.com
agent-browser record start
# ... perform actions ...
agent-browser record stop   # Saves to recording.webm
```

### Parallel Sessions
```bash
# Terminal 1
agent-browser --session test1 open https://example.com

# Terminal 2
agent-browser --session test2 open https://example.com
```
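
A self-verifying build loop (listed under "When to Use" above) combines these patterns: drive the page, then assert on its state before continuing. A rough sketch with placeholder URL and text; it assumes `wait --text` exits non-zero on timeout, which is worth verifying for your version:

```bash
# Hypothetical post-build verification step.
agent-browser open http://localhost:3000
if agent-browser wait --text "Build succeeded"; then
  echo "verified: success banner rendered"
else
  agent-browser screenshot -o failure.png   # keep evidence for debugging
  agent-browser close
  exit 1
fi
```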

## Cloud Browsers (Browserbase)

For CI/CD or environments without a local browser:

```bash
# Set credentials
export BROWSERBASE_API_KEY="your-api-key"
export BROWSERBASE_PROJECT_ID="your-project-id"

# Use cloud browser
agent-browser -p browserbase open https://example.com
```

See `references/browserbase-cloud-setup.md` for detailed setup.

## Troubleshooting

| Issue | Solution |
|-------|----------|
| Command not found | Run `npm install -g agent-browser` |
| Chromium missing | Run `agent-browser install` |
| Linux deps missing | Run `agent-browser install --with-deps` |
| Session stale | Close browser: `agent-browser close` |
| Element not found | Re-run `snapshot -i` after page changes |
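
For long unattended runs, a preflight check built from the fixes above can fail fast instead of mid-session (a sketch; assumes a broken install makes `agent-browser --version` exit non-zero):

```bash
# Verify the CLI and browser are usable before starting automation.
if ! command -v agent-browser >/dev/null 2>&1; then
  echo "agent-browser not found; run: npm install -g agent-browser" >&2
  exit 1
fi
agent-browser --version >/dev/null || {
  echo "CLI unusable; try reinstalling Chromium: agent-browser install" >&2
  exit 1
}
```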

## Resources

- [GitHub Repository](https://github.com/vercel-labs/agent-browser)
- [Official Documentation](https://github.com/vercel-labs/agent-browser#readme)
- [Browserbase Docs](https://docs.browserbase.com/)

.opencode/skills/agent-browser/references/.gitkeep (new file, empty)

# agent-browser vs chrome-devtools

Detailed comparison guide for choosing between the two browser automation skills.

## Feature Comparison

| Feature | agent-browser | chrome-devtools |
|---------|---------------|-----------------|
| **Engine** | Playwright (via Rust CLI) | Puppeteer |
| **Refs system** | `@e1` inline | `[ref=e1]` YAML |
| **Session persistence** | Named sessions (`--session`) | `.browser-session.json` |
| **Screenshot** | Basic | Auto-compress >5MB (Sharp) |
| **Network intercept** | `route` command | `network.js` script |
| **Console capture** | Basic | With filtering |
| **WebSocket debug** | Limited | Full-frame support |
| **Video recording** | Built-in `record` | Not available |
| **PDF export** | Built-in `pdf` | Via Puppeteer API |
| **Auth persistence** | `state save/load` | `inject-auth.js` |
| **Multi-tab** | Full support | Limited |
| **Cloud browsers** | Browserbase native | Manual setup |
| **Performance** | Rust CLI (fast) | Node.js |
| **Custom scripts** | None (CLI only) | 20+ utilities |

## Token Efficiency Benchmarks

| Metric | agent-browser | chrome-devtools | Playwright MCP |
|--------|---------------|-----------------|----------------|
| Homepage snapshot | ~280 chars | ~300-500 chars | ~8,247 chars |
| Context reduction | 93% vs MCP | 90% vs MCP | Baseline |
| Tool definitions | ~2K tokens | 0 (CLI scripts) | ~17K tokens |

**Conclusion:** agent-browser and chrome-devtools are similarly efficient; both dramatically outperform Playwright MCP.

## Use Case Decision Tree

```
Need browser automation?
|
+-- Long autonomous AI session?
|   +-- YES --> agent-browser (better context efficiency)
|   +-- NO --> Continue
|
+-- Need video recording?
|   +-- YES --> agent-browser (built-in)
|   +-- NO --> Continue
|
+-- Cloud browser (CI/CD)?
|   +-- YES --> agent-browser (Browserbase native)
|   +-- NO --> Continue
|
+-- Custom Puppeteer scripts?
|   +-- YES --> chrome-devtools (20+ utilities)
|   +-- NO --> Continue
|
+-- WebSocket debugging?
|   +-- YES --> chrome-devtools (full frames)
|   +-- NO --> Continue
|
+-- Screenshot auto-compression?
|   +-- YES --> chrome-devtools (Sharp)
|   +-- NO --> agent-browser OR chrome-devtools
```

## Parallel Usage Patterns

Both skills can coexist; use the right tool for each task:

```bash
# Quick screenshot with compression -> chrome-devtools
node "$SKILL_DIR/screenshot.js" --url https://example.com --output ss.png

# Long autonomous session -> agent-browser
agent-browser --session test1 open https://example.com
agent-browser snapshot -i
# ... many interactions ...
agent-browser close
```

## Migration Guide

### From chrome-devtools to agent-browser

| chrome-devtools | agent-browser |
|-----------------|---------------|
| `node navigate.js --url X` | `agent-browser open X` |
| `node aria-snapshot.js --url X` | `agent-browser open X && agent-browser snapshot -i` |
| `node select-ref.js --ref e5 --action click` | `agent-browser click @e5` |
| `node fill.js --selector "#email" --value "X"` | `agent-browser fill @e1 "X"` |
| `node screenshot.js --output X.png` | `agent-browser screenshot -o X.png` |
| `node console.js --types error` | No direct equivalent |
| `node network.js` | No direct equivalent |

### Key Differences

1. **Refs format:** `[ref=e5]` vs `@e5`
2. **Sessions:** file-based vs named sessions
3. **Commands:** Node scripts vs CLI commands
4. **Output:** always JSON vs JSON only with the `--json` flag

## When to Switch

**Switch to agent-browser when:**
- Starting new long-running automation
- Need video recording capability
- Moving to cloud browsers (Browserbase)
- Want simpler CLI syntax

**Keep chrome-devtools when:**
- Existing workflows depend on custom scripts
- Need WebSocket full-frame debugging
- Need automatic screenshot compression
- Need fine-grained console log filtering

.opencode/skills/agent-browser/references/browserbase-cloud-setup.md (new file, 161 lines)

# Browserbase Cloud Setup

Configure agent-browser to use Browserbase cloud browsers for CI/CD and headless environments.

## Overview

Browserbase provides remote browser infrastructure. Use it when:
- Running in CI/CD pipelines
- No local browser is available
- You need a consistent browser environment
- Scaling parallel browser sessions

## Account Setup

1. Sign up at [browserbase.com](https://browserbase.com)
2. Create a project
3. Get an API key from the dashboard
4. Note your project ID

## Environment Variables

```bash
# Required
export BROWSERBASE_API_KEY="bb_live_xxxxxxxxxxxxx"
export BROWSERBASE_PROJECT_ID="proj_xxxxxxxxxxxxx"

# Optional: set provider default
export AGENT_BROWSER_PROVIDER="browserbase"
```

## Usage

### Explicit Provider Flag
```bash
agent-browser -p browserbase open https://example.com
agent-browser snapshot -i
agent-browser click @e1
agent-browser close
```

### With Default Provider (env var)
```bash
# After setting AGENT_BROWSER_PROVIDER=browserbase
agent-browser open https://example.com   # Uses Browserbase automatically
```

## CI/CD Integration

### GitHub Actions
```yaml
name: Browser Tests
on: [push]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version: '20'

      - name: Install agent-browser
        run: npm install -g agent-browser

      - name: Run browser tests
        env:
          BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }}
          BROWSERBASE_PROJECT_ID: ${{ secrets.BROWSERBASE_PROJECT_ID }}
          AGENT_BROWSER_PROVIDER: browserbase
        run: |
          agent-browser open https://example.com
          agent-browser snapshot -i
          agent-browser screenshot -o screenshot.png
          agent-browser close

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: screenshots
          path: screenshot.png
```

### GitLab CI
```yaml
browser-test:
  image: node:20
  variables:
    AGENT_BROWSER_PROVIDER: browserbase
    # BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID should be set as
    # masked CI/CD variables in the project settings.
  script:
    - npm install -g agent-browser
    - agent-browser open https://example.com
    - agent-browser snapshot -i
    - agent-browser screenshot -o screenshot.png
    - agent-browser close
  artifacts:
    paths:
      - "*.png"
```

## Session Management

Browserbase sessions are managed automatically: each `open` creates a new session, and `close` terminates it.

```bash
# Long-running session
agent-browser -p browserbase open https://example.com
# ... many commands ...
agent-browser close   # Terminates the Browserbase session
```

## Parallel Sessions

Use named sessions for parallel browser instances:

```bash
# Session 1
agent-browser -p browserbase --session user1 open https://example.com

# Session 2 (separate terminal/process)
agent-browser -p browserbase --session user2 open https://example.com
```

## Debugging

### View Session Logs
Check the Browserbase dashboard for:
- Session recordings
- Network logs
- Console output
- Screenshots

### Local Fallback
If Browserbase is unavailable, remove the provider flag to use the local browser:
```bash
agent-browser open https://example.com   # Uses local Chromium
```

## Troubleshooting

| Issue | Solution |
|-------|----------|
| Authentication failed | Verify the API key is correct and not expired |
| Project not found | Check that `BROWSERBASE_PROJECT_ID` matches the dashboard |
| Connection timeout | Check that the network/firewall allows outbound connections |
| Session limit reached | Upgrade the Browserbase plan or wait for sessions to expire |
| Commands hang | Ensure the previous session closed properly |

## Pricing Considerations

- Browserbase charges per session minute
- Close sessions promptly with `agent-browser close`
- Use a local browser for development, cloud for CI/CD
- Monitor usage in the Browserbase dashboard
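
Because billing is per session minute, it helps to guarantee cleanup even when a script aborts. A minimal sketch using a shell trap (placeholder URL):

```bash
# Close the cloud session on any exit path, including failures.
trap 'agent-browser close' EXIT

agent-browser -p browserbase open https://example.com
agent-browser snapshot -i
# ... automation steps ...
```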

## Resources

- [Browserbase Documentation](https://docs.browserbase.com/)
- [Browserbase Dashboard](https://browserbase.com/dashboard)
- [agent-browser GitHub](https://github.com/vercel-labs/agent-browser)

.opencode/skills/ai-artist/SKILL.md (new file, 123 lines)

---
name: ck:ai-artist
description: "Generate images via Nano Banana with 129 curated prompts. Mandatory validation interview refines style/mood/colors (use --skip to bypass). 3 modes: search, creative, wild. Styles: Ukiyo-e, Bento grid, cyberpunk, cinematic, vintage patent."
metadata:
  author: claudekit
  version: 3.1.0
argument-hint: "[concept] [--mode search|creative|wild|all] [--skip]"
---

# AI Artist - Nano Banana Image Generation

Generate images using 129 curated prompts from the awesome-nano-banana-pro-prompts collection.

**Validation interview is mandatory** (use `--skip` to bypass).

## Workflow

**IMPORTANT:** Follow `references/validation-workflow.md` when this skill is activated.

## Quick Start

```bash
python3 scripts/generate.py "<concept>" -o <output.png> [--mode MODE]
```

### Generation Modes

| Mode | Description |
|------|-------------|
| `search` | Find the best matching prompt from the 129 curated prompts (default) |
| `creative` | Remix elements from the top 3 matching prompts |
| `wild` | Out-of-the-box creative interpretation (random style transform) |
| `all` | Generate all 3 variations |

### Examples

```bash
# Default search mode
python3 scripts/generate.py "tech conference banner" -o banner.png -ar 16:9

# Creative remix (combines multiple prompts)
python3 scripts/generate.py "AI workshop" -o workshop.png --mode creative

# Wild/experimental (random artistic transformation)
python3 scripts/generate.py "product showcase" -o product.png --mode wild

# Generate all 3 variations at once
python3 scripts/generate.py "futuristic city" -o city.png --mode all -v
```

### Options

| Flag | Description |
|------|-------------|
| `-o, --output` | Output path (required) |
| `-m, --mode` | search, creative, wild, or all |
| `-ar, --aspect-ratio` | 1:1, 16:9, 9:16, etc. |
| `--model` | flash2 (default, fast + quality), flash (previous), pro (quality/4K) |
| `-v, --verbose` | Show matched prompts and details |
| `--dry-run` | Show prompt without generating |
| `--skip` | Bypass validation interview |
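
`--dry-run` pairs well with `-v` to preview the assembled prompt before spending a generation; for example (the output path is a placeholder; per the flag description, no image is generated):

```bash
# Preview the wild-mode prompt for a concept without generating an image.
python3 scripts/generate.py "tech conference banner" -o preview.png --mode wild --dry-run -v
```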

---

## Prompt Database

**129 curated prompts** extracted from awesome-nano-banana-pro-prompts:

```bash
# Search prompts
python3 scripts/search.py "<query>" --domain awesome

# View all prompts
cat data/awesome-prompts.csv
```

### Categories include:
- **Profile/Avatar**: Thought-leader headshots, mirror selfies
- **Infographics**: Bento grid, chalkboard, ingredient labels
- **Social Media**: Quote cards, banners, thumbnails
- **Product**: Commercial shots, e-commerce, Apple-style
- **Artistic**: Ukiyo-e, patent documents, vaporwave, cyberpunk
- **Character**: Anime, chibi, comic storyboards

---

## Wild Mode Transformations

The `wild` mode randomly applies one of these artistic transformations:

- Japanese Ukiyo-e woodblock print
- Premium liquid glass Bento grid infographic
- Vintage 1800s patent document
- Surreal dreamscape with volumetric god rays
- Cyberpunk neon aesthetic with holograms
- Hand-drawn chalkboard explanation
- Isometric 3D diorama
- Cinematic movie poster
- Vaporwave aesthetic with glitch effects
- Apple-style product showcase

---

## References

| Topic | File |
|-------|------|
| **Validation Workflow** | `references/validation-workflow.md` |
| All Prompts | `data/awesome-prompts.csv` |
| Nano Banana Guide | `references/nano-banana.md` |
| Image Prompting | `references/image-prompting.md` |
| Source | `references/awesome-nano-banana-pro-prompts.md` |

---

## Scripts

| Script | Purpose |
|--------|---------|
| `generate.py` | Main image generation with 3 modes |
| `search.py` | Search the prompts database |
| `extract_prompts.py` | Extract prompts from markdown |
| `core.py` | BM25 search engine |

.opencode/skills/ai-artist/data/awesome-prompts.csv (new file, 3592 lines; diff suppressed because it is too large)

.opencode/skills/ai-artist/data/lighting.csv (new file, 19 lines)

STT,Lighting Type,Category,Keywords,Description,Mood,Best For,Prompt Keywords,Technical Notes
1,Golden Hour,Natural,"golden, sunset, warm, soft, magic hour, golden light","Warm directional sunlight during first/last hour of day","Romantic, warm, dreamy, nostalgic","Portraits, landscapes, lifestyle","golden hour lighting, warm sunset light, magic hour, soft directional sunlight","Sun 15-20° above horizon; warm color temp ~3000K; long shadows"
2,Blue Hour,Natural,"blue, twilight, dusk, dawn, cool, moody, serene","Cool ambient light just after sunset or before sunrise","Calm, mysterious, ethereal, melancholic","Cityscapes, moody portraits, atmospheric scenes","blue hour, twilight lighting, cool ambient light, pre-dawn atmosphere","No direct sun; sky as diffused light source; ~4000-6000K; city lights mix well"
3,Overcast,Natural,"overcast, cloudy, diffused, soft, even, flat","Evenly diffused light from cloud cover","Neutral, natural, intimate, documentary","Portraits, products, nature close-ups","overcast lighting, soft diffused daylight, cloudy day light","Giant softbox effect; minimal shadows; color temp ~6500K; flattering for skin"
4,Harsh Midday,Natural,"harsh, midday, direct, high contrast, hard shadows","Direct overhead sunlight with strong shadows","Dramatic, bold, stark, energetic","Fashion, architectural, dramatic scenes","harsh midday sun, direct overhead lighting, high contrast sunlight","Sun directly overhead; hard shadows under features; challenging for portraits"
5,Rembrandt,Portrait,"rembrandt, dramatic, chiaroscuro, triangle, classic","Single light source creating triangle on cheek","Dramatic, artistic, classic, serious","Dramatic portraits, fine art, character studies","Rembrandt lighting, triangle light on cheek, chiaroscuro, single source dramatic","45° angle, slightly above; small triangle under eye on shadow side"
6,Butterfly,Portrait,"butterfly, paramount, glamour, beauty, hollywood, classic","Light directly in front and above subject","Glamorous, elegant, flattering, hollywood","Beauty shots, fashion, headshots","butterfly lighting, paramount lighting, beauty light, overhead frontal","Creates shadow under nose; symmetrical; very flattering; fill from below optional"
7,Split,Portrait,"split, half lit, dramatic, mysterious, moody","Light illuminating exactly half the face","Mysterious, dramatic, intense, artistic","Dramatic portraits, conceptual, film noir","split lighting, half-face illumination, dramatic side lighting","Light at 90° to face; maximum drama; villain lighting; strong emotion"
8,Loop,Portrait,"loop, natural, versatile, slight shadow, classic","Slight shadow from nose toward cheek","Natural, approachable, versatile, professional","Professional headshots, corporate, general portraits","loop lighting, natural portrait light, slight nose shadow","Between butterfly and Rembrandt; 30-45° from camera; most versatile"
9,Rim Light,Accent,"rim, back, edge, separation, halo, outline","Light from behind creating edge highlight","Dramatic, separated, defined, ethereal","Silhouettes, product separation, dramatic portraits","rim lighting, backlight, edge light, hair light, subject separation","Behind subject; creates outline; great for hair; adds depth"
10,Volumetric,Atmospheric,"volumetric, god rays, light rays, foggy, atmospheric","Visible light rays in atmosphere","Mystical, spiritual, dramatic, cinematic","Forests, churches, dramatic scenes, fantasy","volumetric lighting, god rays, light beams, atmospheric fog lighting","Requires particles/fog; directional light source; dramatic effect"
11,Neon,Artificial,"neon, cyberpunk, colorful, glow, synthetic, urban","Colored artificial light sources creating glow","Futuristic, edgy, urban, energetic","Cyberpunk, night scenes, portraits, gaming","neon lighting, cyberpunk glow, colored light sources, neon signs","Multiple color sources; pink/cyan common; creates color mixing on skin"
12,Studio Softbox,Studio,"softbox, studio, professional, even, controlled","Large diffused artificial light source","Professional, clean, commercial, polished","Product shots, headshots, e-commerce","studio softbox lighting, professional studio light, soft even illumination","Large source = soft light; multiple setups possible; controllable"
13,Ring Light,Studio,"ring light, even, beauty, catchlight, influencer","Circular light source around camera lens","Modern, clean, beauty, social media","Beauty content, vlogs, product reviews, selfies","ring light, circular catchlight, even facial illumination","Creates distinctive circular catchlight in eyes; very even; popular for influencers"
14,Natural Window,Indoor,"window, natural indoor, side light, soft ambient","Daylight coming through windows","Intimate, natural, homey, authentic","Lifestyle, portraits, indoor scenes","natural window light, side lighting from window, soft indoor daylight","Direction and quality vary with time; curtains diffuse; very flattering"
15,Low Key,Mood,"low key, dark, moody, shadows, dramatic, noir","Mostly shadow with selective illumination","Mysterious, dramatic, artistic, moody","Film noir, dramatic portraits, conceptual","low key lighting, dramatic shadows, selective illumination, dark mood","High contrast; dark background; minimal fill; emphasis on shadows"
16,High Key,Mood,"high key, bright, clean, minimal shadows, airy","Bright even illumination with minimal shadows","Clean, optimistic, fresh, commercial","Commercial, beauty, product, children","high key lighting, bright even illumination, minimal shadows, clean white","Multiple light sources; white/light backgrounds; reduced contrast"
17,Practical,Scene,"practical, motivated, realistic, in-scene, diegetic","Light sources visible within the scene","Realistic, cinematic, grounded, authentic","Interior scenes, realistic settings, film","practical lighting, motivated light sources, in-scene illumination","Lamps, candles, screens as light sources; adds realism; matches setting"
18,Color Gel,Creative,"gel, colored, creative, mood, theatrical","Artificially colored light for creative effect","Creative, emotional, theatrical, stylized","Creative portraits, music, events, artistic","colored gel lighting, theatrical colored light, mood lighting","Colors convey emotion; complementary or contrasting; theatrical effect"

.opencode/skills/ai-artist/data/nano-banana-templates.csv (new file, 17 lines)

Category,Template Name,Keywords,Prompt Template,Aspect Ratio,Tips
Quote Card,Wide Quote Card Portrait,"quote,inspiration,motivational,portrait,typography","A wide quote card featuring a famous person, with a {background_color} background and a {font_color} serif font for the quote: ""{quote}"" and smaller text: ""—{author}."" There is a large, subtle quotation mark before the text. The portrait of the person is on the left, the text on the right. The text occupies two-thirds of the image and the portrait one-third, with a slight gradient transition effect on the portrait.",16:9,Use brown/earthy backgrounds with light-gold text for elegance
Infographic,Bento Grid Product Infographic,"infographic,product,bento,glass,premium,educational","Premium liquid glass Bento grid product infographic with 8 modules. Product: {product}. Language: {language}. 1) Hero card (28-30%): Product photo/3D glass rendering. 2) Core Benefits: 4 key benefits with icons. 3) How to Use: 4 usage methods with icons. 4) Key Metrics: 5 data points. 5) Who It's For: 4 recommended + 3 caution groups. 6) Important Notes: 4 precautions. 7) Quick Reference: Specs/certifications. 8) Did You Know: 3 interesting facts. Background: Apple liquid glass cards (85-90% transparent). Ethereal macro/pattern/context background.",16:9,Use consistent icon style; ensure text legibility at mobile size
Header Banner,Hand-drawn Blog Header,"header,banner,blog,hand-drawn,gradient,tech","A header image for a blog article where a person introduces ""{topic}"". Aspect ratio: horizontal 16:9. Style: simple, hand-drawn style, italic lines. Colors: {color1} and {color2} gradient. Title text: ""{title}"" in clean typography.",16:9,Keep space for text overlay; use complementary gradient colors
Map,Watercolor Map with Labels,"map,watercolor,educational,geography,artistic","Generate a map of {country} in watercolor style, on which all {regions} are labeled in ballpoint pen handwriting. Soft color gradients for each region. Paper texture visible.",1:1,Use muted watercolor palette; ensure label readability
Portrait,2x2 Grid Photo Studio,"portrait,grid,fashion,studio,magazine","High-end photo studio 2x2 grid photo. Top-left panel ({color1} background): Subject wears {outfit1}, holds prop with ""{text1}"". Top-right panel ({color2} background): Same person in {outfit2}, holds prop with ""{text2}"". Bottom-left panel ({color3} background): Same person in {outfit3}, holds prop with ""{text3}"". Bottom-right panel ({color4} background): Same person in {outfit4}, completing the composition. Clear makeup, bright ring light, 85mm lens, f/1.8 aperture, fashion magazine style. MUST maintain consistent facial features across all panels.",1:1,Maintain identity lock across panels; use complementary color scheme
Patent,Vintage Patent Document,"patent,vintage,technical,document,invention","A vintage patent document for {invention}, styled after late 1800s United States Patent Office filings. Precise technical drawings with numbered callouts (Fig. 1, Fig. 2, Fig. 3) showing front, side, and exploded views. Handwritten annotations in fountain-pen ink describe mechanisms. Aged ivory paper with foxing stains and soft fold creases. Official embossed seal and red wax stamp in corner. Hand-signed inventor's name and date at bottom. Authoritative, historic, slightly mysterious feel.",3:4,Include multiple figure views; add authentic aging effects
Chalkboard,Chalkboard News Summary,"educational,chalkboard,news,diagram,hand-written","Using the following content, summarize the information in a chalkboard-style, hand-written look. Break it down with diagrams and easy-to-understand expressions as if a teacher had written it. Topic: {topic}. Key points to cover: {points}. Include arrows, boxes, and visual connections between concepts.",16:9,Use chalk texture; include visual hierarchy with boxes and arrows
Mirror Selfie,Otaku Room Mirror Selfie,"selfie,mirror,room,lifestyle,casual","Scene: Mirror selfie in an otaku-style room corner, {color} tone. Subject: {gender}, around {age}, {ethnicity}, {body_type}. Outfit: {clothing}. Room elements: {room_details}. Smartphone held for selfie via mirror. Natural lighting from window. Authentic lifestyle photography feel.",9:16,Include room context; ensure natural pose
Style Transform,Ukiyo-e Modern Reimagining,"ukiyo-e,Japanese,woodblock,artistic,transformation","A Japanese Edo-period Ukiyo-e woodblock print reimagining {modern_scene}. Collaboration between masters like Hokusai and Hiroshige, reimagining modern technology through an ancient lens. Characters wear Edo-era kimono but perform modern actions. Tech transformation: {modern_item} becomes {traditional_item}. Composition: Flattened perspective, bold ink outlines. Texture: Wood grain, paper fibers, pigment bleeding. Colors: Prussian blue, vermilion red, muted ochre. Include vertical Japanese calligraphy and red artist seal.",3:4,Transform modern elements into period equivalents
Social Banner,Tech Conference Announcement,"conference,tech,announcement,professional,corporate","A professional tech conference promotional image. Scene: Futuristic {venue_type} where humans and AI work together harmoniously. Holographic display shows ""{date}"". Color palette: deep {primary_color} as primary, electric {accent_color} as accent, pure white text elements. {style} aesthetic. Clean minimal design with subtle gradient lighting. Professional photography style, soft ambient lighting. Text overlay area at bottom for event details. NEVER include unwanted text. DO NOT add watermarks.",16:9,Reserve bottom 20% for text overlay; use corporate color scheme
Product Hero,Premium Product on Surface,"product,commercial,studio,luxury,hero","A premium {product} positioned on {surface}. Materials: {material_finish} finish catching light. Lighting: {lighting_setup} creating {mood} mood. Camera: {angle} angle, 85mm lens, f/2.8. Background: {background_type}, slightly out of focus. Style: {style} commercial photography. Highlights on edges, subtle reflections. NEVER add text or watermarks.",1:1,Use 3-point lighting; include subtle reflections
Character,Cyberpunk Character Portrait,"character,cyberpunk,neon,portrait,sci-fi","A {style} character portrait in cyberpunk setting. Subject: {description} with {distinctive_features}. Outfit: {outfit} with neon accents. Background: Rain-slicked street with holographic ads, neon signs in {colors}. Lighting: Neon rim light from behind, soft key light on face. Atmosphere: Moody, atmospheric fog. Camera: 85mm portrait lens, shallow DOF. STRICT identity lock if reference provided.",2:3,Use complementary neon colors; include atmospheric effects
Food,Overhead Food Photography,"food,overhead,culinary,lifestyle,restaurant","Overhead shot of {dish} on {surface}. Plating: {plating_style} presentation. Props: {props} arranged around main dish. Lighting: {lighting} creating appetizing shadows. Include human element: {human_element}. Steam/freshness visible. Color palette: {colors}. Style: {magazine} magazine quality. NEVER add text.",1:1,Include human hand or utensil for scale; show steam/freshness
Architecture,Interior Visualization,"interior,architecture,modern,visualization,lifestyle","A {style} interior photograph of {room_type}. View: {view_angle} angle capturing full space. Materials: {materials} creating {atmosphere} feel. Lighting: {time_of_day} light through {window_type}. Furniture: {furniture_style} pieces. Decorative elements: {decor}. Camera: Wide angle architectural lens, everything in focus. NEVER include people unless specified.",16:9,Use natural lighting; show material textures
Speaker,Conference Speaker Spotlight,"speaker,conference,professional,portrait,tech","Professional conference speaker promotional image. Subject: {demographics} presenting on stage or in modern setting. Background: Abstract flowing data visualizations, {topic}-related holographic elements. Color scheme: {primary}, {secondary}, {accent}. Lighting: Cinematic with soft key light on face, dramatic rim lighting. Modern corporate photography style. Clean space at bottom for speaker name/title overlay. NEVER include text in image.",1:1,Leave space for text overlay; use brand colors
Event CTA,Event Registration Urgency,"event,registration,urgency,countdown,promotional","Dynamic call-to-action promotional image for {event_type}. Scene: Excited people entering modern {venue} with {brand} elements. Countdown visualization floating in air. Golden VIP pass/ticket element representing {benefit}. Limited seats visual metaphor. Color palette: urgent {color1}, premium {color2}, trust {color3}. Energy and excitement atmosphere with motion blur on crowd. Clean bottom area for registration CTA. NEVER include specific text.",1:1,Use motion blur for energy; include urgency elements

.opencode/skills/ai-artist/data/platforms.csv (new file, 11 lines)

STT,Platform,Type,Keywords,Prompt Style,Key Parameters,Strengths,Limitations,Aspect Ratios,Best Practices
1,Midjourney,Commercial,"midjourney, MJ, Discord, v6, stylize, chaos, artistic","[prompt] --ar 16:9 --style raw --v 6.1","--ar (aspect), --style (raw/default), --stylize (0-1000), --chaos (0-100), --weird (0-3000), --seed, --no","Artistic interpretation, consistent style, excellent composition, great for concepts","No API, Discord-only, limited control, no inpainting in v6","1:1, 16:9, 9:16, 4:3, 3:2, 21:9, 2:3","Multi-prompt weighting cat::2; use /describe for reverse prompting; --style raw for photorealism"
2,DALL-E 3,Commercial,"dalle, dall-e, openai, gpt-4, natural language, API","Natural language description without parameters. Be descriptive, conversational.","HD quality (in prompt), vivid style (in prompt), natural size (in prompt)","Excellent text rendering, natural language understanding, API access, safety guardrails","Limited style control, no parameters, no negative prompts, can refuse prompts","1024x1024, 1792x1024, 1024x1792","Write like describing to a human; specify text content, font, placement explicitly; avoid keyword lists"
3,Stable Diffusion,Open Source,"SD, SDXL, ComfyUI, A1111, local, open source, LoRA","(important:1.3), normal, (less:0.8) + Negative: ugly, blurry, deformed","CFG Scale (7-12), Sampler (DPM++), Steps (20-50), LoRA, Embeddings, Weights (word:1.2)","Full control, local/private, LoRAs, inpainting, ControlNet, customizable","Learning curve, requires hardware, quality varies by model","Custom any ratio","Use (word:1.2) for emphasis; negative prompts essential; CFG 7-12; DPM++ 2M Karras sampler"
4,Flux,Open Source,"flux, schnell, dev, pro, BFL, open source, fast","Natural language, weighted prompts, --guidance scale","--guidance (strength), aspect ratio in prompt","Fast generation, good quality, natural prompts, open weights","Newer platform, fewer resources, limited community models","Various via prompt","Use natural descriptions; specify style directly; guidance scale 3.5 for balanced results"
5,Nano Banana Pro,Google,"nano banana, gemini, google, imagen, multimodal, text rendering","Narrative paragraphs. 32K context. ALL CAPS emphasis. Hex colors #9F2B68.","aspect_ratio (1:1 to 21:9), image_size (1K/2K/4K), responseModalities","Best text rendering, multimodal input (14 images), search grounding, thinking mode","Newer platform, learning curve, style consistency","1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9","Narrative > keywords; ALL CAPS for critical; hex colors for precision; NEVER for negatives; photography terms anchor quality"
6,Imagen 4,Google,"imagen, google, photorealistic, high quality, commercial","Natural language, descriptive, aspect ratio in text","Quality level, aspect ratio specified in prompt text","Photorealistic quality, good text rendering, commercial use","Limited style range, newer platform","Various via prompt","Be descriptive; specify aspect in prompt text; use photography terminology"
7,Veo 3.1,Google,"veo, video, google, AI video, motion, cinematography","Descriptive cinematography language, camera movements, scene transitions","Duration, camera movements (pan, tilt, dolly), scene transitions (cut, fade)","Video generation, cinematography understanding, smooth motion","Video-only, newer, generation time","16:9, 9:16","Use cinematography keywords; describe camera movements explicitly; include scene transitions"
8,Ideogram,Commercial,"ideogram, text, typography, logo, creative, accurate text","Natural language with emphasis on text content and styling","Aspect ratio, magic prompt (on/off), style type","Excellent typography, good for logos, creative designs","Fewer style options, focused on text/design use cases","1:1, 16:9, 9:16, 4:3, 3:4","Describe text content precisely; specify font characteristics; great for logos and typography"
9,Leonardo AI,Commercial,"leonardo, AI, creative, finetune, custom models, game assets","Natural language + negative prompts, model selection","Models, Alchemy, PhotoReal, Fidelity, Contrast, seed","Game assets, custom model training, good controls, consistent style","Subscription tiers, model selection complexity","1:1, 16:9, 9:16, others","Use Alchemy for enhanced results; PhotoReal for photography; explore different models for styles"
10,Adobe Firefly,Commercial,"firefly, adobe, creative cloud, commercial safe, enterprise","Natural language, style references, structure references","Style intensity, effects, structure reference","Commercially safe, Adobe integration, reference images, good for design","Limited to Adobe ecosystem, conservative outputs","Various","Use for commercial projects; leverage style references; integrates with Photoshop/Illustrator"

.opencode/skills/ai-artist/data/styles.csv (new file, 26 lines)

STT,Style Name,Category,Keywords,Description,Key Characteristics,Color Palette,Best For,Platforms,Prompt Keywords
1,Photorealistic,Photography,"photo, realistic, camera, natural, authentic, real, photography","Ultra-realistic images that mimic professional photography","Natural lighting, authentic textures, camera-specific artifacts, realistic proportions","Natural colors, realistic skin tones, environmental colors","Portraits, product shots, scenes","Nano Banana, Midjourney, SD","photorealistic, ultra realistic, 8K, RAW photo, natural lighting, professional photography"
2,Cinematic,Photography,"film, movie, cinematic, dramatic, theatrical, hollywood, widescreen","Film-like imagery with dramatic lighting and composition","Anamorphic lens effects, color grading, shallow DOF, dramatic lighting, widescreen","Teal-orange, desaturated, film stock colors","Movie scenes, dramatic portraits, storytelling","Midjourney, Nano Banana","cinematic, film still, anamorphic, color graded, shallow depth of field, dramatic lighting"
3,Anime Manga,Illustration,"anime, manga, Japanese, otaku, animated, cel-shaded","Japanese animation and comic style artwork","Large expressive eyes, stylized proportions, cel shading, dynamic poses, speed lines","Vibrant, saturated, anime-typical palettes","Characters, fan art, storytelling, avatars","Midjourney, SD, Nano Banana","anime style, manga, cel shaded, Japanese animation, vibrant colors, expressive eyes"
4,3D Render,Digital,"3D, render, CGI, Blender, Cinema4D, octane, dimensional","Computer-generated 3D imagery with realistic or stylized rendering","Material reflections, global illumination, subsurface scattering, hard/soft shadows","Scene-dependent, PBR materials","Products, characters, architecture, abstract","Midjourney, SD, Nano Banana","3D render, octane render, Cinema4D, Blender, CGI, ray tracing, PBR materials"
5,Illustration,Art,"illustrated, drawn, digital art, vector, graphic, artistic","Digital or traditional illustration styles","Clean lines, stylized forms, artistic interpretation, consistent style","Style-dependent, often bold or pastel","Editorial, children's books, branding","Midjourney, DALL-E, SD","illustration, digital art, illustrated, artistic, stylized, graphic design"
6,Pixel Art,Retro,"pixel, 8-bit, 16-bit, retro, game, sprite, nostalgic","Retro video game aesthetic with visible pixels","Grid-based, limited color palette, dithering, nostalgic feel, sprite-like","Limited palette, 8-bit/16-bit colors","Games, avatars, nostalgic content","Midjourney, SD","pixel art, 8-bit, 16-bit, retro gaming, sprite, pixelated"
7,Watercolor,Traditional,"watercolor, paint, artistic, soft, flowing, traditional, aquarelle","Traditional watercolor painting aesthetic","Soft edges, color bleeds, paper texture, transparent layers, organic flow","Soft, flowing, often pastel or earthy","Portraits, landscapes, artistic content","Midjourney, SD, DALL-E","watercolor painting, aquarelle, soft edges, color bleed, paper texture, artistic"
8,Oil Painting,Traditional,"oil, painting, classical, fine art, canvas, masterpiece, textured","Classical oil painting technique","Visible brushstrokes, rich textures, layered colors, impasto, chiaroscuro","Rich, deep, Old Master palette","Portraits, landscapes, fine art","Midjourney, SD, DALL-E","oil painting, impasto, brushstrokes, classical art, canvas texture, fine art"
9,Sketch Line Art,Drawing,"sketch, line art, drawing, pencil, ink, minimal, outlined","Hand-drawn sketch or line art style","Clean or rough lines, minimal shading, crosshatching, pen/pencil textures","Monochrome, limited color","Concepts, technical, artistic","Midjourney, SD, DALL-E","sketch, line art, pencil drawing, ink drawing, hand-drawn, crosshatching"
10,Ink Chinese Style,Traditional,"ink wash, Chinese, Japanese, sumi-e, brush, traditional, Eastern","East Asian ink wash painting technique","Brush strokes, ink gradients, negative space, calligraphic elements, minimalism","Black ink, subtle grays, minimal color accents","Nature, calligraphy, cultural art","Midjourney, SD","ink wash, sumi-e, Chinese painting, brush strokes, traditional Asian art"
11,Chibi Q-Style,Illustration,"chibi, kawaii, cute, SD, super deformed, adorable, tiny","Super-deformed cute character style","Oversized head, small body, simple features, kawaii expressions, round shapes","Bright, pastel, candy colors","Mascots, stickers, merchandise","Midjourney, SD","chibi, kawaii, super deformed, cute, big head, small body, adorable"
12,Isometric,Technical,"isometric, flat, geometric, technical, diagram, 30-degree","Isometric projection technical illustration","30-degree angles, no vanishing point, geometric precision, clean lines","Often bold, flat colors","Infographics, games, technical","Midjourney, SD, DALL-E","isometric, isometric view, 30 degree angle, flat design, geometric, technical illustration"
13,Cyberpunk Sci-Fi,Genre,"cyberpunk, neon, future, sci-fi, dystopian, tech noir, blade runner","Futuristic dystopian aesthetic","Neon lights, rain-slicked streets, high-tech low-life, holographics, chrome","Neon pink, cyan, purple on dark","Sci-fi, gaming, urban","Midjourney, SD, Nano Banana","cyberpunk, neon lights, dystopian future, sci-fi, blade runner, tech noir"
14,Retro Vintage,Nostalgic,"retro, vintage, old school, nostalgia, classic, throwback, period","Nostalgic imagery from past eras","Era-specific aesthetics, grain/texture, period-accurate colors, vintage wear","Muted, sepia, era-specific palettes","Period pieces, nostalgia marketing","Midjourney, DALL-E, SD","vintage, retro, 1950s/60s/70s/80s, nostalgic, old school, period accurate"
15,Minimalism,Modern,"minimal, clean, simple, modern, white space, essential, less","Clean, essential design with maximum white space","Simple forms, limited elements, high contrast, essential details only","Monochrome, limited accent colors","Modern branding, UI/UX, editorial","Midjourney, DALL-E","minimalist, minimal, clean design, simple, white space, essential"
16,Vaporwave,Aesthetic,"vaporwave, aesthetic, 90s, glitch, synthwave, retro-futuristic, nostalgic","90s-inspired surreal aesthetic","Glitch effects, Roman statues, gradient skies, grid patterns, Japanese text","Pink, purple, cyan, sunset gradients","Music, artistic, aesthetic content","Midjourney, SD","vaporwave, aesthetic, synthwave, 90s, glitch, roman statue, sunset gradient"
17,Glassmorphism,UI,"glass, frosted, transparent, blur, modern, UI, Apple","Frosted glass UI aesthetic","Transparency, backdrop blur, subtle borders, layered depth, vibrant backgrounds","Translucent white on vibrant BG","UI mockups, modern design","Nano Banana, Midjourney","glassmorphism, frosted glass, backdrop blur, translucent, modern UI"
18,Claymorphism,UI,"clay, soft, 3D, playful, rounded, bubbly, toy-like","Soft 3D clay-like UI aesthetic","Rounded forms, soft shadows, pastel colors, toy-like appearance, friendly","Soft pastels, warm tones","Friendly apps, children's content","Midjourney, SD","claymorphism, clay 3D, soft shadows, rounded, pastel, playful"
19,Portrait Photography,Photography,"portrait, headshot, face, professional, fashion, editorial, beauty","Professional portrait photography","Lighting setups (Rembrandt, butterfly), skin detail, eye focus, composition","Natural skin tones, background depends on style","Portraits, headshots, fashion","Nano Banana, Midjourney","portrait photography, professional headshot, studio lighting, 85mm lens, bokeh"
20,Fashion Editorial,Photography,"fashion, editorial, vogue, model, haute couture, magazine, runway","High fashion magazine photography","Dramatic poses, designer clothing, artistic lighting, editorial composition","High fashion palettes, often bold or muted","Fashion, luxury, editorial","Midjourney, Nano Banana","fashion editorial, vogue, haute couture, model photography, dramatic lighting"
21,Product Commercial,Photography,"product, commercial, advertising, studio, hero shot, professional","Professional product photography","Clean isolation, material accuracy, controlled lighting, hero angles","Product-accurate, often neutral BG","E-commerce, advertising","Nano Banana, Midjourney, SD","product photography, commercial, studio lighting, white background, hero shot"
22,Landscape Nature,Photography,"landscape, nature, outdoor, scenic, wilderness, vista, environment","Natural landscape photography","Wide vistas, natural lighting, composition rules, atmospheric effects","Natural greens, blues, earth tones","Travel, nature, wallpapers","Midjourney, SD, Nano Banana","landscape photography, nature, scenic, golden hour, wide angle, vista"
23,Street Documentary,Photography,"street, documentary, candid, urban, lifestyle, real, authentic","Candid street and documentary photography","Authentic moments, urban settings, available light, spontaneous composition","Natural, urban colors","Lifestyle, journalism, authentic","Midjourney, Nano Banana","street photography, documentary, candid, urban, authentic, lifestyle"
24,Macro Close-up,Photography,"macro, close-up, detail, micro, extreme detail, texture","Extreme close-up detail photography","Shallow DOF, extreme detail, texture visibility, scale distortion","Subject-dependent","Products, nature, textures","Midjourney, SD, Nano Banana","macro photography, close-up, extreme detail, shallow depth of field, texture"
25,Ukiyo-e,Traditional,"ukiyo-e, Japanese, woodblock, Hokusai, Hiroshige, Edo, waves","Traditional Japanese woodblock print style","Flat colors, bold outlines, wave patterns, woodgrain texture, calligraphy","Prussian blue, vermilion, muted ochre","Japanese themes, artistic","Midjourney, SD","ukiyo-e, Japanese woodblock print, Hokusai, flat colors, bold outlines"

.opencode/skills/ai-artist/data/techniques.csv (new file, 19 lines)

STT,Technique,Category,Keywords,Description,When to Use,Syntax Example,Platforms,Tips
1,Emphasis Capitalization,Formatting,"caps, emphasis, important, critical, priority, MUST","Use ALL CAPS for critical requirements that must not be ignored","When specific elements are non-negotiable","MUST include blue sky. NEVER add watermarks. DO NOT modify face.","Nano Banana, DALL-E","Use sparingly - 2-3 emphasis points max; overuse dilutes impact"
|
||||
2,Weighted Prompting,Control,"weight, emphasis, priority, stronger, weaker, balance","Assign numerical weights to prompt terms for relative importance","When balancing multiple elements or emphasizing specific aspects","(important term:1.3), (less important:0.7) OR cat::2 dog::1","SD, Flux, Midjourney","1.0 is default; 1.2-1.5 for emphasis; 0.5-0.8 to reduce; Midjourney uses :: syntax"
|
||||
3,Negative Prompts,Refinement,"negative, avoid, exclude, remove, no, without, ban","Specify what to exclude from the generated image","To prevent common artifacts or unwanted elements","Negative: ugly, blurry, deformed, watermark, extra limbs","SD, Flux, Leonardo","Essential for SD/Flux; organize by category (anatomy, quality, style); keep concise"
|
||||
4,Multi-Image Reference,Identity,"reference, identity lock, face match, style transfer, consistency","Use multiple images to maintain subject identity or style","For consistent character generation or style matching","Upload 6 object + 5 human refs (Nano Banana up to 14 images)","Nano Banana, Midjourney, SD","Collage multiple refs into single image for efficiency; specify which image for what purpose"
|
||||
5,JSON Structured Prompts,Organization,"JSON, structured, organized, layered, metadata, schema","Use JSON format for complex, multi-layer prompt organization","For detailed scene descriptions with many parameters","{ ""subject"": {}, ""environment"": {}, ""camera"": {}, ""lighting"": {} }","Nano Banana, any","Helps organize complex prompts; include meta_data, subject_layer, environment_layer, composition_and_tech"
|
||||
6,Narrative Description,Natural,"narrative, paragraph, story, descriptive, natural language","Write prompts as flowing paragraphs like briefing a photographer","For models that understand natural language well (32K+ context)","A professional portrait of a woman in her 30s, standing in an urban setting at golden hour...","Nano Banana, DALL-E","More effective than keyword lists for Gemini; include context, mood, and intention"
|
||||
7,Photography Anchoring,Quality,"photography, camera, lens, DSLR, professional, shot","Use photography terms to anchor quality and realism","For photorealistic results and specific camera aesthetics","Captured with Canon EOS 90D, 85mm lens, f/1.8, shallow DOF, natural lighting","All platforms","Include camera model, lens, aperture, lighting setup; triggers photorealistic training data"
|
||||
8,Color Precision,Accuracy,"hex, color code, precise color, #, RGB, specific color","Use hex color codes instead of vague color names","When exact colors are critical","Background: #9F2B68 (dark magenta) instead of 'dark magenta'","Nano Banana, any","Hex codes are unambiguous; include color name for context; use for brand colors"
|
||||
9,Aspect Ratio Control,Composition,"aspect ratio, ar, dimensions, ratio, format, orientation","Specify exact aspect ratio for output dimensions","When specific dimensions are required","--ar 16:9 (Midjourney) OR aspect_ratio='16:9' (Nano Banana)","All platforms","Match platform requirements; 1:1 social, 9:16 stories, 16:9 widescreen, 21:9 cinematic"
|
||||
10,Iterative Refinement,Workflow,"iteration, refine, version, variation, A/B test, improve","Generate variations and refine based on results","For achieving optimal results through multiple generations","Generate 4 variations → Select best → Refine weights → Upscale","All platforms","Start broad, narrow down; save seeds for reproducibility; document successful prompts"
|
||||
11,Scene Layering,Composition,"foreground, midground, background, layers, depth, z-index","Describe scene in distinct depth layers","For complex scenes with depth","Foreground: cherry blossoms. Midground: person walking. Background: Mount Fuji at sunset.","All platforms","Creates visual depth; helps AI understand spatial relationships"
|
||||
12,Style Mixing,Creative,"blend, mix, fusion, combine, hybrid, mashup","Combine multiple artistic styles","For unique hybrid aesthetics","Art nouveau meets cyberpunk, or '80s synthwave crossed with ukiyo-e woodblock print","Midjourney, SD, Nano Banana","Use 'meets', 'crossed with', 'fusion of'; weight styles if needed"
|
||||
13,Negative Constraints,Safety,"NEVER, avoid, exclude, prohibit, ban, NO","Explicit statements of what must not appear","To prevent unwanted content or artifacts","NEVER include text or watermarks. DO NOT add any UI elements. Avoid red tones.","Nano Banana, DALL-E","More effective than negative prompts for some models; be specific and explicit"
|
||||
14,Camera Movement (Video),Motion,"pan, tilt, dolly, tracking, crane, zoom, motion","Specify camera movements for video generation","For AI video generation","Slow dolly forward, then gentle pan right revealing the landscape. Fade to next scene.","Veo, video platforms","Use cinematography terms; specify speed and direction; include transitions"
|
||||
15,Identity Lock,Consistency,"identity, face lock, same person, consistent, preserve","Techniques to maintain subject identity across generations","For consistent character representation","Use reference as EXACT facial reference. STRICT identity lock. NO face modification.","Nano Banana, SD (LoRA)","Upload clear reference; explicitly state preservation requirements; use JSON locks"
|
||||
16,Multi-Panel Composition,Layout,"panel, grid, collage, multi-image, split, diptych","Request images with multiple panels or sections","For comics, comparisons, before/after","2x2 grid showing: [top-left description], [top-right description], [bottom-left], [bottom-right]","Nano Banana, Midjourney","Describe each panel clearly; maintain consistency across panels; specify layout (2x2, 1x3, etc.)"
|
||||
17,Search Grounding,Real-time,"search, real-time, current, live data, grounding, factual","Enable real-time data for accurate current information","For current events, accurate data visualization","tools=[{'google_search': {}}] - enables real-time weather, charts, events","Nano Banana only","Use for infographics with current data; weather-accurate scenes; current event imagery"
|
||||
18,Thinking Mode,Complex,"thinking, reasoning, complex, planning, elaborate","Enable AI reasoning for complex compositions","For intricate scenes requiring planning","Enable thinking mode for complex multi-character scenes with specific interactions","Nano Banana Pro only","Response includes part.thought; use for complex logic like 'kittens MUST have eyes matching fur'"
|
||||
|
16
.opencode/skills/ai-artist/data/use-cases.csv
Normal file
@@ -0,0 +1,16 @@
STT,Use Case,Category,Keywords,Prompt Template,Key Elements,Best Platforms,Aspect Ratios,Tips,Example
1,Profile Avatar,Portrait,"avatar, profile, headshot, professional, LinkedIn, social media, portrait","A professional {style} portrait of [subject], {background}, {lighting}. High-quality, sharp focus on eyes.","Subject appearance, background, lighting style, expression","Nano Banana, Midjourney, DALL-E","1:1, 4:5","Use reference image for identity lock; specify skin texture preference","Professional LinkedIn headshot with soft daylight, blurred office background"
2,Social Media Post,Marketing,"social, Instagram, Twitter, viral, engagement, post, story, reel","A {style} image for social media showing [concept]. {composition}. Vibrant colors, eye-catching, shareable.","Main subject, brand colors, call-to-action elements, mobile-first composition","DALL-E, Midjourney, Nano Banana","1:1, 9:16, 4:5","Design for mobile viewing; use bold colors; include negative space for text overlay","Minimalist product flat-lay for Instagram with pastel background"
3,YouTube Thumbnail,Marketing,"thumbnail, YouTube, clickbait, attention, video, preview, engaging","A {style} YouTube thumbnail showing [scene]. Bold text '{text}' in {font}. High contrast, {emotion} expression.","Subject with expressive face, bold readable text, contrasting colors, clear focal point","Nano Banana, Midjourney","16:9","Use surprised/excited expressions; limit text to 3-5 words; ensure 1080p quality","Shocked reaction face with glowing background and 'YOU WON'T BELIEVE THIS' text"
4,Product Marketing,Commercial,"product, commercial, advertising, e-commerce, hero, marketing, lifestyle","A {style} product shot of [product] on {surface}. {lighting} creating {mood}. {camera_angle} view.","Product as hero, clean background, professional lighting, brand consistency","Nano Banana, DALL-E, SD","1:1, 4:3, 3:2","Use studio lighting terms; specify material reflections; include lifestyle context","Premium cosmetic bottle on marble surface with soft rim lighting"
5,E-commerce Main Image,Commercial,"e-commerce, Amazon, Shopify, product, main image, white background, listing","A clean {style} product image of [product] on pure white background. Studio lighting, multiple angles available.","Product isolation, pure white (#FFFFFF) background, consistent lighting, accurate colors","Nano Banana, SD, Midjourney","1:1, 4:3","Match platform requirements; ensure color accuracy; remove all shadows or use soft drop shadow","Wireless earbuds case on pure white background with soft shadows"
6,Infographic Visual,Educational,"infographic, educational, diagram, chart, visual, data, explainer","A {style} infographic about [topic]. Bento grid layout, {color_scheme} palette, icons for each section.","Clear hierarchy, data visualization, iconography, readable text, logical flow","Nano Banana, DALL-E","16:9, 4:3, 1:1","Use Bento grid for modules; limit to 8 sections max; ensure text legibility","Glass-style infographic about nutrition with 8 data modules"
7,Comic Storyboard,Narrative,"comic, manga, storyboard, panel, sequence, narrative, story","A {panels}-panel {style} comic showing [story]. Consistent character design, speech bubbles, {mood} atmosphere.","Panel layout, character consistency, speech bubbles, visual flow, action lines","Midjourney, SD, Nano Banana","2:3, 3:4, 16:9","Maintain character identity across panels; use manga techniques for action; clear panel boundaries","4-panel manga sequence showing character discovering a secret message"
8,Game Asset,Gaming,"game, asset, sprite, character, icon, item, UI, gaming","A {style} game asset: [item/character]. Transparent background ready, {view} view, consistent with {art_style} aesthetic.","Isolation-ready design, consistent art style, multiple states/angles, scalable","SD, Midjourney","1:1, varies","Design for transparency; create sprite sheets; maintain style consistency across assets","Pixel art treasure chest with open/closed states"
9,Poster Flyer,Print,"poster, flyer, print, event, promotional, announcement, advertising","A {style} poster for [event/product]. Headline: '{headline}', {layout} composition, {color_scheme} colors.","Typography hierarchy, visual hook, brand elements, print-ready composition","DALL-E, Midjourney, Nano Banana","3:4, 2:3, A4","Use CMYK-safe colors; ensure 300dpi concept; include bleed area concept","Vintage music festival poster with psychedelic typography and sunset gradient"
10,App Web Design,UI/UX,"app, web, UI, UX, mockup, interface, screen, dashboard","A {style} {device} mockup showing [app/website]. {color_scheme} palette, {layout} layout, modern UI elements.","Device frame, UI components, realistic content, responsive hints","Nano Banana, Midjourney, DALL-E","varies by device","Show realistic content; use design system patterns; include status bar details","iPhone mockup showing a meditation app with glassmorphism cards"
11,Quote Card,Social,"quote, inspiration, motivational, card, typography, wisdom","A {style} quote card with text: '{quote}' — {author}. {background} background, {font_style} typography.","Quote text, author attribution, decorative elements, readable font","Nano Banana, DALL-E","1:1, 4:5, 16:9","Use large quotation marks; ensure text contrast; add subtle decorative elements","Brown gradient quote card with Steve Jobs quote and portrait silhouette"
12,Header Banner,Marketing,"header, banner, hero, website, blog, article, cover","A {style} header image for [topic]. {composition}, {mood} atmosphere, horizontal format ready for text overlay.","Wide aspect ratio, focal point consideration, text-safe zones, brand alignment","DALL-E, Midjourney, Nano Banana","16:9, 21:9, 3:1","Leave negative space for text; ensure horizontal composition; consider responsive cropping","Hand-drawn style header for tech blog with blue-green gradient"
13,Character Design,Creative,"character, OC, original, design, concept, persona, avatar","A {style} character design of [description]. {pose} pose, {outfit} outfit, {personality} expression.","Full body or bust, consistent proportions, outfit details, personality expression","Midjourney, SD, Nano Banana","2:3, 3:4, 1:1","Include turnaround views; specify body proportions; describe personality through visual cues","Anime-style female character with cyberpunk outfit and neon accessories"
14,Architecture Interior,Visualization,"architecture, interior, room, space, design, rendering, visualization","A {style} {view} of [space type]. {materials} materials, {lighting} lighting, {atmosphere} atmosphere.","Spatial composition, material textures, lighting quality, furnishing details","Midjourney, SD","16:9, 4:3, 3:2","Use architectural photography terms; specify time of day; describe material finishes","Modern minimalist living room with floor-to-ceiling windows and warm afternoon light"
15,Food Photography,Commercial,"food, culinary, recipe, dish, restaurant, cooking, delicious","A {style} food photo of [dish]. {plating} presentation, {lighting} lighting, {props} styling.","Hero dish focus, complementary props, appetizing lighting, styled setting","Nano Banana, Midjourney","1:1, 4:5, 4:3","Use overhead or 45-degree angles; include human elements for scale; describe steam/freshness","Overhead shot of artisan pizza with stretchy cheese pull and rustic wooden board"
184
.opencode/skills/ai-artist/references/advanced-techniques.md
Normal file
@@ -0,0 +1,184 @@
# Advanced Prompt Engineering

## Prompt Optimization

### DSPy Framework
Automatic prompt optimization through:
1. Define task with input/output signatures
2. Compile with optimizer (BootstrapFewShot, MIPRO)
3. Model learns optimal prompting strategy
4. Export optimized prompts for production

### Meta-Prompting
```
You are a prompt engineer. Create 5 variations for [task]:
1. Direct instruction approach
2. Role-based approach
3. Few-shot example approach
4. Chain of thought approach
5. Constraint-focused approach

Evaluate each, select best.
```

### Self-Refinement Loop
```
Generate: [Initial response]
Critique: "What's wrong? Score 1-10."
Refine: "Fix issues, improve score."
Repeat until score ≥ 8.
```
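
A minimal Python sketch of this loop. The `llm(prompt)` helper is hypothetical (swap in your actual client), and the score regex assumes the critique ends with `Score: N`:

```python
import re

def llm(prompt: str) -> str:
    """Hypothetical model call -- replace with your client."""
    raise NotImplementedError

def self_refine(task: str, max_rounds: int = 4, target: int = 8) -> str:
    draft = llm(task)
    for _ in range(max_rounds):
        critique = llm(f"Critique this answer to '{task}'. "
                       f"List problems, then end with 'Score: N' (1-10).\n\n{draft}")
        match = re.search(r"Score:\s*(\d+)", critique)
        if match and int(match.group(1)) >= target:
            break
        draft = llm(f"Fix these issues:\n{critique}\n\nOriginal answer:\n{draft}")
    return draft
```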

## Prompt Chaining

### Sequential Chain
```
Chain 1: [Input] → Extract key points
Chain 2: Key points → Create outline
Chain 3: Outline → Write draft
Chain 4: Draft → Edit and polish
```
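
The same chain as a Python sketch, reusing the hypothetical `llm` helper stubbed in the previous example:

```python
def sequential_chain(text: str) -> str:
    """Each step's output becomes the next step's input."""
    points  = llm(f"Extract the key points from:\n{text}")
    outline = llm(f"Create an outline from these key points:\n{points}")
    draft   = llm(f"Write a draft following this outline:\n{outline}")
    return llm(f"Edit and polish this draft:\n{draft}")
```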

### Parallel Chain
Run independent subtasks simultaneously, merge results.

### Conditional Chain
```
If [condition A]: Execute prompt variant 1
If [condition B]: Execute prompt variant 2
Else: Execute default prompt
```

### Loop Pattern
```
While not [success condition]:
  Generate attempt
  Evaluate against criteria
  If pass: break
  Else: refine with feedback
```

## Evaluation Methods

### LLM-as-Judge
```
Rate this [output] on:
1. Accuracy (1-10)
2. Completeness (1-10)
3. Clarity (1-10)
4. Relevance (1-10)

Provide reasoning for each score.
Final: Pass/Fail threshold = 7 average.
```

### A/B Testing Protocol
1. Single variable per test
2. 20+ samples minimum
3. Score on defined criteria
4. Statistical significance check (p < 0.05; see the sketch below)
5. Document winner, roll out
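
A minimal sketch of step 4's check, assuming you have already collected one score list per variant (the evaluation harness itself is not shown):

```python
from statistics import mean
from scipy.stats import ttest_ind

def ab_winner(scores_a: list[float], scores_b: list[float], alpha: float = 0.05):
    """Return 'A' or 'B' if the difference is significant, else None."""
    _stat, p_value = ttest_ind(scores_a, scores_b)
    if p_value >= alpha:
        return None  # no significant difference; keep testing or keep incumbent
    return "A" if mean(scores_a) > mean(scores_b) else "B"
```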

### Regression Testing
- Maintain test set of critical examples
- Run before deploying prompt changes
- Compare scores to baseline
- Block deployment if regression detected

## Agent Prompting

### Tool Use Design
```
You have access to these tools:
- search(query): Search the web
- calculate(expression): Math operations
- code(language, code): Execute code

To use: <tool_name>arguments</tool_name>
Wait for result before continuing.
```
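
On the agent side, the tag format above can be dispatched with a small parser; a sketch with stub tool bodies:

```python
import re

TOOLS = {
    "search": lambda q: f"[stub] results for {q}",
    "calculate": lambda expr: str(eval(expr)),  # demo only: eval is unsafe on untrusted input
}

def run_tool_calls(model_output: str) -> list[str]:
    """Find <tool_name>arguments</tool_name> tags and dispatch each one."""
    calls = re.findall(r"<(\w+)>(.*?)</\1>", model_output, re.DOTALL)
    return [TOOLS[name](args) for name, args in calls if name in TOOLS]

# run_tool_calls("<calculate>2 + 3</calculate>")  ->  ["5"]
```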

### Planning Prompt
```
Task: [Complex goal]

Before acting:
1. Break into subtasks
2. Identify dependencies
3. Plan execution order
4. Note potential blockers

Then execute step by step.
```

### Reflection Pattern
```
After each step:
- What worked?
- What didn't?
- Adjust approach for next step.
```

## Parameter Tuning

| Parameter | Low | High | Use Case |
|-----------|-----|------|----------|
| Temperature | 0.0-0.3 | 0.7-1.0 | Factual vs Creative |
| Top-P | 0.8 | 0.95 | Focused vs Diverse |
| Top-K | 10 | 100 | Conservative vs Exploratory |

**Rule**: Tune temperature first. Only adjust top-p if needed. Never both at once.

## Safety Patterns

### Output Filtering
```
Before responding, check:
- No PII exposure
- No harmful content
- No policy violations
- Aligned with guidelines

If any fail: "I can't help with that."
```

### Jailbreak Prevention
- Clear system boundaries upfront
- Repeat constraints at end
- "Ignore previous" pattern detection
- Role-lock: "You are ONLY [role], never anything else"

### Confidence Calibration
```
For each claim, provide:
- Confidence: High/Medium/Low
- Source: [citation if available]
- Caveat: [limitations]
```

## Production Patterns

### Version Control
- Git for prompt files
- Semantic versioning (1.0.0, 1.1.0)
- Changelog per version
- Rollback capability

### Caching
- Cache common queries
- TTL based on content freshness
- Invalidate on prompt update

### Fallbacks
```
Try: Primary prompt
If fail: Simplified fallback prompt
If still fail: Human escalation
Log all failures for analysis.
```
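
As a sketch, with the hypothetical `llm` helper from earlier and an assumed `notify_human` escalation hook:

```python
import logging

def answer_with_fallbacks(question: str, primary: str, fallback: str) -> str:
    for prompt in (primary, fallback):
        try:
            return llm(prompt.format(question=question))
        except Exception as err:              # API error, timeout, unusable output
            logging.warning("prompt failed: %s", err)
    notify_human(question)                    # assumed escalation hook in your system
    return "Escalated to a human reviewer."
```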

### Cost Optimization
- Shorter prompts = fewer tokens
- Remove redundant examples
- Use smaller model for simple tasks
- Batch similar requests
File diff suppressed because it is too large
66
.opencode/skills/ai-artist/references/domain-code.md
Normal file
@@ -0,0 +1,66 @@
# Code Generation Patterns

## Function Implementation
```
Write a [language] function:
- Input: [params with types]
- Output: [return type]
- Behavior: [logic]

Requirements:
- Edge cases: [list]
- Error handling: [approach]
- Performance: O([complexity])
```
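
As a concrete instance, the filled-in template might read (details are illustrative):

```
Write a Python function:
- Input: items (list[str]), limit (int)
- Output: list[str]
- Behavior: return the first `limit` items, deduplicated, preserving order

Requirements:
- Edge cases: empty list, limit <= 0, limit > len(items)
- Error handling: raise TypeError on non-list input
- Performance: O(n)
```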

## Code Review
```
Review for:
1. Bugs/logic errors
2. Security vulnerabilities
3. Performance issues
4. Style violations
5. Missing edge cases

Format: Issue, line number, severity, fix.
```

## Refactoring
```
Refactor to:
- [Improvement goal]
- Maintain backward compatibility
- Keep public API
- Add comments for complex logic
Show before/after.
```

## Debugging
```
<error>[Error message/behavior]</error>
<code>[Relevant code]</code>
<context>[When it occurs]</context>

Analyze:
1. Root cause
2. Why it happens
3. Fix with explanation
4. Prevention strategy
```

## Test Generation
```
Generate tests for [function/class]:
- Framework: [jest/pytest/etc]
- Coverage: happy path, edge cases, errors
- Include: setup, assertion, cleanup
- Mock: [external dependencies]
```

## Documentation
```
Document this [function/class/API]:
- Format: [JSDoc/docstring/OpenAPI]
- Include: description, params, returns, examples
- Note: edge cases, errors, deprecations
```
72
.opencode/skills/ai-artist/references/domain-data.md
Normal file
@@ -0,0 +1,72 @@
# Data & Analysis Patterns

## Structured Extraction
```
Extract from text:
<text>[content]</text>

Return JSON:
{
  "field1": "value or null",
  "field2": ["array"]
}

Rules:
- Exact matches only
- Confidence score if uncertain
- null for missing
```

## Document Analysis
```
Analyze [document type]:
1. Summary (2-3 sentences)
2. Key entities (people, orgs, dates)
3. Main topics (ranked)
4. Sentiment: positive/neutral/negative
5. Action items
```

## Comparison
```
Compare [A] and [B]:
| Criterion | A | B |
|-----------|---|---|
| [Factor 1] | | |
| [Factor 2] | | |

Recommendation: [choice] for [use case]
```

## Problem Solving
```
Problem: [description]

Analyze:
1. Root cause (5 whys)
2. Contributing factors
3. Options (pros/cons)
4. Recommendation
5. Implementation steps
6. Risk mitigation
```

## Data Transformation
```
Transform data:
- Input format: [CSV/JSON/etc]
- Output format: [target]
- Rules: [mapping logic]
- Validation: [constraints]

Handle: missing values, type mismatches.
```

## Summarization
```
Summarize [content]:
- Length: [sentences/words]
- Focus: [key themes]
- Audience: [technical/general]
- Preserve: [critical details]
```
66
.opencode/skills/ai-artist/references/domain-marketing.md
Normal file
@@ -0,0 +1,66 @@
# Marketing Copy Patterns

## Headlines
```
Write 5 headline variations for [product].
Frameworks:
1. How to [benefit]
2. [Number] ways to [solve problem]
3. The secret to [outcome]
4. Why [audience] love [product]
5. [Timeframe] to [transformation]
```

## Product Descriptions
```
<product>[Name, features]</product>
<audience>[Demographics, pain points]</audience>

Write description that:
- Opens with benefit (not feature)
- Addresses [main objection]
- Social proof placeholder
- Clear CTA
- Tone: [brand voice]
- Length: [word count]
```

## Email Subject Lines
```
Generate 10 subject lines for [campaign].
Mix approaches:
- Curiosity gap
- Urgency/scarcity
- Personalization
- Question format
- Number/list
Under 50 chars. Test 2-3 with emojis.
```

## Ad Copy
```
Platform: [Google/Meta/LinkedIn]
Objective: [awareness/conversion]
Character limit: [limit]

Create [N] variations with:
- Hook (first 5 words critical)
- Value proposition
- Social proof element
- CTA matching platform norms
```

## Landing Pages
```
<offer>[Product/service]</offer>
<goal>[signup/purchase/download]</goal>

Write sections:
1. Hero headline + subhead
2. Problem agitation
3. Solution introduction
4. 3-5 benefit bullets
5. Social proof
6. CTA with urgency
7. FAQ (3 objections)
```
33
.opencode/skills/ai-artist/references/domain-patterns.md
Normal file
@@ -0,0 +1,33 @@
# Domain-Specific Prompt Patterns

Quick reference index. Load specific domain file for detailed patterns.

## Domains

| Domain | File | Use Cases |
|--------|------|-----------|
| Marketing | `domain-marketing.md` | Headlines, product copy, emails, ads |
| Code | `domain-code.md` | Functions, review, refactoring, debugging |
| Writing | `domain-writing.md` | Stories, characters, dialogue, editing |
| Data | `domain-data.md` | Extraction, analysis, comparison, reasoning |

## Universal Pattern

All domain prompts follow:
```
<context>
[Domain-specific background]
</context>

<task>
[Specific action]
</task>

<constraints>
[Quality criteria, format, length, tone]
</constraints>

<output>
[Expected structure]
</output>
```
68
.opencode/skills/ai-artist/references/domain-writing.md
Normal file
@@ -0,0 +1,68 @@
# Creative Writing Patterns

## Story Outline
```
Create [length] story outline:
- Genre: [genre]
- Protagonist: [brief]
- Conflict: [type]
- Setting: [time/place]

Structure:
1. Hook/Opening
2. Inciting incident
3. Rising action (3 beats)
4. Climax
5. Resolution
```

## Character Voice
```
Write as [character]:
- Background: [history]
- Speech: [patterns, vocab]
- Emotion: [current state]
- Goal: [scene objective]

Maintain voice consistency.
```

## Dialogue
```
Write dialogue between [A] and [B]:
- Scene: [context]
- Tension: [conflict source]
- Subtext: [what's unsaid]

Give each character a distinct voice. Show, don't tell.
```

## Scene Description
```
Describe [scene]:
- POV: [character/omniscient]
- Focus: [sensory details]
- Mood: [atmosphere]
- Pacing: [fast/slow/measured]

Use active verbs, concrete details.
```

## Editing Pass
```
Edit this [content type]:
Focus: [clarity/flow/voice/grammar]
Preserve: [author's style]
Flag: [major issues only]

Provide tracked changes with rationale.
```

## Genre Adaptation
```
Rewrite [content] as [genre]:
- Keep: core plot/message
- Add: genre conventions
- Tone: [genre-appropriate]
- Tropes: [use/subvert specific tropes]
```
141
.opencode/skills/ai-artist/references/image-prompting.md
Normal file
@@ -0,0 +1,141 @@
# Image Generation Prompting

## Universal Structure
```
[Subject + Details] [Action/Pose] [Setting/Environment]
[Style/Medium] [Artist/Movement Reference]
[Lighting] [Camera/Lens] [Composition]
[Quality Modifiers] [Aspect Ratio]
```

## Platform Reference

### Midjourney v6.1
```
[prompt] --ar 16:9 --style raw --v 6.1
```

| Parameter | Values | Effect |
|-----------|--------|--------|
| `--ar` | 1:1, 16:9, 9:16, 4:3, 3:2, 21:9 | Aspect ratio |
| `--style` | raw, default | raw=photorealistic |
| `--stylize` | 0-1000 | Artistic interpretation (0=literal) |
| `--chaos` | 0-100 | Variation between outputs |
| `--weird` | 0-3000 | Unusual/experimental elements |
| `--quality` | .25, .5, 1, 2 | Detail level (cost) |
| `--seed` | number | Reproducibility |
| `--no` | [term] | Negative prompt inline |
| `--tile` | - | Seamless patterns |

**Multi-prompt weighting**: `cat::2 dog::1` (cat 2x stronger)
**Describe**: Upload image → get prompt suggestions
**Blend**: `/blend` to merge 2-5 images

### DALL-E 3
- Natural language only, no parameters
- Be descriptive, not keyword-heavy
- Specify: "HD quality" or "vivid style" in prompt
- Text rendering: Describe font, placement, content explicitly
- Avoid: Lists of keywords, technical jargon

### Stable Diffusion / SDXL / Flux
```
(important term:1.3), normal term, (less important:0.8)
Negative prompt: ugly, blurry, deformed, watermark
```

| Feature | Syntax |
|---------|--------|
| Weight up | `(word:1.2)` to `(word:1.5)` |
| Weight down | `(word:0.5)` to `(word:0.8)` |
| LoRA | `<lora:model_name:0.8>` |
| Embedding | `embedding:name` |
| Blend | `[cat\|dog]` alternating |

**CFG Scale**: 7-12 typical (higher=more prompt adherence)
**Samplers**: DPM++ 2M Karras (quality), Euler a (speed)

### Nano Banana (Gemini)
```
[Narrative description, not keywords]
Captured with 85mm lens, soft bokeh, natural lighting
```

**Key features**:
- 32K token context (complex prompts OK)
- Narrative paragraphs > keyword lists
- Hex colors for precision: `#9F2B68`
- Text rendering: Describe font, placement explicitly
- Multi-image: Up to 14 reference images
- Search grounding: Real-time data (weather, events)
- Thinking mode: Complex composition reasoning

**Aspect ratios**: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
**Resolution**: 1K, 2K, 4K (use uppercase K)

**Best practices**:
- ALL CAPS for critical requirements
- Markdown lists for multiple rules
- "NEVER include..." for negative constraints
- Photography terms anchor quality

### Imagen 4 / Veo 3.1
- Natural language, descriptive
- Aspect ratio in text: "16:9 landscape format"
- Veo: Cinematography keywords most powerful
- Camera movements: pan, tilt, dolly, crane, tracking
- Scene transitions: cut, fade, dissolve

## Style Keywords

### Art Movements
photorealistic, hyperrealistic, impressionist, expressionist,
surrealist, art nouveau, art deco, pop art, cyberpunk, steampunk,
solarpunk, vaporwave, synthwave, brutalist, minimalist

### Media Types
oil painting, watercolor, digital art, 3D render, vector art,
pencil sketch, ink drawing, pastel, charcoal, gouache, fresco

### Photography Styles
portrait, landscape, macro, street, documentary, fashion,
editorial, product, architectural, aerial, underwater

## Lighting Vocabulary

| Term | Effect |
|------|--------|
| Golden hour | Warm, soft, directional |
| Blue hour | Cool, moody, twilight |
| Rembrandt | Triangle on cheek, dramatic |
| Butterfly | Shadow under nose, glamorous |
| Split | Half face lit, mysterious |
| Rim/back | Edge highlight, separation |
| Volumetric | Light rays visible |
| Neon glow | Colorful, cyberpunk |

## Camera/Lens Terms
- 50mm (standard), 85mm (portrait), 35mm (wide)
- Telephoto (compressed), Macro (close-up), Fisheye (distorted)
- Shallow DOF, Deep DOF, Bokeh
- Low angle, High angle, Dutch angle, Bird's eye, Worm's eye

## Composition Keywords
rule of thirds, golden ratio, centered, symmetrical,
leading lines, framing, negative space, filling frame,
foreground interest, layered depth

## Negative Prompts (SD/Flux)
```
ugly, deformed, blurry, low quality, bad anatomy,
extra limbs, missing limbs, disfigured, watermark,
text, signature, cropped, out of frame, duplicate,
poorly drawn, bad proportions, gross proportions
```

## Iterative Workflow
1. Start: Subject + style + quality modifier
2. Add: Lighting + composition + camera
3. Test: Generate 4 variations
4. Refine: Adjust weights, add negatives
5. Upscale: Select winner, increase resolution
165
.opencode/skills/ai-artist/references/llm-prompting.md
Normal file
@@ -0,0 +1,165 @@
# LLM Prompting Reference

## Prompt Architecture

### System Prompt Structure
```
You are [ROLE] with expertise in [DOMAIN].

## Context
[Background, constraints, tone]

## Instructions
[Step-by-step task breakdown]

## Output Format
[Exact structure with example]

## Constraints
- [Hard limits]
- [Guardrails]
```

### User Prompt Structure
```xml
<context>[Background information]</context>
<task>[Specific action required]</task>
<format>[Output structure]</format>
<constraints>[Additional limits]</constraints>
```

## Reasoning Techniques

### Chain of Thought (CoT)
| Variant | Trigger | Best For |
|---------|---------|----------|
| Zero-shot | "Think step by step" | Quick reasoning tasks |
| Few-shot | 2-3 reasoning examples | Complex multi-step |
| Auto-CoT | "Let's approach systematically" | General reasoning |

### Tree of Thoughts (ToT)
```
Explore 3 approaches to [problem]:
For each: 1) Method 2) Pros/cons 3) Success probability
Evaluate branches, select best path.
```

### Self-Consistency
Run same prompt 3-5x with temp=0.7, take majority answer. Best for: math, logic, factual.

### ReAct Pattern
```
Thought: [Current reasoning]
Action: [Tool/step to take]
Observation: [Result]
...repeat...
Final Answer: [Conclusion]
```

### Least-to-Most
```
Break [complex task] into subproblems.
Solve easiest first, build up.
```

## Instruction Optimization

### Self-Refine Pattern
```
1. Generate initial response
2. Critique: "What's wrong with this?"
3. Refine: "Fix identified issues"
4. Repeat until satisfactory
```

### Role Optimization
- **Expert persona**: "As a senior [role] with 20 years..."
- **Constraint persona**: "You only respond with..."
- **Teaching persona**: "Explain as if to a..."

### Task Decomposition
```
<subtasks>
1. [First step - output X]
2. [Second step - using X, output Y]
3. [Final step - using Y, output Z]
</subtasks>
```

## Output Control

### JSON Enforcement
```
Respond in valid JSON only:
{"field": "type", "required": true}
No markdown, no explanation, just JSON.
```
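
On the application side it pays to validate and retry; a minimal sketch with a hypothetical `llm(prompt)` helper:

```python
import json

def llm(prompt: str) -> str:   # hypothetical model call; replace with your client
    raise NotImplementedError

def llm_json(prompt: str, retries: int = 2) -> dict:
    """Request JSON, parse it, and re-prompt on malformed output."""
    for _ in range(retries + 1):
        raw = llm(prompt)
        try:
            return json.loads(raw)
        except json.JSONDecodeError:
            prompt = f"Your last reply was not valid JSON:\n{raw}\nReturn ONLY valid JSON."
    raise ValueError("model never returned valid JSON")
```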

### Length Control
| Goal | Phrase |
|------|--------|
| Brief | "In 2-3 sentences" |
| Detailed | "Comprehensive analysis in 500 words" |
| Structured | "5 bullet points, max 20 words each" |

### Hallucination Reduction
- "Only use information from provided context"
- "If unsure, say 'I don't know'"
- "Cite sources for each claim"
- "Confidence: high/medium/low for each point"

## Model-Specific Tips

### Claude
- XML tags: `<thinking>`, `<answer>`, `<context>`
- Extended thinking: "Think deeply before responding"
- Prefill: Start assistant response to guide format

### GPT-4
- JSON mode: `response_format: {"type": "json_object"}`
- Function calling for structured output
- System message for persistent instructions

### Gemini
- Multimodal: Image + text in same prompt
- Grounding: Enable Google Search for facts
- Safety settings: Adjust thresholds

## Context Engineering

### RAG Prompt Pattern
```
<retrieved_context>
[Document chunks with sources]
</retrieved_context>

Answer based ONLY on context above.
If not in context, say "Not found in documents."
```
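
Assembling that pattern from retrieved chunks is mechanical; a sketch (retrieval itself is out of scope here):

```python
def build_rag_prompt(question: str, chunks: list[tuple[str, str]]) -> str:
    """chunks: (source, text) pairs from your retriever."""
    context = "\n\n".join(f"[{src}] {text}" for src, text in chunks)
    return (
        "<retrieved_context>\n"
        f"{context}\n"
        "</retrieved_context>\n\n"
        "Answer based ONLY on context above.\n"
        'If not in context, say "Not found in documents."\n\n'
        f"Question: {question}"
    )
```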

### Window Optimization
- Front-load critical info (primacy effect)
- Repeat key constraints at end (recency effect)
- Chunk long documents with summaries

## Few-Shot Examples

### Structure
```
Example 1:
Input: [representative input]
Output: [ideal output]

Example 2:
Input: [edge case]
Output: [handling]

Now apply to:
Input: [actual task]
```

### Selection Criteria
- Diverse examples > similar examples
- Include edge cases
- Match complexity of target task
- 2-5 examples optimal (diminishing returns beyond)
136
.opencode/skills/ai-artist/references/nano-banana.md
Normal file
@@ -0,0 +1,136 @@
# Nano Banana Pro (Gemini Image)

## Models

| Model ID | Type | Best For |
|----------|------|----------|
| `gemini-2.5-flash-image` | Flash | Speed, high-volume |
| `gemini-3-pro-image-preview` | Pro | Text rendering, complex prompts |

## Core Principle

**Narrative paragraphs > keyword lists** (32K context). Write like briefing a photographer.

## Parameters

```python
responseModalities=['TEXT', 'IMAGE']
aspect_ratio="16:9"  # 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
image_size="2K"      # 1K, 2K, 4K - MUST be uppercase K
```
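
A sketch of wiring these into a google-genai call. The config class names are an assumption (they vary across SDK releases), so verify against the installed version:

```python
from google import genai
from google.genai import types

client = genai.Client()  # reads GEMINI_API_KEY from the environment

response = client.models.generate_content(
    model="gemini-2.5-flash-image",
    contents="A lighthouse on a sea cliff at golden hour, 35mm lens.",
    config=types.GenerateContentConfig(
        response_modalities=["TEXT", "IMAGE"],
        image_config=types.ImageConfig(aspect_ratio="16:9"),  # assumption: field names per current SDK
    ),
)
```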

## Prompt Templates

**Photorealistic**: `A [subject] in [location], [lens] lens. [Lighting] creates [mood]. [Details]. [Camera angle]. Professional photography, natural lighting.`

**Illustration**: `[Art style] illustration of [subject]. [Color palette]. [Line style]. [Background]. [Mood].`

**Text in Image**: `Image with text "[EXACT]" in [font]. Font: [style]. Color: [hex/#FF5733]. Position: [top/center/bottom]. Background: [desc]. Context: [poster/sign].`

**Product**: `[Product] on [surface]. Materials: [finish]. Lighting: [setup]. Camera: [angle]. Background: [type]. Style: [commercial/lifestyle].`

**Infographic**: `Premium liquid glass Bento grid infographic with 8 modules. Product: [item]. Language: [lang]. Hero card: 28-30%. Background: [ethereal/macro/pattern/context].`

## Prompt Collection / Prompt Search

Read `references/awesome-prompts.csv` directly or search for relevant prompts using `python3 ../scripts/search.py "<query>"`.

## JSON Structured Prompts

For complex scenes, use JSON structure:

```json
{
  "meta_data": { "prompt_version": "2.0", "use_case": "..." },
  "subject_layer": {
    "anatomy": { "demographics": {}, "face_detail": {}, "hair": {} },
    "attire_layer": { "garment_main": {}, "accessories": {} },
    "pose_dynamics": { "posture": "", "limb_placement": {} }
  },
  "environment_layer": { "setting_type": "", "spatial_layout": {} },
  "composition_and_tech": {
    "framing": { "type": "", "angle": "" },
    "lighting": { "source": "", "direction": "" },
    "aesthetic_style": { "visual_core": "", "vibe": "" }
  }
}
```

## Techniques

| Technique | Example |
|-----------|---------|
| Emphasis | `ALL CAPS` for critical requirements |
| Precision colors | `#9F2B68` instead of "dark magenta" |
| Negative constraints | `NEVER include text/watermarks. DO NOT add labels.` |
| Realism trigger | `Natural lighting, DOF. Captured with Canon EOS 90D DSLR.` |
| Structured edits | `Make ALL edits: - [1] - [2] - [3]` |
| Complex logic | `Kittens MUST have heterochromatic eyes matching fur colors` |
| Identity lock | `Use reference as EXACT facial reference. STRICT identity lock.` |

## Advanced Features

**Multi-Image Input** (up to 14): 6 object + 5 human refs. Tip: collage refs into a single image.

**Search Grounding**: `tools=[{"google_search": {}}]` — real-time data (weather, charts, events).

**Thinking Mode** (Pro only): `part.thought` in response for complex reasoning.

## Popular Use Case Templates

### Quote Card
```
A wide quote card with {background} background, {font_style} font.
Quote: "{quote_text}" — {author}
Large subtle quotation mark before text. Portrait on left, text right.
Text: 2/3 width, portrait: 1/3 width. Gradient transition on portrait.
```

### Infographic (Bento Grid)
```
Premium liquid glass Bento grid product infographic with 8 modules.
Product: [name]. Language: [lang].
1) Hero card (28-30%): Product photo/3D glass
2) Core Benefits: 4 benefits + icons
3) How to Use: 4 methods + icons
4) Key Metrics: 5 data points
5) Who It's For: 4 recommended + 3 caution groups
6) Important Notes: 4 precautions
7) Quick Reference: Specs/certifications
8) Did You Know: 3 facts
Background: Apple liquid glass cards (85-90% transparent).
```

### Mirror Selfie
```
Scene: Mirror selfie in [room type], [color] tone.
Subject: [demographics], [body type], [hairstyle].
Pose: [stance], holding smartphone.
Clothing: [detailed outfit description].
Environment: [room details, furnishings, lighting].
Camera: Smartphone rear camera via mirror, [focal length]mm.
Negative: [artifacts to avoid].
```

### Style Transformation
```
A Japanese Edo-period Ukiyo-e woodblock print reimagining [modern scene].
Characters: Edo-era kimono but modern actions.
Tech transformation: Smartphones → glowing scrolls, trains → wooden carriages.
Composition: Flattened perspective, bold ink outlines.
Texture: Wood grain, paper fibers, pigment bleeding.
Colors: Prussian blue, vermilion red, muted ochre.
Include vertical Japanese calligraphy and red artist seal.
```

## Workflow

1. Narrative description → 2. Photography terms → 3. ALL CAPS emphasis → 4. Multi-turn refine → 5. Negative constraints → 6. Set ratio/resolution

## Avoid

- Keyword spam ("4k, trending, masterpiece")
- Vague text ("add some text" → specify exact text, font, position)
- Lowercase resolution ("4k" rejected, use "4K")
- Over-smoothed skin requests (leads to plastic look)
- Generic prompts without specific details
201
.opencode/skills/ai-artist/references/reasoning-techniques.md
Normal file
@@ -0,0 +1,201 @@
# Reasoning Techniques Deep Dive

## Chain of Thought (CoT) Variants

### Zero-Shot CoT
```
[Task description]

Think step by step before answering.
```
**Use when**: Quick reasoning, no examples available
**Effectiveness**: +40-60% on reasoning tasks

### Few-Shot CoT
```
Example 1:
Q: [Question]
A: Let me think through this...
Step 1: [Reasoning]
Step 2: [Reasoning]
Therefore: [Answer]

Example 2:
Q: [Question]
A: Breaking this down...
First: [Reasoning]
Next: [Reasoning]
So: [Answer]

Now solve:
Q: [Your question]
```
**Use when**: Complex reasoning, pattern demonstration needed
**Effectiveness**: +50-80% on complex tasks

### Auto-CoT
```
Let me approach this systematically:
1. Identify the key elements
2. Analyze relationships
3. Apply relevant principles
4. Draw conclusions
5. Verify my reasoning
```
**Use when**: General problem-solving, exploratory reasoning

## Tree of Thoughts (ToT)

### Implementation Pattern
```
Problem: [Complex problem]

Generate 3 different approaches:

Approach A:
- Method: [Description]
- Reasoning: [Why this might work]
- Potential issues: [Risks]
- Confidence: [1-10]

Approach B:
- Method: [Description]
- Reasoning: [Why this might work]
- Potential issues: [Risks]
- Confidence: [1-10]

Approach C:
- Method: [Description]
- Reasoning: [Why this might work]
- Potential issues: [Risks]
- Confidence: [1-10]

Evaluate branches:
- Which has highest success probability?
- Which has fewest risks?
- Which is most feasible?

Selected approach: [Best option with justification]
Execution: [Step-by-step implementation]
```

**Use when**: Strategic decisions, multiple valid paths, high-stakes problems

## Self-Consistency

### Process
1. Generate 5 responses at temp=0.7
2. Extract final answers from each
3. Take majority vote
4. Report confidence = agreement %

### Implementation
```
Run this prompt 5 times (or use n=5 parameter):
[Your reasoning task]
Think step by step and provide final answer.

Aggregate: If 4/5 agree = high confidence
If 3/5 agree = medium confidence
If split = low confidence, needs review
```
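
The aggregation step as a minimal Python sketch; the `llm` helper and the answer-extraction regex are assumptions to adapt:

```python
import re
from collections import Counter

def llm(prompt: str) -> str:   # hypothetical model call at temp=0.7
    raise NotImplementedError

def self_consistent_answer(task: str, n: int = 5) -> tuple[str, float]:
    answers = []
    for _ in range(n):
        out = llm(f"{task}\nThink step by step and provide final answer.")
        match = re.search(r"final answer[:\s]*(.+)", out, re.IGNORECASE)
        answers.append((match.group(1) if match else out).strip())
    best, votes = Counter(answers).most_common(1)[0]
    return best, votes / n     # answer plus agreement ratio as confidence
```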

**Use when**: Math, logic, factual questions with verifiable answers

## ReAct (Reasoning + Acting)

### Full Pattern
```
Task: [Goal to achieve]

Thought 1: I need to understand the current situation.
Action 1: [Observation or tool use]
Observation 1: [Result from action]

Thought 2: Based on this, I should [next logical step].
Action 2: [Next action]
Observation 2: [Result]

Thought 3: Now I can see that [insight].
Action 3: [Verification or next step]
Observation 3: [Result]

Thought 4: I have enough information to conclude.
Final Answer: [Conclusion with reasoning]
```

**Use when**: Tool-augmented reasoning, research tasks, multi-step analysis

## Least-to-Most Prompting

### Structure
```
Complex problem: [Full problem statement]

Step 1: Decomposition
Break this into simpler subproblems, ordered from easiest to hardest:
1. [Simplest subproblem]
2. [Next subproblem, may depend on 1]
3. [Harder subproblem, may depend on 1,2]
4. [Final subproblem requiring all above]

Step 2: Sequential Solution
Subproblem 1: [Solution]
Using result from 1, Subproblem 2: [Solution]
Using results from 1,2, Subproblem 3: [Solution]
Using all results, Subproblem 4: [Solution]

Final integrated answer: [Complete solution]
```

**Use when**: Mathematical word problems, multi-step procedures, compositional tasks

## Decomposed Prompting (DECOMP)

### Pattern
```
Task: [Complex task]

Required capabilities:
- [Capability 1]: Use [specialized prompt/tool]
- [Capability 2]: Use [specialized prompt/tool]
- [Capability 3]: Use [specialized prompt/tool]

Orchestration:
1. Call [Capability 1] with [input] → get [output1]
2. Call [Capability 2] with [output1] → get [output2]
3. Call [Capability 3] with [output2] → get [final output]

Integrate results: [Final response]
```

**Use when**: Tasks requiring diverse expertise, specialized sub-tasks

## Constitutional AI Reasoning

### Self-Critique Pattern
```
Initial response: [Generated content]

Critique against principles:
- Is it helpful? [Yes/No + reasoning]
- Is it harmless? [Yes/No + reasoning]
- Is it honest? [Yes/No + reasoning]

If any No:
Revised response that addresses [specific issues]:
[Improved content]
```

## Choosing the Right Technique

| Task Type | Best Technique |
|-----------|---------------|
| Simple reasoning | Zero-shot CoT |
| Complex multi-step | Few-shot CoT |
| Strategic decisions | Tree of Thoughts |
| Factual verification | Self-Consistency |
| Tool-using tasks | ReAct |
| Word problems | Least-to-Most |
| Specialized sub-tasks | DECOMP |
| Safety-critical | Constitutional AI |
117
.opencode/skills/ai-artist/references/validation-workflow.md
Normal file
@@ -0,0 +1,117 @@
# AI Artist Validation Workflow

Agent instructions for mandatory validation interview before image generation.

## Step 1: Parse Arguments

Extract from user input:
- **concept**: The subject/description (required)
- **--mode**: search (default), creative, or wild
- **--skip**: If present, use defaults and skip to Step 4

**Defaults for --skip mode:** Style=Photorealistic, Mood=Professional, Colors=Auto, Aspect=16:9

## Step 2: Interview User

Use `AskUserQuestion` with these 4 questions in a single call:

```json
{"questions": [
  {"question": "Visual style?", "header": "Style", "multiSelect": false, "options": [
    {"label": "Photorealistic (Recommended)", "description": "Professional photography, 8K"},
    {"label": "Cinematic", "description": "Film-like, dramatic lighting"},
    {"label": "Illustration", "description": "Digital art, stylized"},
    {"label": "Minimalist", "description": "Clean, white space"}
  ]},
  {"question": "Mood?", "header": "Mood", "multiSelect": false, "options": [
    {"label": "Professional", "description": "Corporate, trustworthy"},
    {"label": "Energetic", "description": "Dynamic, bold"},
    {"label": "Calm", "description": "Peaceful, serene"},
    {"label": "Dramatic", "description": "High contrast, intense"}
  ]},
  {"question": "Colors?", "header": "Colors", "multiSelect": false, "options": [
    {"label": "Auto-select (Recommended)", "description": "AI chooses"},
    {"label": "Warm tones", "description": "Oranges, reds"},
    {"label": "Cool tones", "description": "Blues, greens"},
    {"label": "High contrast", "description": "Blacks, neons"}
  ]},
  {"question": "Aspect ratio?", "header": "Ratio", "multiSelect": false, "options": [
    {"label": "16:9 (Recommended)", "description": "Widescreen"},
    {"label": "1:1", "description": "Square"},
    {"label": "9:16", "description": "Vertical"},
    {"label": "4:3", "description": "Standard"}
  ]}
]}
```

**Dynamic questions** (ask separately if concept matches):
- "banner/poster/thumbnail" → Ask about text space
- "product/showcase" → Ask about background preference

## Step 3: Build Prompt

Map answers to keywords:

| Style | Keywords |
|-------|----------|
| Photorealistic | photorealistic, professional photography, 8K, RAW |
| Cinematic | cinematic, film still, anamorphic, dramatic lighting |
| Illustration | digital illustration, artistic, stylized |
| Minimalist | minimalist, clean design, white space |

| Mood | Keywords |
|------|----------|
| Professional | professional, clean, corporate, polished |
| Energetic | dynamic, bold, vibrant, high energy |
| Calm | serene, peaceful, soft, tranquil |
| Dramatic | dramatic, high contrast, intense, moody |

| Colors | Keywords |
|--------|----------|
| Auto-select | (none) |
| Warm tones | warm palette, golden tones, amber |
| Cool tones | cool palette, blue tones, teal |
| High contrast | high contrast, bold blacks, neon |

**Template:** `[concept], [style], [mood], [colors]. Professional quality. NEVER add watermarks.`
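
A minimal sketch of this mapping in Python; the dict literals abbreviate the tables above:

```python
STYLE = {"Photorealistic": "photorealistic, professional photography, 8K, RAW",
         "Minimalist": "minimalist, clean design, white space"}          # ...etc.
MOOD = {"Professional": "professional, clean, corporate, polished"}     # ...etc.
COLORS = {"Auto-select": "",
          "Warm tones": "warm palette, golden tones, amber"}            # ...etc.

def build_prompt(concept: str, style: str, mood: str, colors: str) -> str:
    parts = [concept, STYLE[style], MOOD[mood], COLORS[colors]]
    body = ", ".join(p for p in parts if p)   # skip empty keyword sets
    return f"{body}. Professional quality. NEVER add watermarks."
```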

## Step 4: Confirm & Generate

Show preview, then ask confirmation:

```json
{"questions": [{"question": "Generate?", "header": "Confirm", "multiSelect": false, "options": [
  {"label": "Yes, generate (Recommended)", "description": "Proceed"},
  {"label": "Edit prompt", "description": "Modify first"},
  {"label": "Start over", "description": "Re-answer"}
]}]}
```

**If "Edit prompt":** Ask user for edited text, use that instead.
**If "Start over":** Return to Step 2.

Run generation:
```bash
cd .opencode/skills/ai-artist && .venv/bin/python3 scripts/generate.py "[concept]" \
  -o ./generated-$(date +%Y%m%d-%H%M%S).png \
  --mode [mode] \
  -ar [ratio] \
  -v
```

## Error Handling

| Error | Action |
|-------|--------|
| API key missing | Tell user to set GEMINI_API_KEY |
| Model error | Suggest `--model flash` |
| No concept | Ask user for concept |

## Output

```
[OK] Image generated: [path]
Style: [style] | Mood: [mood] | Aspect: [ratio]

Tip: Use --skip to bypass interview next time.
```
197
.opencode/skills/ai-artist/scripts/core.py
Normal file
@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI Artist Core - BM25 search engine for prompt engineering resources
"""

import csv
import re
from pathlib import Path
from math import log
from collections import defaultdict

# ============ CONFIGURATION ============
DATA_DIR = Path(__file__).parent.parent / "data"
MAX_RESULTS = 3

CSV_CONFIG = {
    "use-case": {
        "file": "use-cases.csv",
        "search_cols": ["Use Case", "Category", "Keywords", "Best Platforms"],
        "output_cols": ["Use Case", "Category", "Keywords", "Prompt Template", "Key Elements", "Best Platforms", "Aspect Ratios", "Tips", "Example"]
    },
    "style": {
        "file": "styles.csv",
        "search_cols": ["Style Name", "Category", "Keywords", "Description", "Best For"],
        "output_cols": ["Style Name", "Category", "Description", "Key Characteristics", "Color Palette", "Best For", "Platforms", "Prompt Keywords"]
    },
    "platform": {
        "file": "platforms.csv",
        "search_cols": ["Platform", "Type", "Keywords", "Strengths"],
        "output_cols": ["Platform", "Type", "Prompt Style", "Key Parameters", "Strengths", "Limitations", "Aspect Ratios", "Best Practices"]
    },
    "technique": {
        "file": "techniques.csv",
        "search_cols": ["Technique", "Category", "Keywords", "Description", "When to Use"],
        "output_cols": ["Technique", "Category", "Description", "When to Use", "Syntax Example", "Platforms", "Tips"]
    },
    "lighting": {
        "file": "lighting.csv",
        "search_cols": ["Lighting Type", "Category", "Keywords", "Description", "Mood", "Best For"],
        "output_cols": ["Lighting Type", "Category", "Description", "Mood", "Best For", "Prompt Keywords", "Technical Notes"]
    },
    "template": {
        "file": "nano-banana-templates.csv",
        "search_cols": ["Category", "Template Name", "Keywords"],
        "output_cols": ["Category", "Template Name", "Keywords", "Prompt Template", "Aspect Ratio", "Tips"]
    },
    "awesome": {
        "file": "awesome-prompts.csv",
        "search_cols": ["title", "description", "prompt"],
        "output_cols": ["id", "title", "category", "description", "prompt", "author", "source"]
    }
}


# ============ BM25 IMPLEMENTATION ============
class BM25:
    """BM25 ranking algorithm for text search"""

    def __init__(self, k1=1.5, b=0.75):
        self.k1 = k1
        self.b = b
        self.corpus = []
        self.doc_lengths = []
        self.avgdl = 0
        self.idf = {}
        self.doc_freqs = defaultdict(int)
        self.N = 0

    def tokenize(self, text):
        """Lowercase, split, remove punctuation, filter short words"""
        text = re.sub(r'[^\w\s]', ' ', str(text).lower())
        return [w for w in text.split() if len(w) > 2]

    def fit(self, documents):
        """Build BM25 index from documents"""
        self.corpus = [self.tokenize(doc) for doc in documents]
        self.N = len(self.corpus)
        if self.N == 0:
            return
        self.doc_lengths = [len(doc) for doc in self.corpus]
        self.avgdl = sum(self.doc_lengths) / self.N

        for doc in self.corpus:
            seen = set()
            for word in doc:
                if word not in seen:
                    self.doc_freqs[word] += 1
                    seen.add(word)

        for word, freq in self.doc_freqs.items():
            self.idf[word] = log((self.N - freq + 0.5) / (freq + 0.5) + 1)

    def score(self, query):
        """Score all documents against query"""
        query_tokens = self.tokenize(query)
        scores = []

        for idx, doc in enumerate(self.corpus):
            score = 0
            doc_len = self.doc_lengths[idx]
            term_freqs = defaultdict(int)
            for word in doc:
                term_freqs[word] += 1

            for token in query_tokens:
                if token in self.idf:
                    tf = term_freqs[token]
                    idf = self.idf[token]
                    numerator = tf * (self.k1 + 1)
                    denominator = tf + self.k1 * (1 - self.b + self.b * doc_len / self.avgdl)
                    score += idf * numerator / denominator

            scores.append((idx, score))

        return sorted(scores, key=lambda x: x[1], reverse=True)
|
||||
|
||||
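
# Illustrative BM25 usage (comment-only sketch; scores are hypothetical):
#   bm25 = BM25()
#   bm25.fit(["warm golden hour portrait", "neon cyberpunk alley"])
#   bm25.score("golden portrait")  # -> [(0, 0.81), (1, 0.0)] best match first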


# ============ SEARCH FUNCTIONS ============
def _load_csv(filepath):
    """Load CSV and return list of dicts"""
    with open(filepath, 'r', encoding='utf-8') as f:
        return list(csv.DictReader(f))


def _search_csv(filepath, search_cols, output_cols, query, max_results):
    """Core search function using BM25"""
    if not filepath.exists():
        return []

    data = _load_csv(filepath)

    # Build documents from search columns
    documents = [" ".join(str(row.get(col, "")) for col in search_cols) for row in data]

    # BM25 search
    bm25 = BM25()
    bm25.fit(documents)
    ranked = bm25.score(query)

    # Get top results with score > 0
    results = []
    for idx, score in ranked[:max_results]:
        if score > 0:
            row = data[idx]
            results.append({col: row.get(col, "") for col in output_cols if col in row})

    return results


def detect_domain(query):
    """Auto-detect the most relevant domain from query"""
    query_lower = query.lower()

    domain_keywords = {
        "use-case": ["avatar", "profile", "thumbnail", "poster", "social", "youtube", "instagram", "marketing", "product", "e-commerce", "infographic", "comic", "game", "app", "web", "header", "banner"],
        "style": ["style", "aesthetic", "photorealistic", "anime", "manga", "3d", "render", "illustration", "pixel", "watercolor", "oil", "cyberpunk", "vaporwave", "minimalist", "vintage", "retro"],
        "platform": ["midjourney", "dalle", "dall-e", "stable diffusion", "flux", "nano banana", "gemini", "imagen", "ideogram", "leonardo", "firefly", "platform", "tool"],
        "technique": ["prompt", "technique", "weight", "emphasis", "negative", "json", "structured", "iteration", "reference", "identity", "multi-panel", "search grounding"],
        "lighting": ["lighting", "light", "shadow", "golden hour", "blue hour", "rembrandt", "butterfly", "neon", "volumetric", "softbox", "rim light", "studio"]
    }

    scores = {domain: sum(1 for kw in keywords if kw in query_lower) for domain, keywords in domain_keywords.items()}
    best = max(scores, key=scores.get)
    return best if scores[best] > 0 else "style"


def search(query, domain=None, max_results=MAX_RESULTS):
    """Main search function with auto-domain detection"""
    if domain is None:
        domain = detect_domain(query)

    config = CSV_CONFIG.get(domain, CSV_CONFIG["style"])
    filepath = DATA_DIR / config["file"]

    if not filepath.exists():
        return {"error": f"File not found: {filepath}", "domain": domain}

    results = _search_csv(filepath, config["search_cols"], config["output_cols"], query, max_results)

    return {
        "domain": domain,
        "query": query,
        "file": config["file"],
        "count": len(results),
        "results": results
    }


def search_all_domains(query, max_per_domain=2):
    """Search across all domains for comprehensive results"""
    all_results = {}
    for domain in CSV_CONFIG.keys():
        result = search(query, domain, max_per_domain)
        if result.get("count", 0) > 0:
            all_results[domain] = result
    return all_results
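
# Illustrative usage (comment-only sketch; requires the CSV data files to exist):
#   search("dramatic rim light portrait")            # auto-detects the "lighting" domain
#   search("youtube thumbnail", domain="use-case")   # explicit domain
#   search_all_domains("cyberpunk poster", max_per_domain=2)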
102
.opencode/skills/ai-artist/scripts/extract_prompts.py
Normal file
@@ -0,0 +1,102 @@
#!/usr/bin/env python3
"""Extract all prompts from awesome-nano-banana-pro-prompts.md into CSV."""

import re
import csv
from pathlib import Path


def extract_prompts(md_content: str) -> list[dict]:
    """Extract all prompts with their metadata."""
    prompts = []

    # Split by prompt entries (### No. X:)
    entries = re.split(r'### No\. \d+:', md_content)

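    # Assumed entry shape this parser targets (hypothetical excerpt, not quoted from the source file):
    #   ### No. 42: Vintage Patent Drawing
    #   ![Illustration](https://img.shields.io/badge/...)
    #   #### 📖 Description
    #   One-line summary...
    #   #### 📝 Prompt
    #   ```
    #   prompt text...
    #   ```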
    for i, entry in enumerate(entries[1:], 1):  # Skip content before first entry
        prompt_data = {
            "id": i,
            "title": "",
            "category": "",
            "description": "",
            "prompt": "",
            "author": "",
            "source": "",
        }

        # Extract title (first line after split)
        title_match = re.search(r'^([^\n]+)', entry.strip())
        if title_match:
            prompt_data["title"] = title_match.group(1).strip()

        # Extract category from badges
        categories = re.findall(r'!\[([^\]]+)\]\([^)]+badge[^)]*\)', entry)
        if categories:
            # Filter out non-category badges
            cats = [c for c in categories if c not in ["Featured", "Raycast", "Language-ZH", "Language-EN", "Language-JA"]]
            prompt_data["category"] = ", ".join(cats[:3]) if cats else ""

        # Extract description
        desc_match = re.search(r'#### 📖 Description\s*\n\n([^\n#]+)', entry)
        if desc_match:
            prompt_data["description"] = desc_match.group(1).strip()

        # Extract prompt (between ``` markers after "#### 📝 Prompt")
        prompt_section = re.search(r'#### 📝 Prompt\s*\n\n```[^\n]*\n(.*?)```', entry, re.DOTALL)
        if prompt_section:
            prompt_data["prompt"] = prompt_section.group(1).strip()

        # Extract author
        author_match = re.search(r'\*\*Author:\*\*\s*\[([^\]]+)\]', entry)
        if author_match:
            prompt_data["author"] = author_match.group(1).strip()

        # Extract source URL
        source_match = re.search(r'\*\*Source:\*\*\s*\[([^\]]+)\]\(([^)]+)\)', entry)
        if source_match:
            prompt_data["source"] = source_match.group(2).strip()

        if prompt_data["prompt"]:  # Only add if we found a prompt
            prompts.append(prompt_data)

    return prompts


def save_to_csv(prompts: list[dict], output_path: Path):
    """Save prompts to CSV file."""
    fieldnames = ["id", "title", "category", "description", "prompt", "author", "source"]

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
        writer.writeheader()
        for p in prompts:
            writer.writerow(p)

    print(f"Saved {len(prompts)} prompts to {output_path}")


def main():
    script_dir = Path(__file__).parent
    md_path = script_dir.parent / "references" / "awesome-nano-banana-pro-prompts.md"
    csv_path = script_dir.parent / "data" / "awesome-prompts.csv"

    print(f"Reading from: {md_path}")

    with open(md_path, 'r', encoding='utf-8') as f:
        content = f.read()

    prompts = extract_prompts(content)
    print(f"Extracted {len(prompts)} prompts")

    # Print sample
    if prompts:
        print("\nSample prompts:")
        for p in prompts[:3]:
            print(f"\n[{p['id']}] {p['title'][:50]}...")
            print(f"  Category: {p['category']}")
            print(f"  Prompt: {p['prompt'][:100]}...")

    save_to_csv(prompts, csv_path)


if __name__ == "__main__":
    main()
371
.opencode/skills/ai-artist/scripts/generate.py
Normal file
@@ -0,0 +1,371 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI Artist Generate - Nano Banana image generation with 3 creative modes

Uses 129 actual prompts from awesome-nano-banana-pro-prompts collection.

Usage:
    python generate.py "<concept>" --output <path.png> [options]

Modes:
    --mode search   : Find best matching prompt (default)
    --mode creative : Remix elements from multiple prompts
    --mode wild     : AI-enhanced out-of-the-box interpretation
    --mode all      : Generate all 3 variations
"""

import argparse
import sys
import os
import re
import random
from pathlib import Path

# Add parent for core imports
sys.path.insert(0, str(Path(__file__).parent))
from core import search

# Gemini API setup
CLAUDE_ROOT = Path.home() / '.claude'
sys.path.insert(0, str(CLAUDE_ROOT / 'scripts'))
PROJECT_CLAUDE = Path(__file__).parent.parent.parent.parent
sys.path.insert(0, str(PROJECT_CLAUDE / 'scripts'))
try:
    from resolve_env import resolve_env
    CENTRALIZED_RESOLVER = True
except ImportError:
    CENTRALIZED_RESOLVER = False
    # Fall back to .env files when the centralized resolver is unavailable
    try:
        from dotenv import load_dotenv
        load_dotenv(Path.home() / '.claude' / '.env')
        load_dotenv(Path.home() / '.claude' / 'skills' / '.env')
    except ImportError:
        pass

try:
    from google import genai
    from google.genai import types
    GENAI_AVAILABLE = True
except ImportError:
    GENAI_AVAILABLE = False


# ============ CONFIGURATION ============
NANO_BANANA_MODELS = {
    "flash2": "gemini-3.1-flash-image-preview",  # Nano Banana 2 (new default)
    "flash": "gemini-2.5-flash-image",
    "pro": "gemini-3-pro-image-preview",
}
DEFAULT_MODEL = "flash2"
ASPECT_RATIOS = ["1:1", "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "9:16", "16:9", "21:9"]


def get_api_key() -> str:
    """Get Gemini API key from environment."""
    if CENTRALIZED_RESOLVER:
        return resolve_env('GEMINI_API_KEY', skill='ai-multimodal')
    return os.getenv('GEMINI_API_KEY')


def adapt_prompt(template_prompt: str, concept: str, **kwargs) -> str:
    """Adapt a template prompt to the user's concept.

    Intelligently replaces variables and adapts the prompt while keeping
    the original structure and Nano Banana narrative style.
    """
    prompt = template_prompt

    # Replace common variable patterns
    replacements = {
        # Raycast-style arguments
        r'\{argument name="[^"]*" default="[^"]*"\}': concept,
        r'\{argument name=[^}]+\}': concept,
        # Bracket variables
        r'\[insert [^\]]+\]': concept,
        r'\[subject\]': concept,
        r'\[concept\]': concept,
        r'\[topic\]': concept,
        r'\[product\]': concept,
        r'\[scene\]': concept,
        r'\[description\]': concept,
        # Generic placeholders
        r'\{[^}]+\}': lambda m: kwargs.get(m.group(0)[1:-1], concept),
    }

    # re.sub accepts both string and callable replacements
    for pattern, replacement in replacements.items():
        prompt = re.sub(pattern, replacement, prompt, flags=re.IGNORECASE)

    # Ensure negative constraints exist (Nano Banana style)
    if "NEVER" not in prompt and "DO NOT" not in prompt:
        prompt += " NEVER add watermarks or unwanted text. DO NOT include labels."

    return prompt
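
# Illustrative adapt_prompt behavior (comment-only; exact output depends on the template):
#   adapt_prompt("Poster of [subject] in {style}", "a red fox")
#   -> "Poster of a red fox in a red fox. NEVER add watermarks or unwanted text. DO NOT include labels."
#   ({style} falls back to the concept unless supplied via **kwargs)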
def mode_search(concept: str, verbose: bool = False) -> tuple[str, dict]:
    """Mode 1: Find best matching prompt from awesome collection."""
    result = search(concept, "awesome", 1)

    if result.get("count", 0) > 0:
        match = result["results"][0]
        prompt = adapt_prompt(match["prompt"], concept)

        if verbose:
            print(f"  [SEARCH] Matched: {match['title'][:60]}...")
            print(f"  Author: {match.get('author', 'Unknown')}")

        return prompt, {"mode": "search", "match": match}

    # Fallback to basic prompt
    prompt = f"A professional image of {concept}. High quality, detailed. Professional photography. NEVER add watermarks."
    return prompt, {"mode": "search", "match": None}


def mode_creative(concept: str, verbose: bool = False) -> tuple[str, dict]:
    """Mode 2: Creative remix - combine elements from multiple prompts."""
    # Get top 3 matches
    result = search(concept, "awesome", 3)
    matches = result.get("results", [])

    if len(matches) < 2:
        return mode_search(concept, verbose)

    # Extract key elements from each prompt
    elements = []
    for m in matches:
        prompt = m.get("prompt", "")
        # Extract style descriptions, lighting, composition hints
        if "style" in prompt.lower() or "lighting" in prompt.lower():
            elements.append(prompt[:200])

    if verbose:
        print(f"  [CREATIVE] Remixing {len(matches)} prompts:")
        for m in matches:
            print(f"    - {m['title'][:50]}...")

    # Build creative remix
    base = matches[0]["prompt"]
    style_hints = []

    # Extract style from second match
    if len(matches) > 1:
        m2 = matches[1]["prompt"]
        style_match = re.search(r'(style[^.]+\.)', m2, re.IGNORECASE)
        if style_match:
            style_hints.append(style_match.group(1))

    # Extract lighting/mood from third match
    if len(matches) > 2:
        m3 = matches[2]["prompt"]
        light_match = re.search(r'(lighting[^.]+\.)', m3, re.IGNORECASE)
        if light_match:
            style_hints.append(light_match.group(1))

    # Adapt and enhance
    prompt = adapt_prompt(base, concept)
    if style_hints:
        prompt += " " + " ".join(style_hints)

    return prompt, {"mode": "creative", "matches": [m["title"] for m in matches]}


def mode_wild(concept: str, verbose: bool = False) -> tuple[str, dict]:
    """Mode 3: Wild/Out-of-the-box - AI-enhanced creative interpretation."""
    result = search(concept, "awesome", 5)
    matches = result.get("results", [])

    # Creative transformations
    transformations = [
        "reimagined as a Japanese Ukiyo-e woodblock print with Prussian blue and vermilion",
        "transformed into a premium liquid glass Bento grid infographic",
        "captured as a vintage 1800s patent document with technical drawings",
        "rendered as a surreal dreamscape with volumetric god rays",
        "depicted in cyberpunk neon aesthetic with holographic elements",
        "illustrated as a hand-drawn chalkboard explanation",
        "visualized as an isometric 3D diorama with miniature figures",
        "presented as a cinematic movie poster with dramatic lighting",
        "created as a vaporwave aesthetic with glitch effects and Roman statues",
        "designed as a premium Apple-style product showcase",
    ]

    # Pick random transformation
    transform = random.choice(transformations)

    if matches:
        # Use structure from a random match but apply wild transformation
        base = random.choice(matches)
        prompt = f"{concept}, {transform}. "

        # Extract any technical camera/quality settings from matched prompt
        tech_match = re.search(r'(\d+mm lens|f/[\d.]+|Canon|Nikon|professional photography)', base["prompt"])
        if tech_match:
            prompt += f"Shot with {tech_match.group(1)}. "

        if verbose:
            print(f"  [WILD] Transform: {transform}")
            print(f"  Based on: {base['title'][:50]}...")
    else:
        prompt = f"{concept}, {transform}. Professional quality."

    prompt += " NEVER add watermarks. DO NOT include unwanted text."

    return prompt, {"mode": "wild", "transformation": transform}

def generate_image(
    prompt: str,
    output_path: str,
    model: str = DEFAULT_MODEL,
    aspect_ratio: str = "1:1",
    size: str = "2K",
    verbose: bool = False
) -> dict:
    """Generate image using Nano Banana (Gemini image models)."""

    if not GENAI_AVAILABLE:
        return {"status": "error", "error": "google-genai not installed. Run: pip install google-genai"}

    api_key = get_api_key()
    if not api_key:
        return {"status": "error", "error": "GEMINI_API_KEY not found"}

    model_id = NANO_BANANA_MODELS.get(model, model)

    if verbose:
        print("\n[Nano Banana Generation]")
        print(f"  Model: {model_id}")
        print(f"  Aspect: {aspect_ratio}")
        print(f"  Prompt: {prompt[:100]}...")

    try:
        client = genai.Client(api_key=api_key)

        # Build config
        image_config_args = {'aspect_ratio': aspect_ratio}
        if 'pro' in model_id.lower() and size:
            image_config_args['image_size'] = size

        config = types.GenerateContentConfig(
            response_modalities=['IMAGE'],
            image_config=types.ImageConfig(**image_config_args)
        )

        response = client.models.generate_content(
            model=model_id,
            contents=[prompt],
            config=config
        )

        output_file = Path(output_path)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        if hasattr(response, 'candidates') and response.candidates:
            for part in response.candidates[0].content.parts:
                if part.inline_data:
                    with open(output_file, 'wb') as f:
                        f.write(part.inline_data.data)

                    if verbose:
                        print(f"  Generated: {output_file}")

                    return {"status": "success", "output": str(output_file), "model": model_id}

        return {"status": "error", "error": "No image in response"}

    except Exception as e:
        return {"status": "error", "error": str(e)}

def main():
    parser = argparse.ArgumentParser(
        description="AI Artist Generate - Nano Banana with 3 creative modes",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Modes:
  search   : Find best matching prompt from 129 curated prompts (default)
  creative : Remix elements from multiple matching prompts
  wild     : AI-enhanced out-of-the-box creative interpretation
  all      : Generate all 3 variations

Examples:
  # Search mode (default)
  python generate.py "tech conference banner" -o banner.png

  # Creative remix
  python generate.py "AI workshop" -o workshop.png --mode creative

  # Wild/experimental
  python generate.py "product showcase" -o product.png --mode wild

  # Generate all 3 variations
  python generate.py "futuristic city" -o city.png --mode all
"""
    )

    parser.add_argument("concept", help="Core concept/subject to generate")
    parser.add_argument("--output", "-o", required=True, help="Output image path")
    parser.add_argument("--mode", "-m", choices=["search", "creative", "wild", "all"],
                        default="search", help="Generation mode")
    parser.add_argument("--model", choices=list(NANO_BANANA_MODELS.keys()),
                        default=DEFAULT_MODEL, help="Model: flash2 (default, Nano Banana 2), flash, or pro")
    parser.add_argument("--aspect-ratio", "-ar", choices=ASPECT_RATIOS, default="1:1")
    parser.add_argument("--size", choices=["1K", "2K", "4K"], default="2K")
    parser.add_argument("--verbose", "-v", action="store_true")
    parser.add_argument("--show-prompt", action="store_true", help="Print generated prompt")
    parser.add_argument("--dry-run", action="store_true", help="Build prompt without generating")

    args = parser.parse_args()

    if args.verbose:
        print(f"[Concept: {args.concept}]")

    # Determine modes to run
    modes = ["search", "creative", "wild"] if args.mode == "all" else [args.mode]

    for mode in modes:
        if args.verbose or len(modes) > 1:
            print(f"\n{'='*50}")
            print(f"[Mode: {mode.upper()}]")

        # Build prompt based on mode
        if mode == "search":
            prompt, meta = mode_search(args.concept, args.verbose)
        elif mode == "creative":
            prompt, meta = mode_creative(args.concept, args.verbose)
        elif mode == "wild":
            prompt, meta = mode_wild(args.concept, args.verbose)

        if args.show_prompt or args.verbose:
            print(f"\n[Prompt]\n{prompt}\n")

        if args.dry_run:
            print("[Dry run - no generation]")
            continue

        # Generate output path for mode
        output_path = args.output
        if len(modes) > 1:
            base = Path(args.output)
            output_path = str(base.parent / f"{base.stem}-{mode}{base.suffix}")

        result = generate_image(
            prompt=prompt,
            output_path=output_path,
            model=args.model,
            aspect_ratio=args.aspect_ratio,
            size=args.size,
            verbose=args.verbose
        )

        if result["status"] == "success":
            print(f"✓ Generated: {result['output']}")
        else:
            print(f"✗ Error: {result['error']}")


if __name__ == "__main__":
    main()
147
.opencode/skills/ai-artist/scripts/search.py
Normal file
@@ -0,0 +1,147 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AI Artist Search - BM25 search engine for prompt engineering resources
Usage: python search.py "<query>" [--domain <domain>] [--max-results 3]
       python search.py "<query>" --prompt-system [--platform <platform>]

Domains: use-case, style, platform, technique, lighting, template, awesome
Platforms: midjourney, dalle, sd, flux, nano-banana
"""

import argparse
import sys
from core import CSV_CONFIG, MAX_RESULTS, search, search_all_domains

# Fix Windows cp1252 encoding: hardcoded emojis can't encode on Windows.
# Reconfigure stdout to UTF-8 with replacement (Python 3.7+).
if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")


def format_output(result):
    """Format results for Claude consumption (token-optimized)"""
    if "error" in result:
        return f"Error: {result['error']}"

    output = []
    output.append("## AI Artist Search Results")
    output.append(f"**Domain:** {result['domain']} | **Query:** {result['query']}")
    output.append(f"**Source:** {result['file']} | **Found:** {result['count']} results\n")

    for i, row in enumerate(result['results'], 1):
        output.append(f"### Result {i}")
        for key, value in row.items():
            value_str = str(value)
            if len(value_str) > 400:
                value_str = value_str[:400] + "..."
            output.append(f"- **{key}:** {value_str}")
        output.append("")

    return "\n".join(output)


def generate_prompt_system(query, platform=None):
    """Generate a comprehensive prompt system for a given concept"""
    output = []
    output.append("## 🎨 AI Artist Prompt System")
    output.append(f"**Concept:** {query}")
    if platform:
        output.append(f"**Target Platform:** {platform}")
    output.append("")

    # Search relevant domains
    use_case = search(query, "use-case", 1)
    style = search(query, "style", 2)
    lighting = search(query, "lighting", 1)
    technique = search(query, "technique", 2)

    # Use case / Template
    if use_case.get("count", 0) > 0:
        uc = use_case["results"][0]
        output.append("### 📋 Use Case Match")
        output.append(f"**{uc.get('Use Case', 'N/A')}** ({uc.get('Category', '')})")
        if uc.get("Prompt Template"):
            output.append(f"**Template:** `{uc.get('Prompt Template')}`")
        if uc.get("Key Elements"):
            output.append(f"**Key Elements:** {uc.get('Key Elements')}")
        if uc.get("Tips"):
            output.append(f"**Tips:** {uc.get('Tips')}")
        output.append("")

    # Styles
    if style.get("count", 0) > 0:
        output.append("### 🎭 Recommended Styles")
        for s in style["results"]:
            output.append(f"**{s.get('Style Name', 'N/A')}** - {s.get('Description', '')}")
            if s.get("Prompt Keywords"):
                output.append(f"  Keywords: `{s.get('Prompt Keywords')}`")
        output.append("")

    # Lighting
    if lighting.get("count", 0) > 0:
        lt = lighting["results"][0]
        output.append("### 💡 Lighting Suggestion")
        output.append(f"**{lt.get('Lighting Type', 'N/A')}** - {lt.get('Description', '')}")
        output.append(f"  Mood: {lt.get('Mood', '')} | Keywords: `{lt.get('Prompt Keywords', '')}`")
        output.append("")

    # Techniques
    if technique.get("count", 0) > 0:
        output.append("### 🔧 Relevant Techniques")
        for t in technique["results"]:
            output.append(f"**{t.get('Technique', 'N/A')}**: {t.get('Description', '')}")
            if t.get("Syntax Example"):
                output.append(f"  Example: `{t.get('Syntax Example')}`")
        output.append("")

    # Platform-specific tips
    if platform:
        plat = search(platform, "platform", 1)
        if plat.get("count", 0) > 0:
            p = plat["results"][0]
            output.append(f"### 🖥️ {p.get('Platform', '')} Tips")
            output.append(f"**Prompt Style:** {p.get('Prompt Style', '')}")
            output.append(f"**Key Parameters:** {p.get('Key Parameters', '')}")
            output.append(f"**Best Practices:** {p.get('Best Practices', '')}")
            output.append("")

    return "\n".join(output)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="AI Artist Search")
    parser.add_argument("query", help="Search query")
    parser.add_argument("--domain", "-d", choices=list(CSV_CONFIG.keys()), help="Search domain")
    parser.add_argument("--max-results", "-n", type=int, default=MAX_RESULTS, help="Max results (default: 3)")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    # Prompt system generation
    parser.add_argument("--prompt-system", "-ps", action="store_true", help="Generate comprehensive prompt system")
    parser.add_argument("--platform", "-p", type=str, default=None, help="Target platform for prompt system")
    parser.add_argument("--all", "-a", action="store_true", help="Search all domains")

    args = parser.parse_args()

    # Prompt system generation
    if args.prompt_system:
        result = generate_prompt_system(args.query, args.platform)
        print(result)
    # Search all domains
    elif args.all:
        results = search_all_domains(args.query, args.max_results)
        if args.json:
            import json
            print(json.dumps(results, indent=2, ensure_ascii=False))
        else:
            for domain, result in results.items():
                print(format_output(result))
                print("---\n")
    # Domain search
    else:
        result = search(args.query, args.domain, args.max_results)
        if args.json:
            import json
            print(json.dumps(result, indent=2, ensure_ascii=False))
        else:
            print(format_output(result))
230
.opencode/skills/ai-multimodal/.env.example
Normal file
@@ -0,0 +1,230 @@
# Google Gemini API Configuration

# ============================================================================
# OPTION 1: Google AI Studio (Default - Recommended for most users)
# ============================================================================
# Get your API key: https://aistudio.google.com/apikey
GEMINI_API_KEY=your_api_key_here

# ============================================================================
# API Key Rotation (Optional - For high-volume usage)
# ============================================================================
# Add multiple API keys for automatic rotation on rate limit errors.
# Free tier accounts are heavily rate-limited; rotation helps distribute load.
#
# Format: GEMINI_API_KEY_N where N is 2, 3, 4, etc.
# The primary GEMINI_API_KEY is always used first.
#
# GEMINI_API_KEY_2=your_second_api_key
# GEMINI_API_KEY_3=your_third_api_key
# GEMINI_API_KEY_4=your_fourth_api_key
#
# Features:
# - Auto-rotates on RESOURCE_EXHAUSTED / 429 errors
# - 60-second cooldown per key after rate limit
# - Logs rotation events with --verbose flag
# - Backward compatible: single key still works

# ============================================================================
# OPTION 2: Vertex AI (Google Cloud Platform)
# ============================================================================
# Uncomment these lines to use Vertex AI instead of Google AI Studio
# GEMINI_USE_VERTEX=true
# VERTEX_PROJECT_ID=your-gcp-project-id
# VERTEX_LOCATION=us-central1

# ============================================================================
# Model Selection (Optional)
# ============================================================================
# Override default models for specific capabilities
# If not set, intelligent defaults are used based on task type

# --- Image Generation ---
# Used by: --task generate (image)
# Default: gemini-2.5-flash-image (Nano Banana Flash - fast, cost-effective)
# Alternative: imagen-4.0-generate-001 (production quality)
# NOTE: All image generation requires billing - no free tier available (limit: 0)
# Options:
#   gemini-2.5-flash-image        - Nano Banana Flash: fast, ~$1/1M tokens (DEFAULT)
#   gemini-3-pro-image-preview    - Nano Banana Pro: 4K text, reasoning (requires billing)
#   imagen-4.0-generate-001       - Imagen 4 Standard: production quality (~$0.02/image)
#   imagen-4.0-ultra-generate-001 - Imagen 4 Ultra: maximum quality (~$0.04/image)
#   imagen-4.0-fast-generate-001  - Imagen 4 Fast: speed-optimized (~$0.01/image)
# IMAGE_GEN_MODEL=gemini-2.5-flash-image

# --- Video Generation ---
# Used by: --task generate-video (new capability)
# Default: veo-3.1-generate-preview
# NOTE: Video generation requires billing - no free tier fallback available
# Options:
#   veo-3.1-generate-preview      - Latest, native audio, frame control (requires billing)
#   veo-3.1-fast-generate-preview - Speed-optimized for business (requires billing)
#   veo-3.0-generate-001          - Stable, native audio, 8s videos (requires billing)
#   veo-3.0-fast-generate-001     - Stable fast variant (requires billing)
# VIDEO_GEN_MODEL=veo-3.1-generate-preview

# --- Multimodal Analysis ---
# Used by: --task analyze, transcribe, extract
# Default: gemini-2.5-flash
# Options:
#   gemini-3-pro-preview - Latest, agentic workflows, 1M context
#   gemini-2.5-flash     - Best price/performance (recommended)
#   gemini-2.5-pro       - Highest quality
# MULTIMODAL_MODEL=gemini-2.5-flash

# --- Legacy Compatibility ---
# Generic model override (use specific variables above instead)
# GEMINI_MODEL=gemini-2.5-flash
# GEMINI_IMAGE_GEN_MODEL=gemini-2.5-flash-image

# ============================================================================
# MiniMax API Configuration (Optional - for image/video/speech/music generation)
# ============================================================================
# Get your API key: https://platform.minimax.io/user-center/basic-information/interface-key
# MINIMAX_API_KEY=your_minimax_api_key_here

# --- MiniMax Image Generation ---
# Models: image-01 (standard), image-01-live (enhanced)
# Cost: ~$0.03/image | Rate: 10 RPM
# MINIMAX_IMAGE_MODEL=image-01

# --- MiniMax Video Generation (Hailuo) ---
# Models: MiniMax-Hailuo-2.3, MiniMax-Hailuo-2.3-Fast, MiniMax-Hailuo-02, S2V-01
# Cost: $0.25-0.52/video | Rate: 5 RPM
# MINIMAX_VIDEO_MODEL=MiniMax-Hailuo-2.3

# --- MiniMax Speech/TTS ---
# Models: speech-2.8-hd (best), speech-2.8-turbo (fast)
# Cost: $30-50/1M chars | Rate: 60 RPM | 300+ voices, 40+ languages
# MINIMAX_SPEECH_MODEL=speech-2.8-hd

# --- MiniMax Music Generation ---
# Models: music-2.5 (4-minute songs with vocals)
# Cost: $0.03-0.075/gen | Rate: 120 RPM
# MINIMAX_MUSIC_MODEL=music-2.5

# ============================================================================
# Rate Limiting Configuration (Optional)
# ============================================================================
# Requests per minute limit (adjust based on your tier)
# GEMINI_RPM_LIMIT=15

# Tokens per minute limit
# GEMINI_TPM_LIMIT=4000000

# Requests per day limit
# GEMINI_RPD_LIMIT=1500

# ============================================================================
# Video Generation Options (Optional)
# ============================================================================
# Video duration in seconds (8s only for now)
# VEO_DURATION=8

# Video resolution: 720p or 1080p
# VEO_RESOLUTION=1080p

# Aspect ratio: 16:9, 9:16, 1:1 (16:9 is default)
# VEO_ASPECT_RATIO=16:9

# Frame rate: 24fps (fixed for now)
# VEO_FPS=24

# Enable native audio generation
# VEO_AUDIO=true

# ============================================================================
# Image Generation Options (Optional)
# ============================================================================
# Number of images to generate (1-4)
# IMAGEN_NUM_IMAGES=1

# Image size: 1K or 2K (Ultra/Standard only)
# IMAGEN_SIZE=1K

# Aspect ratio: 1:1, 16:9, 9:16, 4:3, 3:4
# IMAGEN_ASPECT_RATIO=1:1

# Enable person generation (restricted in EEA, CH, UK)
# IMAGEN_PERSON_GENERATION=true

# Add SynthID watermark (always enabled by default)
# IMAGEN_WATERMARK=true

# ============================================================================
# Processing Options (Optional)
# ============================================================================
# Video resolution mode: default or low-res
# low-res uses ~100 tokens/second vs ~300 for default
# GEMINI_VIDEO_RESOLUTION=default

# Audio quality: default (16 Kbps mono, auto-downsampled)
# GEMINI_AUDIO_QUALITY=default

# PDF processing mode: inline (<20MB) or file-api (>20MB, automatic)
# GEMINI_PDF_MODE=auto

# ============================================================================
# Retry Configuration (Optional)
# ============================================================================
# Maximum retry attempts for failed requests
# GEMINI_MAX_RETRIES=3

# Initial retry delay in seconds (uses exponential backoff)
# GEMINI_RETRY_DELAY=1

# ============================================================================
# Output Configuration (Optional)
# ============================================================================
# Default output directory for generated images
# OUTPUT_DIR=./output

# Image output format (png or jpeg)
# IMAGE_FORMAT=png

# Image quality for JPEG (1-100)
# IMAGE_QUALITY=95

# ============================================================================
# Context Caching (Optional)
# ============================================================================
# Enable context caching for repeated queries on same file
# GEMINI_ENABLE_CACHING=true

# Cache TTL in seconds (default: 1800 = 30 minutes)
# GEMINI_CACHE_TTL=1800

# ============================================================================
# Logging (Optional)
# ============================================================================
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
# LOG_LEVEL=INFO

# Log file path
# LOG_FILE=./logs/gemini.log

# ============================================================================
# Pricing Reference (as of 2025-11)
# ============================================================================
# Gemini 2.5 Flash: $1.00/1M input, $0.10/1M output
# Gemini 2.5 Pro:   $3.00/1M input, $12.00/1M output
# Gemini 3 Pro:     $2.00/1M input (<200k), $4.00 (>200k), $12/$18 output
# Imagen 4:         ~$0.01-$0.04 per image (varies by variant)
# Veo 3:            TBD (preview pricing)
# Monitor: https://ai.google.dev/pricing

# ============================================================================
# Notes
# ============================================================================
# 1. Never commit API keys to version control
# 2. Add .env to .gitignore
# 3. API keys can be restricted in Google Cloud Console
# 4. Monitor usage at: https://aistudio.google.com/apikey
# 5. Free tier limits: 15 RPM, 1M-4M TPM, 1,500 RPD
# 6. Vertex AI requires GCP authentication via gcloud CLI
# 7. Model defaults (Dec 2025):
#    - Image gen: gemini-2.5-flash-image (Nano Banana Flash - default)
#    - Image gen: imagen-4.0-generate-001 (alternative for production)
#    - Video gen: veo-3.1-generate-preview
#    - Analysis: gemini-2.5-flash
# 8. Preview models (veo-3.1, gemini-3) may have API changes
112
.opencode/skills/ai-multimodal/SKILL.md
Normal file
@@ -0,0 +1,112 @@
---
name: ck:ai-multimodal
description: Analyze images/audio/video with Gemini API (better vision than Claude). Generate images (Imagen 4, Nano Banana 2, MiniMax), videos (Veo 3, Hailuo), speech (MiniMax TTS), music (MiniMax). Use for vision analysis, transcription, OCR, design extraction, multimodal AI.
license: MIT
allowed-tools:
- Bash
- Read
- Write
- Edit
argument-hint: "[file-path] [prompt]"
---

# AI Multimodal

Process audio, images, videos, and documents using Gemini. Generate images, videos, speech, and music via Gemini + MiniMax.

## Setup

```bash
# Google Gemini (analysis + image/video gen)
export GEMINI_API_KEY="your-key"   # https://aistudio.google.com/apikey
# MiniMax (image/video/speech/music gen)
export MINIMAX_API_KEY="your-key"  # https://platform.minimax.io/user-center/basic-information/interface-key
pip install google-genai python-dotenv pillow requests
```

### API Key Rotation (Optional)

For high-volume Gemini usage, configure multiple keys; the scripts rotate automatically on rate-limit errors (see the sketch below):

```bash
export GEMINI_API_KEY="key1"
export GEMINI_API_KEY_2="key2"  # auto-rotates on rate limit
```
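A minimal sketch of the rotation idea (illustrative only; `call_model` and `RateLimitError` are hypothetical stand-ins — the real logic lives in the skill's scripts):

```python
import os, time

# Primary key sorts first: GEMINI_API_KEY < GEMINI_API_KEY_2 < ...
KEYS = [v for k, v in sorted(os.environ.items()) if k.startswith("GEMINI_API_KEY")]
cooldown = {}  # key -> earliest timestamp it may be used again

def generate_with_rotation(prompt):
    for key in KEYS:
        if time.time() < cooldown.get(key, 0):
            continue  # still cooling down after a rate limit
        try:
            return call_model(prompt, api_key=key)  # hypothetical API call
        except RateLimitError:                      # hypothetical 429/RESOURCE_EXHAUSTED error
            cooldown[key] = time.time() + 60        # 60-second cooldown per key
    raise RuntimeError("All keys are rate-limited")
```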
## Quick Start

**Verify setup**: `python scripts/check_setup.py`
**Analyze media**: `python scripts/gemini_batch_process.py --files <file> --task <analyze|transcribe|extract>`
- TIP: When asked to analyze an image, first check whether the `gemini` command is available; if so, run `echo "<prompt to analyze image>" | gemini -y -m <gemini.model>` (read the model from `gemini.model` in `$HOME/.opencode/.ck.json`). If the `gemini` command is not available, fall back to `python scripts/gemini_batch_process.py --files <file> --task analyze`.
**Generate (Gemini)**: `python scripts/gemini_batch_process.py --task <generate|generate-video> --prompt "desc"`
**Generate (MiniMax)**: `python scripts/minimax_cli.py --task <generate|generate-video|generate-speech|generate-music> --prompt "desc"`

> **Stdin support**: Pipe files via stdin for Gemini analysis (auto-detects PNG/JPG/PDF/WAV/MP3).

## Models

### Google Gemini / Imagen
- **Image gen**: `gemini-3.1-flash-image-preview` (Nano Banana 2 - DEFAULT), `gemini-2.5-flash-image` (Flash), `gemini-3-pro-image-preview` (Pro 4K), `imagen-4.0-generate-001` (standard), `imagen-4.0-ultra-generate-001` (quality), `imagen-4.0-fast-generate-001` (speed)
- **Video gen**: `veo-3.1-generate-preview` (8s clips with audio)
- **Analysis**: `gemini-2.5-flash` (recommended), `gemini-2.5-pro` (advanced)

### MiniMax (NEW)
- **Image gen**: `image-01` (standard), `image-01-live` (enhanced) - $0.03/image, 1-9 batch
- **Video gen (Hailuo)**: `MiniMax-Hailuo-2.3` (1080p), `MiniMax-Hailuo-2.3-Fast` (50% cheaper), `MiniMax-Hailuo-02` (first+last frame), `S2V-01` (subject ref)
- **Speech/TTS**: `speech-2.8-hd` (best), `speech-2.8-turbo` (fast) - 300+ voices, 40+ languages, emotion control
- **Music**: `music-2.5` - 4-minute songs with vocals, synchronized lyrics

## Scripts

- **`gemini_batch_process.py`**: Gemini CLI for `transcribe|analyze|extract|generate|generate-video`. Auto-resolves API keys; Imagen 4 + Veo + Nano Banana workflows.
- **`minimax_cli.py`**: MiniMax CLI for `generate|generate-video|generate-speech|generate-music`. Supports all MiniMax models.
- **`minimax_generate.py`**: MiniMax generation functions (image, video, speech, music). Library for programmatic use.
- **`minimax_api_client.py`**: MiniMax HTTP client, auth, async polling, file download utilities.
- **`media_optimizer.py`**: ffmpeg/Pillow preflight: compress/resize/convert media to stay within API limits.
- **`document_converter.py`**: Gemini-powered PDF/image/Office → markdown converter.
- **`check_setup.py`**: Setup checker for API keys and dependencies.

Use `--help` for options.

## References

Load for detailed guidance:

| Topic | File | Description |
|-------|------|-------------|
| Music | `references/music-generation.md` | Lyria RealTime API for background music generation, style prompts, real-time control, integration with video production. |
| Audio | `references/audio-processing.md` | Audio formats and limits, transcription (timestamps, speakers, segments), non-speech analysis, File API vs inline input, TTS models, best practices, cost and token math, and concrete meeting/podcast/interview recipes. |
| Images | `references/vision-understanding.md` | Vision capabilities overview, supported formats and models, captioning/classification/VQA, detection and segmentation, OCR and document reading, multi-image workflows, structured JSON output, token costs, best practices, and common product/screenshot/chart/scene use cases. |
| Image Gen | `references/image-generation.md` | Imagen 4 and Gemini image model overview, generate_images vs generate_content APIs, aspect ratios and costs, text/image/both modalities, editing and composition, style and quality control, safety settings, best practices, troubleshooting, and common marketing/concept-art/UI scenarios. |
| Video | `references/video-analysis.md` | Video analysis capabilities and supported formats, model/context choices, local/inline/YouTube inputs, clipping and FPS control, multi-video comparison, temporal Q&A and scene detection, transcription with visual context, token and cost guidance, and optimization/best-practice patterns. |
| Video Gen | `references/video-generation.md` | Veo model matrix, text-to-video and image-to-video quick start, multi-reference and extension flows, camera and timing control, configuration (resolution, aspect, audio, safety), prompt design patterns, performance tips, limitations, troubleshooting, and cost estimates. |
| MiniMax | `references/minimax-generation.md` | MiniMax image (image-01), video (Hailuo 2.3), speech (TTS 2.8), and music (2.5) generation APIs. Endpoints, models, parameters, async workflows, pricing, rate limits, voice library, and examples. |

## Limits

**Formats**: Audio (WAV/MP3/AAC, 9.5h), Images (PNG/JPEG/WEBP, 3,600/request), Video (MP4/MOV, 6h), PDF (1k pages)
**Size**: 20MB inline, 2GB File API

**Important:**
- For audio longer than 15 minutes, transcripts are often truncated by the Gemini API's output token limit. Split the audio into chunks of at most 15 minutes and transcribe each segment to get a complete transcript (see the sketch below).
- For video longer than 15 minutes, use ffmpeg to extract the audio, split it into 15-minute segments, transcribe each segment, and then combine the per-segment transcripts into one.
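A minimal sketch of the chunking step (assumes `ffmpeg` is on PATH; output names are illustrative):

```python
import subprocess

def split_audio(path: str, chunk_minutes: int = 15) -> None:
    """Split an audio file into fixed-length chunks for transcription."""
    subprocess.run([
        "ffmpeg", "-i", path,
        "-f", "segment", "-segment_time", str(chunk_minutes * 60),
        "-c", "copy", "chunk_%03d.mp3",
    ], check=True)
```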

**Transcription Output Requirements:**
- Format: Markdown
- Metadata: duration, file size, generated date, description, file name, topics covered, etc.
- Parts: from-to (e.g., 00:00-00:15), audio chunk name, transcript, status, etc.
- Transcript format:
```
[HH:MM:SS -> HH:MM:SS] transcript content
[HH:MM:SS -> HH:MM:SS] transcript content
...
```

## Outputs

**IMPORTANT:** Invoke the "/ck:project-organization" skill to organize the outputs.

## Resources

- [Gemini API Docs](https://ai.google.dev/gemini-api/docs/)
- [Gemini Pricing](https://ai.google.dev/pricing)
- [MiniMax API Docs](https://platform.minimax.io/docs/api-reference/api-overview)
- [MiniMax Pricing](https://platform.minimax.io/pricing)
387
.opencode/skills/ai-multimodal/references/audio-processing.md
Normal file
@@ -0,0 +1,387 @@
# Audio Processing Reference

Comprehensive guide for audio analysis and speech generation using the Gemini API.

## Audio Understanding

### Supported Formats

| Format | MIME Type | Best Use |
|--------|-----------|----------|
| WAV | `audio/wav` | Uncompressed, highest quality |
| MP3 | `audio/mp3` | Compressed, widely compatible |
| AAC | `audio/aac` | Compressed, good quality |
| FLAC | `audio/flac` | Lossless compression |
| OGG Vorbis | `audio/ogg` | Open format |
| AIFF | `audio/aiff` | Apple format |

### Specifications

- **Maximum length**: 9.5 hours per request
- **Multiple files**: Unlimited count, combined max 9.5 hours
- **Token rate**: 32 tokens/second (1 minute = 1,920 tokens)
- **Processing**: Auto-downsampled to 16 Kbps mono
- **File size limits**:
  - Inline: 20 MB max total request
  - File API: 2 GB per file, 20 GB project quota
  - Retention: 48 hours auto-delete
- **Important:** for audio longer than 15 minutes, transcripts are often truncated by the output token limit of the Gemini API response. Split the audio into chunks of at most 15 minutes and transcribe each segment to get a complete transcript.

## Transcription

### Basic Transcription

```python
from google import genai
import os

client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

# Upload audio
myfile = client.files.upload(file='meeting.mp3')

# Transcribe
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Generate a transcript of the speech.', myfile]
)
print(response.text)
```

### With Timestamps

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Generate transcript with timestamps in MM:SS format.', myfile]
)
```

### Multi-Speaker Identification

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Transcribe with speaker labels. Format: [Speaker 1], [Speaker 2], etc.', myfile]
)
```

### Segment-Specific Transcription

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Transcribe only the segment from 02:30 to 05:15.', myfile]
)
```

## Audio Analysis

### Summarization

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Summarize key points in 5 bullets with timestamps.', myfile]
)
```

### Non-Speech Audio Analysis

```python
# Music analysis
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Identify the musical instruments and genre.', myfile]
)

# Environmental sounds
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Identify all sounds: voices, music, ambient noise.', myfile]
)

# Birdsong identification
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Identify bird species based on their calls.', myfile]
)
```

### Timestamp-Based Analysis

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['What is discussed from 10:30 to 15:45? Provide key points.', myfile]
)
```

## Input Methods

### File Upload (>20MB or Reuse)

```python
# Upload once, use multiple times
myfile = client.files.upload(file='large-audio.mp3')

# First query
response1 = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Transcribe this', myfile]
)

# Second query (reuses same file)
response2 = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Summarize this', myfile]
)
```

### Inline Data (<20MB)

```python
from google.genai import types

with open('small-audio.mp3', 'rb') as f:
    audio_bytes = f.read()

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Describe this audio',
        types.Part.from_bytes(data=audio_bytes, mime_type='audio/mp3')
    ]
)
```

## Speech Generation (TTS)

### Available Models

| Model | Quality | Speed | Cost/1M tokens |
|-------|---------|-------|----------------|
| `gemini-2.5-flash-native-audio-preview-09-2025` | High | Fast | $10 |
| `gemini-2.5-pro` TTS mode | Premium | Slower | $20 |

### Basic TTS

```python
response = client.models.generate_content(
    model='gemini-2.5-flash-native-audio-preview-09-2025',
    contents='Generate audio: Welcome to today\'s episode.'
)

# Save audio
with open('output.wav', 'wb') as f:
    f.write(response.audio_data)
```

### Controllable Voice Style

```python
# Professional tone
response = client.models.generate_content(
    model='gemini-2.5-flash-native-audio-preview-09-2025',
    contents='Generate audio in a professional, clear tone: Welcome to our quarterly earnings call.'
)

# Casual and friendly
response = client.models.generate_content(
    model='gemini-2.5-flash-native-audio-preview-09-2025',
    contents='Generate audio in a friendly, conversational tone: Hey there! Let\'s dive into today\'s topic.'
)

# Narrative style
response = client.models.generate_content(
    model='gemini-2.5-flash-native-audio-preview-09-2025',
    contents='Generate audio in a narrative, storytelling tone: Once upon a time, in a land far away...'
)
```

### Voice Control Parameters

These can be combined in a single natural-language instruction (see the example below):

- **Style**: Professional, casual, narrative, conversational
- **Pace**: Slow, normal, fast
- **Tone**: Friendly, serious, enthusiastic
- **Accent**: Natural language control (e.g., "British accent", "Southern drawl")
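
A combined instruction (illustrative prompt; the model name matches the table above):

```python
# Combine style, pace, tone, and accent in one natural-language instruction
response = client.models.generate_content(
    model='gemini-2.5-flash-native-audio-preview-09-2025',
    contents=('Generate audio in a calm, professional tone, at a slow pace, '
              'with a British accent: Thank you all for joining this briefing.')
)
```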
## Best Practices

### File Management

1. Use the File API for files >20MB
2. Use the File API for repeated queries (saves tokens)
3. Files auto-delete after 48 hours
4. Clean up manually when done:
   ```python
   client.files.delete(name=myfile.name)
   ```

### Prompt Engineering

**Effective prompts**:
- "Transcribe from 02:30 to 03:29 in MM:SS format"
- "Identify speakers and extract dialogue with timestamps"
- "Summarize key points with relevant timestamps"
- "Transcribe and analyze sentiment for each speaker"

**Context improves accuracy**:
- "This is a medical interview - use appropriate terminology"
- "Transcribe this legal deposition with precise terminology"
- "This is a technical podcast about machine learning"

**Combined tasks**:
- "Transcribe and summarize in bullet points"
- "Extract key quotes with timestamps and speaker labels"
- "Transcribe and identify action items with timestamps"

### Cost Optimization

**Token calculation** (worked through in the sketch below):
- 1 minute audio = 1,920 tokens
- 1 hour audio = 115,200 tokens
- 9.5 hours = 1,094,400 tokens
**Model selection**:
|
||||
- Use `gemini-2.5-flash` ($1/1M tokens) for most tasks
|
||||
- Upgrade to `gemini-2.5-pro` ($3/1M tokens) for complex analysis
|
||||
- For high-volume: `gemini-1.5-flash` ($0.70/1M tokens)
|
||||
|
||||
**Reduce costs**:
|
||||
- Process only relevant segments using timestamps
|
||||
- Use lower-quality audio when possible
|
||||
- Batch multiple short files in one request
|
||||
- Cache context for repeated queries
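
For budgeting, a small helper applying the 32 tokens/second rate from above (a sketch; the default price assumes Gemini 2.5 Flash at $1/1M tokens):

```python
def audio_cost(seconds: float, price_per_1m_tokens: float = 1.0) -> float:
    """Estimate audio input cost: audio is billed at 32 tokens per second."""
    return seconds * 32 / 1_000_000 * price_per_1m_tokens

print(f"${audio_cost(60 * 60):.2f}")  # 1 hour ≈ 115,200 tokens ≈ $0.12
```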

### Error Handling

```python
import time

def transcribe_with_retry(file_path, max_retries=3):
    """Transcribe audio with exponential backoff retry"""
    for attempt in range(max_retries):
        try:
            myfile = client.files.upload(file=file_path)
            response = client.models.generate_content(
                model='gemini-2.5-flash',
                contents=['Transcribe with timestamps', myfile]
            )
            return response.text
        except Exception as e:
            if attempt == max_retries - 1:
                raise
            wait_time = 2 ** attempt
            print(f"Retry {attempt + 1} after {wait_time}s")
            time.sleep(wait_time)
```

## Common Use Cases

### 1. Meeting Transcription

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Transcribe this meeting with:
1. Speaker labels
2. Timestamps for topic changes
3. Action items highlighted
''',
        myfile
    ]
)
```

### 2. Podcast Summary

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Create podcast summary with:
1. Main topics with timestamps
2. Key quotes from each speaker
3. Recommended episode highlights
''',
        myfile
    ]
)
```

### 3. Interview Analysis

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Analyze interview:
1. Questions asked with timestamps
2. Key responses from interviewee
3. Overall sentiment and tone
''',
        myfile
    ]
)
```

### 4. Content Verification

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Verify audio content:
1. Check for specific keywords or phrases
2. Identify any compliance issues
3. Note any concerning statements with timestamps
''',
        myfile
    ]
)
```

### 5. Multilingual Transcription

```python
# Gemini auto-detects language
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Transcribe this audio and translate to English if needed.', myfile]
)
```

## Token Costs

**Audio Input** (32 tokens/second):
- 1 minute = 1,920 tokens
- 10 minutes = 19,200 tokens
- 1 hour = 115,200 tokens
- 9.5 hours = 1,094,400 tokens

**Example costs** (Gemini 2.5 Flash at $1/1M):
- 1 hour audio: 115,200 tokens = $0.12
- Full-day podcast (8 hours): 921,600 tokens = $0.92

## Limitations

- Maximum 9.5 hours per request
- Auto-downsampled to 16 Kbps mono (quality loss)
- Files expire after 48 hours
- No real-time streaming support
- Non-speech audio less accurate than speech

---

## Related References

**Current**: Audio Processing

**Related Capabilities**:
- [Video Analysis](./video-analysis.md) - Extract audio from videos
- [Video Generation](./video-generation.md) - Generate videos with native audio
- [Image Understanding](./vision-understanding.md) - Combine audio analysis with visual context

**Back to**: [AI Multimodal Skill](../SKILL.md)
1002
.opencode/skills/ai-multimodal/references/image-generation.md
Normal file
File diff suppressed because it is too large
141
.opencode/skills/ai-multimodal/references/minimax-generation.md
Normal file
@@ -0,0 +1,141 @@
# MiniMax Generation Reference

## Overview

MiniMax provides image, video (Hailuo), speech (TTS), and music generation APIs.
Base URL: `https://api.minimax.io/v1` | Auth: `Bearer {MINIMAX_API_KEY}`

## Image Generation

**Endpoint**: `POST /image_generation`
**Models**: `image-01` (standard), `image-01-live` (enhanced)
**Rate**: 10 RPM | **Cost**: ~$0.03/image

```json
{
  "model": "image-01",
  "prompt": "A girl looking into the distance",
  "aspect_ratio": "16:9",
  "n": 2,
  "response_format": "url",
  "prompt_optimizer": true,
  "subject_reference": [{"type": "character", "image_file": "url", "weight": 0.8}]
}
```

**Aspect ratios**: 1:1, 16:9, 4:3, 3:2, 2:3, 3:4, 9:16, 21:9
**Custom dims**: 512-2048px (divisible by 8)
**Batch**: 1-9 images per request

## Video Generation (Hailuo)

**Endpoints**: POST `/video_generation` → GET `/query/video_generation` → GET `/files/retrieve`
**Async workflow**: Submit task → poll every 10s → download file (URL valid 9h)

### Models

| Model | Features | Resolution |
|-------|----------|-----------|
| `MiniMax-Hailuo-2.3` | Text/image-to-video | 720p/1080p |
| `MiniMax-Hailuo-2.3-Fast` | Same, 50% faster+cheaper | 720p/1080p |
| `MiniMax-Hailuo-02` | First+last frame mode | 720p |
| `S2V-01` | Subject reference | 720p |

**Rate**: 5 RPM | **Cost**: $0.25 (6s/768p), $0.52 (10s/768p)

```json
// Text-to-video
{"prompt": "A dancer", "model": "MiniMax-Hailuo-2.3", "duration": 6, "resolution": "1080P"}

// Image-to-video
{"prompt": "Scene desc", "first_frame_image": "url", "model": "MiniMax-Hailuo-2.3", "duration": 6}

// First+last frame
{"prompt": "Transition", "first_frame_image": "url", "last_frame_image": "url", "model": "MiniMax-Hailuo-02"}

// Subject reference
{"prompt": "Scene with character", "subject_reference": [{"type": "character", "image": ["url"]}], "model": "S2V-01"}
```
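
A minimal sketch of the submit → poll → download workflow in Python (`requests`); the response field names (`task_id`, `status`, `file_id`, `download_url`) are illustrative — confirm the exact shapes in the API reference:

```python
import os
import time

import requests

API = 'https://api.minimax.io/v1'
HEADERS = {'Authorization': f"Bearer {os.environ['MINIMAX_API_KEY']}"}

# 1. Submit the generation task
task = requests.post(f'{API}/video_generation', headers=HEADERS, json={
    'prompt': 'A dancer', 'model': 'MiniMax-Hailuo-2.3', 'duration': 6
}).json()

# 2. Poll every 10 seconds until the task completes
while True:
    status = requests.get(f'{API}/query/video_generation', headers=HEADERS,
                          params={'task_id': task['task_id']}).json()
    if status.get('status') == 'Success':
        break
    time.sleep(10)

# 3. Retrieve the download URL (valid ~9 hours) and save the file
info = requests.get(f'{API}/files/retrieve', headers=HEADERS,
                    params={'file_id': status['file_id']}).json()
with open('output.mp4', 'wb') as f:
    f.write(requests.get(info['file']['download_url']).content)
```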

## Speech/TTS

**Endpoint**: `POST /speech/speech_t2a_input`
**Models**: `speech-2.8-hd` (best), `speech-2.8-turbo` (fast), `speech-2.6-hd/turbo`, `speech-02-hd/turbo`
**Rate**: 60 RPM | **Cost**: $30-50/1M chars

```json
{
  "model": "speech-2.8-hd",
  "text": "Your text here",
  "voice": "English_Warm_Bestie",
  "emotion": "happy",
  "rate": 1.0,
  "volume": 1.0,
  "pitch": 1.0,
  "output_format": "mp3"
}
```

**Voices**: 300+ system voices, 40+ languages
**Emotions**: happy, sad, angry, fearful, disgusted, surprised, neutral
**Formats**: mp3, wav, pcm, flac
**Text limit**: 10,000 chars

### Voice Cloning

```json
POST /voice_clone
{"audio_url": "https://sample.wav", "clone_name": "my_voice"}
```

Requires 10+ seconds of reference audio. Rate: 60 RPM.

## Music Generation

**Endpoint**: `POST /music_generation`
**Models**: `music-2.5` (latest; vocals plus accompaniment, songs up to 4 minutes)
**Rate**: 120 RPM | **Cost**: $0.03-0.075/generation

```json
{
  "model": "music-2.5",
  "lyrics": "Verse 1\nLine one\n\n[Chorus]\nChorus line",
  "prompt": "Upbeat pop with electronic elements",
  "output_format": "url",
  "audio_setting": {"sample_rate": 44100, "bitrate": 128000, "format": "mp3"}
}
```

**Lyrics**: 1-3500 chars, supports structure tags ([Verse], [Chorus], etc.)
**Prompt**: 0-2000 chars, style/mood description
**Sample rates**: 16000, 24000, 32000, 44100 Hz
**Bitrates**: 32000, 64000, 128000, 256000 bps

## Error Codes

| Code | Meaning |
|------|---------|
| 0 | Success |
| 1002 | Rate limit exceeded |
| 1008 | Insufficient balance |
| 2013 | Invalid parameters |
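
When scripting against these endpoints, a small guard can surface failures early (a sketch; it assumes the error code arrives as `base_resp.status_code` in the response body — verify against the API reference):

```python
ERROR_CODES = {
    1002: 'Rate limit exceeded',
    1008: 'Insufficient balance',
    2013: 'Invalid parameters',
}

def check_response(body: dict) -> dict:
    """Raise if a MiniMax response carries a non-zero status code."""
    code = body.get('base_resp', {}).get('status_code', 0)
    if code != 0:
        raise RuntimeError(f'MiniMax error {code}: {ERROR_CODES.get(code, "unknown")}')
    return body
```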

## CLI Examples

```bash
# Image
python minimax_cli.py --task generate --prompt "A cyberpunk city" --model image-01 --aspect-ratio 16:9

# Video
python minimax_cli.py --task generate-video --prompt "A dancer" --model MiniMax-Hailuo-2.3 --duration 6

# Speech
python minimax_cli.py --task generate-speech --text "Hello world" --model speech-2.8-hd --voice English_Warm_Bestie --emotion happy

# Music
python minimax_cli.py --task generate-music --lyrics "La la la\nOh yeah" --prompt "upbeat pop" --model music-2.5
```

## References

- [API Overview](https://platform.minimax.io/docs/api-reference/api-overview)
- [Video Guide](https://platform.minimax.io/docs/guides/video-generation)
- [Speech API](https://platform.minimax.io/docs/api-reference/speech-t2a-intro)
- [Music API](https://platform.minimax.io/docs/api-reference/music-generation)

311
.opencode/skills/ai-multimodal/references/music-generation.md
Normal file
@@ -0,0 +1,311 @@
# Music Generation Reference

Real-time music generation using Lyria RealTime via WebSocket API.

## Core Capabilities

- **Real-time streaming**: Bidirectional WebSocket for continuous generation
- **Dynamic control**: Modify music in real-time during generation
- **Style steering**: Genre, mood, instrumentation guidance
- **Audio output**: 48kHz stereo 16-bit PCM

## Model

**Lyria RealTime** (Experimental)
- WebSocket-based streaming
- Real-time parameter adjustment
- Instrumental only (no vocals)
- Watermarked output

## Quick Start

### Python

```python
import asyncio
import os

from google import genai

client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

async def generate_music():
    async with client.aio.live.music.connect() as session:
        # Set style prompts with weights (0.0-1.0)
        await session.set_weighted_prompts([
            {"prompt": "Upbeat corporate background music", "weight": 0.8},
            {"prompt": "Modern electronic elements", "weight": 0.5}
        ])

        # Configure generation parameters
        await session.set_music_generation_config(
            guidance=4.0,    # Prompt adherence (0.0-6.0)
            bpm=120,         # Tempo (60-200)
            density=0.6,     # Note density (0.0-1.0)
            brightness=0.5   # Tonal quality (0.0-1.0)
        )

        # Start playback and collect audio
        await session.play()

        audio_chunks = []
        async for chunk in session:
            audio_chunks.append(chunk.audio_data)

        return b''.join(audio_chunks)
```

### JavaScript

```javascript
const client = new GenaiClient({ apiKey: process.env.GEMINI_API_KEY });

async function generateMusic() {
  const session = await client.live.music.connect();

  await session.setWeightedPrompts([
    { prompt: "Calm ambient background", weight: 0.9 },
    { prompt: "Nature sounds influence", weight: 0.3 }
  ]);

  await session.setMusicGenerationConfig({
    guidance: 3.5,
    bpm: 80,
    density: 0.4,
    brightness: 0.6
  });

  session.onAudio((audioChunk) => {
    // Process 48kHz stereo PCM audio
    audioBuffer.push(audioChunk);
  });

  await session.play();
}
```

## Configuration Parameters

| Parameter | Range | Default | Description |
|-----------|-------|---------|-------------|
| `guidance` | 0.0-6.0 | 4.0 | Prompt adherence (higher = stricter) |
| `bpm` | 60-200 | 120 | Tempo in beats per minute |
| `density` | 0.0-1.0 | 0.5 | Note/sound density |
| `brightness` | 0.0-1.0 | 0.5 | Tonal quality (higher = brighter) |
| `scale` | 12 keys | C Major | Musical key |
| `mute_bass` | bool | false | Remove bass elements |
| `mute_drums` | bool | false | Remove drum elements |
| `mode` | enum | QUALITY | QUALITY, DIVERSITY, VOCALIZATION |
| `temperature` | 0.0-2.0 | 1.0 | Sampling randomness |
| `top_k` | int | 40 | Sampling top-k |
| `seed` | int | random | Reproducibility seed |

## Weighted Prompts

Control generation direction with weighted prompts:

```python
await session.set_weighted_prompts([
    {"prompt": "Main style description", "weight": 1.0},  # Primary
    {"prompt": "Secondary influence", "weight": 0.5},     # Supporting
    {"prompt": "Subtle element", "weight": 0.2}           # Accent
])
```

**Weight guidelines**:
- 0.8-1.0: Dominant influence
- 0.5-0.7: Secondary contribution
- 0.2-0.4: Subtle accent
- 0.0-0.1: Minimal effect

## Style Prompts by Use Case

### Corporate/Marketing

```python
prompts = [
    {"prompt": "Professional corporate background music, modern", "weight": 0.9},
    {"prompt": "Uplifting, optimistic mood", "weight": 0.6},
    {"prompt": "Clean production, minimal complexity", "weight": 0.5}
]
config = {"bpm": 100, "brightness": 0.6, "density": 0.5}
```

### Social Media/Short-form

```python
prompts = [
    {"prompt": "Trending pop electronic beat", "weight": 0.9},
    {"prompt": "Energetic, catchy rhythm", "weight": 0.7},
    {"prompt": "Bass-heavy, punchy", "weight": 0.5}
]
config = {"bpm": 128, "brightness": 0.7, "density": 0.7}
```

### Emotional/Cinematic

```python
prompts = [
    {"prompt": "Cinematic orchestral underscore", "weight": 0.9},
    {"prompt": "Emotional, inspiring", "weight": 0.7},
    {"prompt": "Building tension and release", "weight": 0.5}
]
config = {"bpm": 70, "brightness": 0.4, "density": 0.4}
```

### Ambient/Background

```python
prompts = [
    {"prompt": "Calm ambient soundscape", "weight": 0.9},
    {"prompt": "Minimal, atmospheric", "weight": 0.6},
    {"prompt": "Lo-fi textures", "weight": 0.4}
]
config = {"bpm": 80, "brightness": 0.4, "density": 0.3}
```

## Real-time Transitions

Smoothly transition between styles during generation:

```python
async def dynamic_music_generation():
    async with client.aio.live.music.connect() as session:
        # Start with intro style
        await session.set_weighted_prompts([
            {"prompt": "Soft ambient intro", "weight": 0.9}
        ])
        await session.play()

        # Collect intro (4 seconds)
        intro_chunks = []
        for _ in range(192):  # ~4 seconds at 48kHz
            chunk = await session.__anext__()
            intro_chunks.append(chunk.audio_data)

        # Transition to main section
        await session.set_weighted_prompts([
            {"prompt": "Building energy", "weight": 0.7},
            {"prompt": "Full beat drop", "weight": 0.5}
        ])

        # Continue with new style...
```

## Output Specifications

- **Format**: Raw 16-bit PCM
- **Sample Rate**: 48,000 Hz
- **Channels**: 2 (stereo)
- **Bit Depth**: 16 bits
- **Watermarking**: Always enabled (SynthID)

### Save to WAV

```python
import wave

def save_pcm_to_wav(pcm_data, filename):
    with wave.open(filename, 'wb') as wav_file:
        wav_file.setnchannels(2)       # Stereo
        wav_file.setsampwidth(2)       # 16-bit
        wav_file.setframerate(48000)   # 48kHz
        wav_file.writeframes(pcm_data)
```
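
End to end, the Quick Start coroutine and this helper combine as follows (a sketch assuming both are defined as above; note the Quick Start loop reads until the stream closes, so in practice you may want to cap the number of chunks as in the video-background example below):

```python
import asyncio

pcm = asyncio.run(generate_music())
save_pcm_to_wav(pcm, 'music.wav')
```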

### Convert to MP3

```bash
# Using FFmpeg
ffmpeg -f s16le -ar 48000 -ac 2 -i input.pcm output.mp3
```

## Integration with Video Production

### Generate Background Music for Video

```python
async def generate_video_background(duration_seconds, mood):
    """Generate background music matching video length"""

    # Configure for video background
    prompts = [
        {"prompt": f"{mood} background music for video", "weight": 0.9},
        {"prompt": "Non-distracting, supportive underscore", "weight": 0.6}
    ]

    async with client.aio.live.music.connect() as session:
        await session.set_weighted_prompts(prompts)
        await session.set_music_generation_config(
            guidance=4.0,
            density=0.4,   # Keep sparse for background
            brightness=0.5
        )
        await session.play()

        # Calculate chunks needed (48kHz stereo = 192,000 bytes/second)
        total_chunks = duration_seconds * 48000 // 512  # Chunk size estimate

        # (async iterators don't support enumerate(), so count manually)
        audio_data = []
        received = 0
        async for chunk in session:
            audio_data.append(chunk.audio_data)
            received += 1
            if received >= total_chunks:
                break

        return b''.join(audio_data)
```

### Sync with Storyboard Timing

```python
async def generate_scene_music(scenes):
    """Generate music with transitions matching scene changes"""

    all_audio = []

    async with client.aio.live.music.connect() as session:
        for scene in scenes:
            # Update style for each scene
            await session.set_weighted_prompts([
                {"prompt": scene['mood'], "weight": 0.9},
                {"prompt": scene['style'], "weight": 0.5}
            ])

            if scene['index'] == 0:
                await session.play()

            # Collect audio for scene duration
            chunks = int(scene['duration'] * 48000 / 512)
            for _ in range(chunks):
                chunk = await session.__anext__()
                all_audio.append(chunk.audio_data)

    return b''.join(all_audio)
```

## Limitations

- **Instrumental only**: No vocal/singing generation
- **WebSocket required**: Real-time streaming connection
- **Safety filtering**: Prompts undergo safety review
- **Watermarking**: All output contains SynthID watermark
- **Experimental**: API may change

## Best Practices

1. **Buffer audio**: Implement robust buffering for smooth playback
2. **Gradual transitions**: Avoid drastic prompt changes mid-stream
3. **Sparse for backgrounds**: Lower density for video backgrounds
4. **Test prompts**: Iterate on prompt combinations
5. **Cross-fade transitions**: Blend audio at style changes (see the sketch below)
6. **Match video mood**: Align music tempo/energy with visuals
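
For item 5, a minimal linear cross-fade over raw PCM (assumes both buffers are 48kHz 16-bit stereo, matching the output spec above):

```python
import array

def crossfade(a: bytes, b: bytes, overlap_frames: int = 48_000) -> bytes:
    """Blend the tail of `a` into the head of `b` over ~1s at 48kHz."""
    fa, fb = array.array('h', a), array.array('h', b)
    n = overlap_frames * 2  # stereo: 2 samples per frame
    for i in range(n):
        t = i / n
        fb[i] = int(fa[len(fa) - n + i] * (1 - t) + fb[i] * t)
    return fa[:-n].tobytes() + fb.tobytes()
```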

## Resources

- [Lyria RealTime Docs](https://ai.google.dev/gemini-api/docs/music-generation)
- [Audio Processing Guide](./audio-processing.md)
- [Video Generation](./video-generation.md)

---

**Related**: [Audio Processing](./audio-processing.md) | [Video Generation](./video-generation.md)

**Back to**: [AI Multimodal Skill](../SKILL.md)

515
.opencode/skills/ai-multimodal/references/video-analysis.md
Normal file
@@ -0,0 +1,515 @@
# Video Analysis Reference

Comprehensive guide for video understanding, temporal analysis, and YouTube processing using Gemini API.

> **Note**: This guide covers video *analysis* (understanding existing videos). For video *generation* (creating new videos), see [Video Generation Reference](./video-generation.md).

## Core Capabilities

- **Video Summarization**: Create concise summaries
- **Question Answering**: Answer specific questions about content
- **Transcription**: Audio transcription with visual descriptions
- **Timestamp References**: Query specific moments (MM:SS format)
- **Video Clipping**: Process specific segments
- **Scene Detection**: Identify scene changes and transitions
- **Multiple Videos**: Compare up to 10 videos (2.5+)
- **YouTube Support**: Analyze YouTube videos directly
- **Custom Frame Rate**: Adjust FPS sampling

## Supported Formats

- MP4, MPEG, MOV, AVI, FLV, MPG, WebM, WMV, 3GPP

## Model Selection

### Gemini 3 Series (Latest)
- **gemini-3-pro-preview**: Latest, agentic workflows, 1M context, dynamic thinking

### Gemini 2.5 Series (Recommended)
- **gemini-2.5-pro**: Best quality, 1M-2M context
- **gemini-2.5-flash**: Balanced, 1M-2M context (recommended)

### Context Windows
- **2M token models**: ~2 hours (default) or ~6 hours (low-res)
- **1M token models**: ~1 hour (default) or ~3 hours (low-res)

## Basic Video Analysis

### Local Video

```python
import os
import time

from google import genai

client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

# Upload video (File API for >20MB)
myfile = client.files.upload(file='video.mp4')

# Wait for processing
while myfile.state.name == 'PROCESSING':
    time.sleep(1)
    myfile = client.files.get(name=myfile.name)

if myfile.state.name == 'FAILED':
    raise ValueError('Video processing failed')

# Analyze
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Summarize this video in 3 key points', myfile]
)
print(response.text)
```

### YouTube Video

```python
from google.genai import types

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Summarize the main topics discussed',
        types.Part.from_uri(
            uri='https://www.youtube.com/watch?v=VIDEO_ID',
            mime_type='video/mp4'
        )
    ]
)
```

### Inline Video (<20MB)

```python
with open('short-clip.mp4', 'rb') as f:
    video_bytes = f.read()

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'What happens in this video?',
        types.Part.from_bytes(data=video_bytes, mime_type='video/mp4')
    ]
)
```

## Advanced Features

### Video Clipping

```python
# Analyze a specific time range
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Summarize this segment',
        types.Part.from_video_metadata(
            file_uri=myfile.uri,
            start_offset='40s',
            end_offset='80s'
        )
    ]
)
```

### Custom Frame Rate

```python
# Lower FPS for static content (saves tokens)
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Analyze this presentation',
        types.Part.from_video_metadata(
            file_uri=myfile.uri,
            fps=0.5  # Sample every 2 seconds
        )
    ]
)

# Higher FPS for fast-moving content
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Analyze rapid movements in this sports video',
        types.Part.from_video_metadata(
            file_uri=myfile.uri,
            fps=5  # Sample 5 times per second
        )
    ]
)
```

### Multiple Videos (2.5+)

```python
video1 = client.files.upload(file='demo1.mp4')
video2 = client.files.upload(file='demo2.mp4')

# Wait for processing (collect the refreshed file handles)
videos = []
for video in [video1, video2]:
    while video.state.name == 'PROCESSING':
        time.sleep(1)
        video = client.files.get(name=video.name)
    videos.append(video)

response = client.models.generate_content(
    model='gemini-2.5-pro',
    contents=[
        'Compare these two product demos. Which explains features better?',
        *videos
    ]
)
```

## Temporal Understanding

### Timestamp-Based Questions

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'What happens at 01:15 and how does it relate to 02:30?',
        myfile
    ]
)
```

### Timeline Creation

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Create a timeline with timestamps:
- Key events
- Scene changes
- Important moments
Format: MM:SS - Description
''',
        myfile
    ]
)
```

### Scene Detection

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Identify all scene changes with timestamps and describe each scene',
        myfile
    ]
)
```

## Transcription

### Basic Transcription

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Transcribe the audio from this video',
        myfile
    ]
)
```

### With Visual Descriptions

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Transcribe with visual context:
- Audio transcription
- Visual descriptions of important moments
- Timestamps for salient events
''',
        myfile
    ]
)
```

### Speaker Identification

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Transcribe with speaker labels and timestamps',
        myfile
    ]
)
```

## Common Use Cases

### 1. Video Summarization

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Summarize this video:
1. Main topic and purpose
2. Key points with timestamps
3. Conclusion or call-to-action
''',
        myfile
    ]
)
```

### 2. Educational Content

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Create educational materials:
1. List key concepts taught
2. Create 5 quiz questions with answers
3. Provide timestamp for each concept
''',
        myfile
    ]
)
```

### 3. Action Detection

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'List all actions performed in this tutorial with timestamps',
        myfile
    ]
)
```

### 4. Content Moderation

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Review video content:
1. Identify any problematic content
2. Note timestamps of concerns
3. Provide content rating recommendation
''',
        myfile
    ]
)
```

### 5. Interview Analysis

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Analyze interview:
1. Questions asked (timestamps)
2. Key responses
3. Candidate body language and demeanor
4. Overall assessment
''',
        myfile
    ]
)
```

### 6. Sports Analysis

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Analyze sports video:
1. Key plays with timestamps
2. Player movements and positioning
3. Game strategy observations
''',
        types.Part.from_video_metadata(
            file_uri=myfile.uri,
            fps=5  # Higher FPS for fast action
        )
    ]
)
```

## YouTube Specific Features

### Public Video Requirements

- Video must be public (not private or unlisted)
- No age-restricted content
- Valid video ID required

### Usage Example

```python
# YouTube URL
youtube_uri = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Create chapter markers with timestamps',
        types.Part.from_uri(uri=youtube_uri, mime_type='video/mp4')
    ]
)
```

### Rate Limits

- **Free tier**: 8 hours of YouTube video per day
- **Paid tier**: No length-based limits
- Public videos only

## Token Calculation

Video tokens depend on resolution and FPS:

**Default resolution** (~300 tokens/second):
- 1 minute = 18,000 tokens
- 10 minutes = 180,000 tokens
- 1 hour = 1,080,000 tokens

**Low resolution** (~100 tokens/second):
- 1 minute = 6,000 tokens
- 10 minutes = 60,000 tokens
- 1 hour = 360,000 tokens

**Context windows**:
- 2M tokens ≈ 2 hours (default) or 6 hours (low-res)
- 1M tokens ≈ 1 hour (default) or 3 hours (low-res)
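
A back-of-the-envelope helper based on these rates (approximate; actual counts vary with resolution and sampling):

```python
def video_tokens(seconds: float, low_res: bool = False) -> int:
    """~300 tokens/second at default resolution, ~100 at low resolution."""
    return int(seconds * (100 if low_res else 300))

tokens = video_tokens(10 * 60)  # 10-minute video, default resolution
print(tokens, f'${tokens / 1_000_000:.2f} at $1/1M tokens')  # 180000 $0.18
```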

## Best Practices

### File Management

1. Use File API for videos >20MB (most videos)
2. Wait for ACTIVE state before analysis
3. Files auto-delete after 48 hours
4. Clean up manually:
```python
client.files.delete(name=myfile.name)
```

### Optimization Strategies

**Reduce token usage**:
- Process specific segments using start/end offsets
- Use lower FPS for static content
- Use low-resolution mode for long videos
- Split very long videos into chunks

**Improve accuracy**:
- Provide context in prompts
- Use higher FPS for fast-moving content
- Use Pro model for complex analysis
- Be specific about what to extract

### Prompt Engineering

**Effective prompts**:
- "Summarize key points with timestamps in MM:SS format"
- "Identify all scene changes and describe each scene"
- "Extract action items mentioned with timestamps"
- "Compare these two videos on: X, Y, Z criteria"

**Structured output**:
```python
from pydantic import BaseModel
from typing import List

class VideoEvent(BaseModel):
    timestamp: str  # MM:SS format
    description: str
    category: str

class VideoAnalysis(BaseModel):
    summary: str
    events: List[VideoEvent]
    duration: str

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Analyze this video', myfile],
    config=genai.types.GenerateContentConfig(
        response_mime_type='application/json',
        response_schema=VideoAnalysis
    )
)
```

### Error Handling

```python
import time

def upload_and_process_video(file_path, max_wait=300):
    """Upload video and wait for processing"""
    myfile = client.files.upload(file=file_path)

    elapsed = 0
    while myfile.state.name == 'PROCESSING' and elapsed < max_wait:
        time.sleep(5)
        myfile = client.files.get(name=myfile.name)
        elapsed += 5

    if myfile.state.name == 'FAILED':
        raise ValueError(f'Video processing failed: {myfile.state.name}')

    if myfile.state.name == 'PROCESSING':
        raise TimeoutError(f'Processing timeout after {max_wait}s')

    return myfile
```

## Cost Optimization

**Token costs** (Gemini 2.5 Flash at $1/1M):
- 1 minute video (default): 18,000 tokens = $0.018
- 10 minute video: 180,000 tokens = $0.18
- 1 hour video: 1,080,000 tokens = $1.08

**Strategies**:
- Use video clipping for specific segments
- Lower FPS for static content
- Use low-resolution mode for long videos
- Batch related queries on the same video
- Use context caching for repeated queries

## Limitations

- Maximum 6 hours (low-res) or 2 hours (default)
- YouTube videos must be public
- No live streaming analysis
- Files expire after 48 hours
- Processing time varies by video length
- No real-time processing
- Limited to 10 videos per request (2.5+)

---

## Related References

**Current**: Video Analysis

**Related Capabilities**:
- [Video Generation](./video-generation.md) - Creating videos from text/images
- [Audio Processing](./audio-processing.md) - Extract and analyze audio tracks
- [Image Understanding](./vision-understanding.md) - Analyze individual frames

**Back to**: [AI Multimodal Skill](../SKILL.md)

457
.opencode/skills/ai-multimodal/references/video-generation.md
Normal file
@@ -0,0 +1,457 @@
# Video Generation Reference

Comprehensive guide for video creation using Veo models via Gemini API.

## Core Capabilities

- **Text-to-Video**: Generate 8-second videos from text prompts
- **Image-to-Video**: Animate images with text direction
- **Video Extension**: Continue previously generated videos
- **Frame Control**: Precise camera movements and effects
- **Native Audio**: Synchronized audio generation
- **Multiple Resolutions**: 720p and 1080p output
- **Aspect Ratios**: 16:9, 9:16, 1:1

## Models

### Veo 3.1 Preview (Latest)

**veo-3.1-generate-preview** - Latest with advanced controls
- Frame-specific generation
- Up to 3 reference images for image-to-video
- Video extension capability
- Native audio generation
- Resolution: 720p, 1080p
- Duration: 8 seconds at 24fps
- Status: Preview (API may change)
- Updated: September 2025

**veo-3.1-fast-generate-preview** - Speed-optimized
- Optimized for business use cases
- Programmatic ad creation
- Social media content
- Same features as standard but faster
- Status: Preview
- Updated: September 2025

### Veo 3.0 Stable

**veo-3.0-generate-001** - Production-ready
- Native audio generation
- Text-to-video and image-to-video
- 720p and 1080p (16:9 only)
- 8 seconds at 24fps
- Status: Stable
- Updated: July 2025

**veo-3.0-fast-generate-001** - Stable fast variant
- Speed-optimized stable version
- Same reliability as 3.0
- Status: Stable
- Updated: July 2025

## Model Comparison

| Model | Speed | Features | Audio | Status | Best For |
|-------|-------|----------|-------|--------|----------|
| veo-3.1-preview | Medium | All | ✓ | Preview | Latest features |
| veo-3.1-fast | Fast | All | ✓ | Preview | Business/speed |
| veo-3.0-001 | Medium | Standard | ✓ | Stable | Production |
| veo-3.0-fast | Fast | Standard | ✓ | Stable | Production/speed |

## Quick Start

### Text-to-Video

```python
from google import genai
from google.genai import types
import os

client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

# Basic generation
response = client.models.generate_video(
    model='veo-3.1-generate-preview',
    prompt='A serene beach at sunset with gentle waves rolling onto the shore',
    config=types.VideoGenerationConfig(
        resolution='1080p',
        aspect_ratio='16:9'
    )
)

# Save video
with open('output.mp4', 'wb') as f:
    f.write(response.video.data)
```

### Image-to-Video

```python
import PIL.Image

# Load reference image
ref_image = PIL.Image.open('beach.jpg')

# Animate the image
response = client.models.generate_video(
    model='veo-3.1-generate-preview',
    prompt='Camera slowly pans across the scene from left to right',
    reference_images=[ref_image],
    config=types.VideoGenerationConfig(
        resolution='1080p'
    )
)
```

### Multiple Reference Images

```python
# Use up to 3 reference images for complex scenes
img1 = PIL.Image.open('foreground.jpg')
img2 = PIL.Image.open('background.jpg')
img3 = PIL.Image.open('subject.jpg')

response = client.models.generate_video(
    model='veo-3.1-generate-preview',
    prompt='Combine these elements into a cohesive animated scene',
    reference_images=[img1, img2, img3],
    config=types.VideoGenerationConfig(
        resolution='1080p',
        aspect_ratio='16:9'
    )
)
```

## Advanced Features

### Video Extension

```python
# Continue from a previously generated video
with open('part1.mp4', 'rb') as f:
    previous_video = f.read()

response = client.models.extend_video(
    model='veo-3.1-generate-preview',
    video=previous_video,
    prompt='The scene transitions to nighttime with stars appearing'
)
```

### Frame Control

```python
# Precise camera movements
response = client.models.generate_video(
    model='veo-3.1-generate-preview',
    prompt='A mountain landscape',
    config=types.VideoGenerationConfig(
        resolution='1080p',
        camera_motion='zoom_in',  # Options: zoom_in, zoom_out, pan_left, pan_right, tilt_up, tilt_down, static
        motion_speed='slow'       # Options: slow, medium, fast
    )
)
```

## Prompt Engineering

### Effective Video Prompts

**Structure**:
1. **Subject**: What's in the scene
2. **Action**: What's happening
3. **Camera**: How it's filmed
4. **Style**: Visual treatment
5. **Timing**: Pacing details

**Example**:
```
"A hummingbird [subject] hovers near a red flower, then flies away [action].
Slow-motion close-up shot [camera] with vibrant colors and soft focus background [style].
Gentle, peaceful pacing [timing]."
```

### Action Verbs

**Movement**:
- "walks", "runs", "flies", "swims", "dances"
- "rotates", "spins", "rolls", "bounces"
- "emerges", "disappears", "transforms"

**Camera**:
- "zoom in on", "pull back from", "follow"
- "orbit around", "track alongside"
- "tilt up to reveal", "pan across"

**Transitions**:
- "gradually changes from... to..."
- "morphs into", "dissolves into"
- "cuts to", "fades to"

### Timing Control

```python
# Explicit timing in the prompt
prompt = '''
0-2s: Close-up of a seed in soil
2-4s: Time-lapse of sprout emerging
4-6s: Growing into a small plant
6-8s: Zoom out to show garden context
'''
```

## Configuration Options

### Resolution

```python
config = types.VideoGenerationConfig(
    resolution='1080p'  # Options: 720p, 1080p
)
```

**Considerations**:
- 1080p: Higher quality, longer generation time, larger file
- 720p: Faster generation, smaller file, good for drafts

### Aspect Ratios

```python
config = types.VideoGenerationConfig(
    aspect_ratio='16:9'  # Options: 16:9, 9:16, 1:1
)
```

**Use Cases**:
- 16:9: Landscape, YouTube, traditional video
- 9:16: Mobile, TikTok, Instagram Stories
- 1:1: Square, Instagram feed, versatile

### Audio Control

```python
config = types.VideoGenerationConfig(
    include_audio=True  # Default: True
)
```

Native audio is generated automatically and synchronized with video content.

## Best Practices

### 1. Prompt Quality

**Be specific**:
- ❌ "A person walking"
- ✅ "A young woman in a red coat walking through a park in autumn"

**Include motion**:
- ❌ "A city street"
- ✅ "A busy city street with cars passing and people crossing"

**Specify camera**:
- ❌ "A mountain"
- ✅ "Aerial drone shot slowly ascending over a snow-capped mountain"

### 2. Reference Images

**Quality**:
- Use high-resolution images (1080p+)
- Clear, well-lit subjects
- Minimal motion blur

**Composition**:
- Match desired final aspect ratio
- Leave room for motion/movement
- Consider camera angle in prompt

### 3. Performance Optimization

**Generation Time**:
- 720p: ~30-60 seconds
- 1080p: ~60-120 seconds
- Fast models: 30-50% faster

**Strategies**:
- Use 720p for iteration/drafts
- Use fast models for rapid feedback
- Batch multiple requests
- Use async processing for UI responsiveness (see the sketch below)
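
As a sketch of that last point, the blocking call from Quick Start can be pushed off the event loop with `asyncio.to_thread` (model name and parameters as assumed earlier in this guide):

```python
import asyncio

async def generate_async(prompt: str):
    # Run the blocking SDK call in a worker thread
    return await asyncio.to_thread(
        client.models.generate_video,
        model='veo-3.0-fast-generate-001',
        prompt=prompt,
    )

async def main():
    # Fire several draft generations concurrently
    return await asyncio.gather(
        generate_async('A sunrise over the ocean'),
        generate_async('Rain falling on a city street'),
    )

results = asyncio.run(main())
```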

## Common Use Cases

### 1. Product Demos

```python
response = client.models.generate_video(
    model='veo-3.0-fast-generate-001',
    prompt='''
Professional product video:
- Sleek smartphone rotating on a pedestal
- Clean white background with soft shadows
- Slow 360-degree rotation
- Spotlight highlighting premium design
- Modern, minimalist aesthetic
''',
    config=types.VideoGenerationConfig(
        resolution='1080p',
        aspect_ratio='1:1'
    )
)
```

### 2. Social Media Content

```python
response = client.models.generate_video(
    model='veo-3.1-fast-generate-preview',
    prompt='''
Trendy social media clip:
- Text overlay "NEW ARRIVAL" appears
- Fashion product showcase
- Quick cuts and dynamic camera
- Vibrant colors, high energy
- Upbeat pacing
''',
    config=types.VideoGenerationConfig(
        resolution='1080p',
        aspect_ratio='9:16'  # Mobile
    )
)
```

### 3. Explainer Animations

```python
response = client.models.generate_video(
    model='veo-3.1-generate-preview',
    prompt='''
Educational animation:
- Simple diagram illustrating data flow
- Arrows and icons animating in sequence
- Clean, clear visual hierarchy
- Smooth transitions between steps
- Professional corporate style
''',
    config=types.VideoGenerationConfig(
        resolution='720p',
        aspect_ratio='16:9'
    )
)
```

## Safety & Content Policy

### Safety Settings

```python
config = types.VideoGenerationConfig(
    safety_settings=[
        types.SafetySetting(
            category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            threshold=types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
        )
    ]
)
```

### Prohibited Content

- Violence, gore, harm
- Sexually explicit content
- Hate speech, harassment
- Copyrighted characters/brands
- Real people (without consent)
- Misleading/deceptive content

## Limitations

- **Duration**: Fixed 8 seconds (as of Sept 2025)
- **Frame Rate**: 24fps only
- **File Size**: ~5-20MB per video
- **Generation Time**: 30s-2min depending on resolution
- **Reference Images**: Max 3 images
- **Preview Status**: API may change (3.1 models)
- **Audio**: Cannot upload custom audio (native only)
- **No real-time**: Pre-generation required

## Troubleshooting

### Long Generation Times

```python
import time

# Track generation time
start = time.time()
response = client.models.generate_video(...)
duration = time.time() - start
print(f"Generated in {duration:.1f}s")
```

**Expected times**:
- Fast models + 720p: 30-45s
- Standard models + 720p: 45-90s
- Fast models + 1080p: 45-60s
- Standard models + 1080p: 60-120s

### Safety Filter Blocking

```python
try:
    response = client.models.generate_video(...)
except Exception as e:
    if 'safety' in str(e).lower():
        print("Video blocked by safety filters")
        # Modify the prompt and retry
```

### Quota Exceeded

```python
# Implement exponential backoff
import time

def generate_with_retry(model, prompt, max_retries=3):
    for attempt in range(max_retries):
        try:
            return client.models.generate_video(model=model, prompt=prompt)
        except Exception as e:
            if '429' in str(e):  # Rate limit
                wait = 2 ** attempt
                print(f"Rate limited, waiting {wait}s...")
                time.sleep(wait)
            else:
                raise
    raise Exception("Max retries exceeded")
```

## Cost Estimation

**Pricing**: TBD (preview models)

**Estimated based on compute**:
- Fast + 720p: ~$0.05-$0.10 per video
- Standard + 1080p: ~$0.15-$0.25 per video

**Monitor**: https://ai.google.dev/pricing

## Resources

- [Veo API Docs](https://ai.google.dev/gemini-api/docs/video)
- [Video Generation Guide](https://ai.google.dev/gemini-api/docs/video#model-versions)
- [Content Policy](https://ai.google.dev/gemini-api/docs/safety)
- [Get API Key](https://aistudio.google.com/apikey)

---

## Related References

**Current**: Video Generation

**Related Capabilities**:
- [Video Analysis](./video-analysis.md) - Understanding existing videos
- [Image Generation](./image-generation.md) - Creating static images
- [Image Understanding](./vision-understanding.md) - Analyzing reference images

**Back to**: [AI Multimodal Skill](../SKILL.md)

@@ -0,0 +1,492 @@
# Vision Understanding Reference

Comprehensive guide for image analysis, object detection, and visual understanding using Gemini API.

## Core Capabilities

- **Captioning**: Generate descriptive text for images
- **Classification**: Categorize and identify content
- **Visual Q&A**: Answer questions about images
- **Object Detection**: Locate objects with bounding boxes (2.0+)
- **Segmentation**: Create pixel-level masks (2.5+)
- **Multi-image**: Compare up to 3,600 images
- **OCR**: Extract text from images
- **Document Understanding**: Process PDFs with vision

## Supported Formats

- **Images**: PNG, JPEG, WEBP, HEIC, HEIF
- **Documents**: PDF (up to 1,000 pages)
- **Size Limits**:
  - Inline: 20MB max total request
  - File API: 2GB per file
  - Max images: 3,600 per request

## Model Selection

### Gemini 2.5 Series
- **gemini-2.5-pro**: Best quality, segmentation + detection
- **gemini-2.5-flash**: Fast, efficient, all features
- **gemini-2.5-flash-lite**: Lightweight, all features

### Feature Requirements
- **Segmentation**: Requires 2.5+ models
- **Object Detection**: Requires 2.0+ models
- **Multi-image**: All models (up to 3,600 images)

## Basic Image Analysis

### Image Captioning

```python
from google import genai
import os

client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

# Local file
with open('image.jpg', 'rb') as f:
    img_bytes = f.read()

# Reused by the examples below
img_part = genai.types.Part.from_bytes(data=img_bytes, mime_type='image/jpeg')

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Describe this image in detail', img_part]
)
print(response.text)
```

### Image Classification

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Classify this image. Provide category and confidence level.',
        img_part
    ]
)
```

### Visual Question Answering

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'How many people are in this image and what are they doing?',
        img_part
    ]
)
```

## Advanced Features

### Object Detection (2.0+)

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Detect all objects in this image and provide bounding boxes',
        img_part
    ]
)

# Returns bounding box coordinates: [ymin, xmin, ymax, xmax]
# Normalized to [0, 1000] range
```
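
Since coordinates come back normalized to [0, 1000], a small helper to map them onto the source image (illustrative):

```python
def to_pixels(box, width, height):
    """Convert [ymin, xmin, ymax, xmax] in 0-1000 space to pixel coordinates."""
    ymin, xmin, ymax, xmax = box
    return (ymin * height // 1000, xmin * width // 1000,
            ymax * height // 1000, xmax * width // 1000)

print(to_pixels([100, 200, 550, 700], width=1920, height=1080))
# (108, 384, 594, 1344)
```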

### Segmentation (2.5+)

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Create a segmentation mask for all people in this image',
        img_part
    ]
)

# Returns pixel-level masks for requested objects
```

### Multi-Image Comparison

```python
import PIL.Image

img1 = PIL.Image.open('photo1.jpg')
img2 = PIL.Image.open('photo2.jpg')

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Compare these two images. What are the differences?',
        img1,
        img2
    ]
)
```

### OCR and Text Extraction

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Extract all visible text from this image',
        img_part
    ]
)
```

## Input Methods

### Inline Data (<20MB)

```python
from google.genai import types

# From file
with open('image.jpg', 'rb') as f:
    img_bytes = f.read()

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Analyze this image',
        types.Part.from_bytes(data=img_bytes, mime_type='image/jpeg')
    ]
)
```

### PIL Image

```python
import PIL.Image

img = PIL.Image.open('photo.jpg')

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['What is in this image?', img]
)
```

### File API (>20MB or Reuse)

```python
# Upload once
myfile = client.files.upload(file='large-image.jpg')

# Use multiple times
response1 = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Describe this image', myfile]
)

response2 = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['What colors dominate this image?', myfile]
)
```

### URL (Public Images)

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Analyze this image',
        types.Part.from_uri(
            uri='https://example.com/image.jpg',
            mime_type='image/jpeg'
        )
    ]
)
```

## Token Calculation

Images consume tokens based on size:

**Small images** (≤384px both dimensions): 258 tokens

**Large images**: Tiled into 768×768 chunks, 258 tokens each

**Formula**:
```
crop_unit = floor(min(width, height) / 1.5)
tiles = ceil(width / crop_unit) × ceil(height / crop_unit)
total_tokens = tiles × 258
```

**Examples**:
- 256×256: 258 tokens (small)
- 512×512: 258 tokens (small)
- 960×540: 6 tiles = 1,548 tokens
- 1920×1080: 6 tiles = 1,548 tokens
- 3840×2160 (4K): 24 tiles = 6,192 tokens
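
A rough estimator implementing the formula (note that the 512×512 and 4K examples above suggest the service also rescales before tiling, so treat results as approximations):

```python
import math

def image_tokens(width: int, height: int) -> int:
    """Approximate token count for one image per the formula above."""
    if width <= 384 and height <= 384:
        return 258
    crop_unit = math.floor(min(width, height) / 1.5)
    tiles = math.ceil(width / crop_unit) * math.ceil(height / crop_unit)
    return tiles * 258

print(image_tokens(960, 540))    # 1548
print(image_tokens(1920, 1080))  # 1548
```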
|
||||
|
||||
## Structured Output
|
||||
|
||||
### JSON Schema Output
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel
|
||||
from typing import List
|
||||
|
||||
class ObjectDetection(BaseModel):
|
||||
object_name: str
|
||||
confidence: float
|
||||
bounding_box: List[int] # [ymin, xmin, ymax, xmax]
|
||||
|
||||
class ImageAnalysis(BaseModel):
|
||||
description: str
|
||||
objects: List[ObjectDetection]
|
||||
scene_type: str
|
||||
|
||||
response = client.models.generate_content(
|
||||
model='gemini-2.5-flash',
|
||||
contents=['Analyze this image', img_part],
|
||||
config=genai.types.GenerateContentConfig(
|
||||
response_mime_type='application/json',
|
||||
response_schema=ImageAnalysis
|
||||
)
|
||||
)
|
||||
|
||||
result = ImageAnalysis.model_validate_json(response.text)
|
||||
```
|
||||
|
||||
## Multi-Image Analysis

### Batch Processing

```python
images = [
    PIL.Image.open(f'image{i}.jpg')
    for i in range(10)
]

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=['Analyze these images and find common themes'] + images
)
```

### Image Comparison

```python
before = PIL.Image.open('before.jpg')
after = PIL.Image.open('after.jpg')

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Compare before and after. List all visible changes.',
        before,
        after
    ]
)
```

### Visual Search

```python
reference = PIL.Image.open('target.jpg')
candidates = [PIL.Image.open(f'option{i}.jpg') for i in range(5)]

response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Find which candidate images contain objects similar to the reference',
        reference
    ] + candidates
)
```

## Best Practices

### Image Quality

1. **Resolution**: Use clear, non-blurry images
2. **Rotation**: Verify correct orientation
3. **Lighting**: Ensure good contrast and lighting
4. **Size optimization**: Balance quality vs token cost
5. **Format**: JPEG for photos, PNG for graphics

### Prompt Engineering

**Specific instructions**:
- "Identify all vehicles with their colors and positions"
- "Count people wearing blue shirts"
- "Extract text from the sign in the top-left corner"

**Output format**:
- "Return results as JSON with fields: category, count, description"
- "Format as markdown table"
- "List findings as numbered items"

**Few-shot examples**:
```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Example: For an image of a cat on a sofa, respond: "Object: cat, Location: sofa"',
        'Now analyze this image:',
        img_part
    ]
)
```

### File Management

1. Use File API for images >20MB
2. Use File API for repeated queries (saves tokens)
3. Files auto-delete after 48 hours
4. Clean up manually:

```python
client.files.delete(name=myfile.name)
```

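A small helper can apply the size rule automatically. This is an illustrative sketch (the 20MB threshold comes from the rules above; `upload_or_inline` is a hypothetical name, not part of the SDK):

```python
import os
from google.genai import types

def upload_or_inline(client, path: str, mime_type: str = 'image/jpeg'):
    """Return a content part: File API for large files, inline bytes otherwise."""
    if os.path.getsize(path) > 20 * 1024 * 1024:  # >20MB must use the File API
        return client.files.upload(file=path)
    with open(path, 'rb') as f:
        return types.Part.from_bytes(data=f.read(), mime_type=mime_type)
```
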
### Cost Optimization

**Token-efficient strategies**:
- Resize large images before upload (see the sketch below)
- Use the File API for repeated queries
- Batch multiple images when related
- Use the appropriate model (Flash vs Pro)

**Token costs** (Gemini 2.5 Flash at $1/1M tokens):
- Small image (258 tokens): $0.000258
- HD image (1,548 tokens): $0.001548
- 4K image (6,192 tokens): $0.006192

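Per the token formula above, resizing pays off most when it brings both dimensions to 384px or below, which drops the image to the flat 258-token rate. A minimal Pillow sketch assuming that threshold (`shrink_for_tokens` is an illustrative name):

```python
import PIL.Image

def shrink_for_tokens(path: str, out_path: str, max_side: int = 384) -> None:
    """Downscale so both dimensions fit max_side; trades detail for token cost."""
    img = PIL.Image.open(path)
    scale = max_side / max(img.width, img.height)
    if scale < 1:  # only shrink, never enlarge
        img = img.resize((int(img.width * scale), int(img.height * scale)),
                         PIL.Image.Resampling.LANCZOS)
    img.save(out_path, quality=85, optimize=True)
```
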
## Common Use Cases

### 1. Product Analysis

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Analyze this product image:
1. Identify the product
2. List visible features
3. Assess condition
4. Estimate value range
''',
        img_part
    ]
)
```

### 2. Screenshot Analysis

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Extract all text and UI elements from this screenshot',
        img_part
    ]
)
```

### 3. Medical Imaging (Informational Only)

```python
response = client.models.generate_content(
    model='gemini-2.5-pro',
    contents=[
        'Describe visible features in this medical image. Note: This is for informational purposes only.',
        img_part
    ]
)
```

### 4. Chart/Graph Reading

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        'Extract data from this chart and format as JSON',
        img_part
    ]
)
```

### 5. Scene Understanding

```python
response = client.models.generate_content(
    model='gemini-2.5-flash',
    contents=[
        '''Analyze this scene:
1. Location type
2. Time of day
3. Weather conditions
4. Activities happening
5. Mood/atmosphere
''',
        img_part
    ]
)
```

## Error Handling

```python
import time

def analyze_image_with_retry(image_path, prompt, max_retries=3):
    """Analyze image with exponential backoff retry"""
    for attempt in range(max_retries):
        try:
            with open(image_path, 'rb') as f:
                img_bytes = f.read()

            response = client.models.generate_content(
                model='gemini-2.5-flash',
                contents=[
                    prompt,
                    genai.types.Part.from_bytes(
                        data=img_bytes,
                        mime_type='image/jpeg'
                    )
                ]
            )
            return response.text
        except Exception as e:
            if attempt == max_retries - 1:
                raise
            wait_time = 2 ** attempt
            print(f"Retry {attempt + 1} after {wait_time}s: {e}")
            time.sleep(wait_time)
```

## Limitations

- Maximum 3,600 images per request
- OCR accuracy varies with text quality
- Object detection requires 2.0+ models
- Segmentation requires 2.5+ models
- No video frame extraction (use the video API)
- Regional restrictions on images of children (EEA, CH, UK)

---

## Related References

**Current**: Image Understanding

**Related Capabilities**:
- [Image Generation](./image-generation.md) - Create and edit images
- [Video Analysis](./video-analysis.md) - Analyze video frames
- [Video Generation](./video-generation.md) - Reference images for video generation

**Back to**: [AI Multimodal Skill](../SKILL.md)
BIN
.opencode/skills/ai-multimodal/scripts/.coverage
Normal file
Binary file not shown.
315
.opencode/skills/ai-multimodal/scripts/check_setup.py
Executable file
@@ -0,0 +1,315 @@
#!/usr/bin/env python3
"""
Validate ai-multimodal skill setup and configuration.

Checks:
- API key presence and format
- Python dependencies
- Centralized resolver availability
- Directory structure
"""

import os
import sys
from pathlib import Path

# Fix Windows cp1252 encoding: Unicode symbols (✓, ⚠, ✗) can't encode on Windows.
# Reconfigure stdout to UTF-8 with replacement (Python 3.7+).
if sys.stdout.encoding and sys.stdout.encoding.lower() != "utf-8":
    if hasattr(sys.stdout, 'reconfigure'):
        sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    if hasattr(sys.stderr, 'reconfigure'):
        sys.stderr.reconfigure(encoding="utf-8", errors="replace")

# Color codes for terminal output
GREEN = '\033[92m'
YELLOW = '\033[93m'
RED = '\033[91m'
BLUE = '\033[94m'
RESET = '\033[0m'
BOLD = '\033[1m'


def print_header(text):
    """Print section header."""
    print(f"\n{BOLD}{BLUE}{'='*60}{RESET}")
    print(f"{BOLD}{BLUE}{text}{RESET}")
    print(f"{BOLD}{BLUE}{'='*60}{RESET}\n")


def print_success(text):
    """Print success message."""
    print(f"{GREEN}✓ {text}{RESET}")


def print_warning(text):
    """Print warning message."""
    print(f"{YELLOW}⚠ {text}{RESET}")


def print_error(text):
    """Print error message."""
    print(f"{RED}✗ {text}{RESET}")


def print_info(text):
    """Print info message."""
    print(f"{BLUE}ℹ {text}{RESET}")


def check_dependencies():
    """Check if required Python packages are installed."""
    print_header("Checking Python Dependencies")

    dependencies = {
        'google.genai': 'google-genai',
        'dotenv': 'python-dotenv',
        'PIL': 'pillow'
    }

    missing = []

    for module_name, package_name in dependencies.items():
        try:
            __import__(module_name)
            print_success(f"{package_name} is installed")
        except ImportError:
            print_error(f"{package_name} is NOT installed")
            missing.append(package_name)

    if missing:
        print_error("\nMissing dependencies detected!")
        print_info(f"Install with: pip install {' '.join(missing)}")
        return False

    return True


def check_centralized_resolver():
    """Check if centralized resolver is available."""
    print_header("Checking Centralized Resolver")

    claude_root = Path(__file__).parent.parent.parent.parent
    resolver_path = claude_root / 'scripts' / 'resolve_env.py'

    if resolver_path.exists():
        print_success(f"Centralized resolver found: {resolver_path}")

        # Try to import it
        sys.path.insert(0, str(resolver_path.parent))
        try:
            from resolve_env import resolve_env
            print_success("Centralized resolver can be imported")
            return True
        except ImportError as e:
            print_error(f"Centralized resolver exists but cannot be imported: {e}")
            return False
    else:
        print_warning(f"Centralized resolver not found: {resolver_path}")
        print_info("Skill will use fallback resolution logic")
        return True  # Not critical, fallback works


def find_api_key():
    """Find and validate API key using centralized resolver."""
    print_header("Checking API Key Configuration")

    # Try to use centralized resolver
    claude_root = Path(__file__).parent.parent.parent.parent
    sys.path.insert(0, str(claude_root / 'scripts'))
    try:
        from resolve_env import resolve_env

        print_info("Using centralized resolver...")
        api_key = resolve_env('GEMINI_API_KEY', skill='ai-multimodal')

        if api_key:
            print_success("API key found via centralized resolver")
            print_info(f"Key preview: {api_key[:20]}...{api_key[-4:]}")

            # Show hierarchy
            print_info("\nTo see where the key was found, run:")
            print_info("python ~/.opencode/scripts/resolve_env.py GEMINI_API_KEY --skill ai-multimodal --verbose")

            return api_key
        else:
            print_error("API key not found in any location")
            return None

    except ImportError:
        print_warning("Centralized resolver not available, using fallback")

        # Fallback: check environment
        api_key = os.getenv('GEMINI_API_KEY')
        if api_key:
            print_success("API key found in process.env")
            print_info(f"Key preview: {api_key[:20]}...{api_key[-4:]}")
            return api_key
        else:
            print_error("API key not found")
            return None


def validate_api_key_format(api_key):
    """Basic validation of API key format."""
    if not api_key:
        return False

    # Google AI Studio keys typically start with 'AIza'
    if api_key.startswith('AIza'):
        print_success("API key format looks valid (Google AI Studio)")
        return True
    elif len(api_key) > 20:
        print_warning("API key format not recognized (may be Vertex AI or custom)")
        return True
    else:
        print_error("API key format looks invalid (too short)")
        return False


def test_api_connection(api_key):
    """Test API connection with a simple request."""
    print_header("Testing API Connection")

    try:
        from google import genai

        print_info("Initializing Gemini client...")
        client = genai.Client(api_key=api_key)

        print_info("Fetching available models...")
        # List models to verify API key works
        models = list(client.models.list())

        print_success(f"API connection successful! Found {len(models)} available models")

        # Show some available models
        print_info("\nSample available models:")
        for model in models[:5]:
            print(f"  - {model.name}")

        return True

    except ImportError:
        print_error("google-genai package not installed")
        return False
    except Exception as e:
        print_error(f"API connection failed: {str(e)}")
        return False


def check_directory_structure():
    """Verify skill directory structure."""
    print_header("Checking Directory Structure")

    script_dir = Path(__file__).parent
    skill_dir = script_dir.parent

    required_files = [
        ('SKILL.md', skill_dir / 'SKILL.md'),
        ('.env.example', skill_dir / '.env.example'),
        ('gemini_batch_process.py', script_dir / 'gemini_batch_process.py'),
    ]

    all_exist = True

    for name, path in required_files:
        if path.exists():
            print_success(f"{name} exists")
        else:
            print_error(f"{name} NOT found at {path}")
            all_exist = False

    return all_exist


def provide_setup_instructions():
    """Provide setup instructions if configuration is incomplete."""
    print_header("Setup Instructions")

    print_info("To configure the ai-multimodal skill:")
    print("\n1. Get a Gemini API key:")
    print("   → Visit: https://aistudio.google.com/apikey")

    print("\n2. Configure the API key (choose one method):")

    print("\n   Option A: User global config (recommended)")
    print("   $ echo 'GEMINI_API_KEY=your-api-key-here' >> ~/.opencode/.env")

    script_dir = Path(__file__).parent
    skill_dir = script_dir.parent

    print("\n   Option B: Skill-specific config")
    print(f"   $ cd {skill_dir}")
    print("   $ cp .env.example .env")
    print("   $ # Edit .env and add your API key")

    print("\n   Option C: Runtime environment (temporary)")
    print("   $ export GEMINI_API_KEY='your-api-key-here'")

    print("\n3. Verify setup:")
    print(f"   $ python {Path(__file__)}")

    print("\n4. Debug if needed:")
    print("   $ python ~/.opencode/scripts/resolve_env.py --show-hierarchy --skill ai-multimodal")
    print("   $ python ~/.opencode/scripts/resolve_env.py GEMINI_API_KEY --skill ai-multimodal --verbose")


def main():
    """Run all setup checks."""
    print(f"\n{BOLD}AI Multimodal Skill - Setup Checker{RESET}")

    all_passed = True

    # Check directory structure
    if not check_directory_structure():
        all_passed = False

    # Check centralized resolver
    check_centralized_resolver()

    # Check dependencies
    if not check_dependencies():
        all_passed = False
        provide_setup_instructions()
        sys.exit(1)

    # Check API key
    api_key = find_api_key()

    if not api_key:
        print_error("\n❌ GEMINI_API_KEY not found in any location")
        all_passed = False
        provide_setup_instructions()
        sys.exit(1)

    # Validate API key format
    if not validate_api_key_format(api_key):
        all_passed = False

    # Test API connection
    if not test_api_connection(api_key):
        all_passed = False

    # Final summary
    print_header("Setup Summary")

    if all_passed:
        print_success("✅ All checks passed! The ai-multimodal skill is ready to use.")
        print_info("\nNext steps:")
        print("  • Read SKILL.md for usage examples")
        print("  • Try: python scripts/gemini_batch_process.py --help")
        print("\nImage generation models:")
        print("  • gemini-2.5-flash-image - Nano Banana Flash (DEFAULT - fast)")
        print("  • imagen-4.0-generate-001 - Imagen 4 (alternative - production)")
        print("  • gemini-3-pro-image-preview - Nano Banana Pro (4K text, reasoning)")
        print("\nExample (uses default model):")
        print("  python scripts/gemini_batch_process.py --task generate \\")
        print("    --prompt 'A sunset over mountains' --aspect-ratio 16:9 --size 2K")
    else:
        print_error("❌ Some checks failed. Please fix the issues above.")
        sys.exit(1)


if __name__ == '__main__':
    main()
395
.opencode/skills/ai-multimodal/scripts/document_converter.py
Executable file
@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""
Convert documents to Markdown using Gemini API.

Supports all document types:
- PDF documents (native vision processing)
- Images (JPEG, PNG, WEBP, HEIC)
- Office documents (DOCX, XLSX, PPTX)
- HTML, TXT, and other text formats

Features:
- Converts to clean markdown format
- Preserves structure, tables, and formatting
- Extracts text from images and scanned documents
- Batch conversion support
- Saves to docs/assets/document-extraction.md by default
"""

import argparse
import os
import sys
import time
from pathlib import Path
from typing import Optional, List, Dict, Any

try:
    from google import genai
    from google.genai import types
except ImportError:
    print("Error: google-genai package not installed")
    print("Install with: pip install google-genai")
    sys.exit(1)

try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None


def find_api_key() -> Optional[str]:
    """Find Gemini API key using correct priority order.

    Priority order (highest to lowest):
    1. process.env (runtime environment variables)
    2. .opencode/skills/ai-multimodal/.env (skill-specific config)
    3. .opencode/skills/.env (shared skills config)
    4. .opencode/.env (Claude global config)
    """
    # Priority 1: Already in process.env (highest)
    api_key = os.getenv('GEMINI_API_KEY')
    if api_key:
        return api_key

    # Load .env files if dotenv available
    if load_dotenv:
        # Determine base paths
        script_dir = Path(__file__).parent
        skill_dir = script_dir.parent  # .opencode/skills/ai-multimodal
        skills_dir = skill_dir.parent  # .opencode/skills
        claude_dir = skills_dir.parent  # .opencode

        # Priority 2: Skill-specific .env
        env_file = skill_dir / '.env'
        if env_file.exists():
            load_dotenv(env_file)
            api_key = os.getenv('GEMINI_API_KEY')
            if api_key:
                return api_key

        # Priority 3: Shared skills .env
        env_file = skills_dir / '.env'
        if env_file.exists():
            load_dotenv(env_file)
            api_key = os.getenv('GEMINI_API_KEY')
            if api_key:
                return api_key

        # Priority 4: Claude global .env
        env_file = claude_dir / '.env'
        if env_file.exists():
            load_dotenv(env_file)
            api_key = os.getenv('GEMINI_API_KEY')
            if api_key:
                return api_key

    return None


def find_project_root() -> Path:
    """Find project root directory."""
    script_dir = Path(__file__).parent

    # Look for .git or .claude directory
    for parent in [script_dir] + list(script_dir.parents):
        if (parent / '.git').exists() or (parent / '.claude').exists():
            return parent

    return script_dir


def get_mime_type(file_path: str) -> str:
    """Determine MIME type from file extension."""
    ext = Path(file_path).suffix.lower()

    mime_types = {
        # Documents
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.htm': 'text/html',
        '.md': 'text/markdown',
        '.csv': 'text/csv',
        # Images
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        '.heic': 'image/heic',
        '.heif': 'image/heif',
        # Office (need to be uploaded as binary)
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
    }

    return mime_types.get(ext, 'application/octet-stream')


def upload_file(client: genai.Client, file_path: str, verbose: bool = False) -> Any:
    """Upload file to Gemini File API."""
    if verbose:
        print(f"Uploading {file_path}...")

    myfile = client.files.upload(file=file_path)

    # Wait for processing if needed
    max_wait = 300  # 5 minutes
    elapsed = 0
    while myfile.state.name == 'PROCESSING' and elapsed < max_wait:
        time.sleep(2)
        myfile = client.files.get(name=myfile.name)
        elapsed += 2
        if verbose and elapsed % 10 == 0:
            print(f"  Processing... {elapsed}s")

    if myfile.state.name == 'FAILED':
        raise ValueError(f"File processing failed: {file_path}")

    if myfile.state.name == 'PROCESSING':
        raise TimeoutError(f"Processing timeout after {max_wait}s: {file_path}")

    if verbose:
        print(f"  Uploaded: {myfile.name}")

    return myfile


def convert_to_markdown(
    client: genai.Client,
    file_path: str,
    model: str = 'gemini-2.5-flash',
    custom_prompt: Optional[str] = None,
    verbose: bool = False,
    max_retries: int = 3
) -> Dict[str, Any]:
    """Convert a document to markdown using Gemini."""

    for attempt in range(max_retries):
        try:
            file_path_obj = Path(file_path)
            file_size = file_path_obj.stat().st_size
            use_file_api = file_size > 20 * 1024 * 1024  # >20MB

            # Default prompt for markdown conversion
            if custom_prompt:
                prompt = custom_prompt
            else:
                prompt = """Convert this document to clean, well-formatted Markdown.

Requirements:
- Preserve all content, structure, and formatting
- Convert tables to markdown table format
- Maintain heading hierarchy (# ## ### etc)
- Preserve lists, code blocks, and quotes
- Extract text from images if present
- Keep formatting consistent and readable

Output only the markdown content without any preamble or explanation."""

            # Upload or inline the file
            if use_file_api:
                myfile = upload_file(client, str(file_path), verbose)
                content = [prompt, myfile]
            else:
                with open(file_path, 'rb') as f:
                    file_bytes = f.read()

                mime_type = get_mime_type(str(file_path))
                content = [
                    prompt,
                    types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
                ]

            # Generate markdown
            response = client.models.generate_content(
                model=model,
                contents=content
            )

            markdown_content = response.text if hasattr(response, 'text') else ''

            return {
                'file': str(file_path),
                'status': 'success',
                'markdown': markdown_content
            }

        except Exception as e:
            if attempt == max_retries - 1:
                return {
                    'file': str(file_path),
                    'status': 'error',
                    'error': str(e),
                    'markdown': None
                }

            wait_time = 2 ** attempt
            if verbose:
                print(f"  Retry {attempt + 1} after {wait_time}s: {e}")
            time.sleep(wait_time)


def batch_convert(
    files: List[str],
    output_file: Optional[str] = None,
    auto_name: bool = False,
    model: str = 'gemini-2.5-flash',
    custom_prompt: Optional[str] = None,
    verbose: bool = False
) -> List[Dict[str, Any]]:
    """Batch convert multiple files to markdown."""

    api_key = find_api_key()
    if not api_key:
        print("Error: GEMINI_API_KEY not found")
        print("Set via: export GEMINI_API_KEY='your-key'")
        print("Or create .env file with: GEMINI_API_KEY=your-key")
        sys.exit(1)

    client = genai.Client(api_key=api_key)
    results = []

    # Determine output path
    if not output_file:
        project_root = find_project_root()
        output_dir = project_root / 'docs' / 'assets'

        if auto_name and len(files) == 1:
            # Auto-generate meaningful filename from input
            input_path = Path(files[0])
            base_name = input_path.stem
            output_file = str(output_dir / f"{base_name}-extraction.md")
        else:
            output_file = str(output_dir / 'document-extraction.md')

    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Process each file
    for i, file_path in enumerate(files, 1):
        if verbose:
            print(f"\n[{i}/{len(files)}] Converting: {file_path}")

        result = convert_to_markdown(
            client=client,
            file_path=file_path,
            model=model,
            custom_prompt=custom_prompt,
            verbose=verbose
        )

        results.append(result)

        if verbose:
            status = result.get('status', 'unknown')
            print(f"  Status: {status}")

    # Save combined markdown
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("# Document Extraction Results\n\n")
        f.write(f"Converted {len(files)} document(s) to markdown.\n\n")
        f.write("---\n\n")

        for result in results:
            f.write(f"## {Path(result['file']).name}\n\n")

            if result['status'] == 'success' and result.get('markdown'):
                f.write(result['markdown'])
                f.write("\n\n")
            elif result['status'] == 'success':
                f.write("**Note**: Conversion succeeded but no content was returned.\n\n")
            else:
                f.write(f"**Error**: {result.get('error', 'Unknown error')}\n\n")

            f.write("---\n\n")

    # Always show the output location
    print(f"\n{'='*50}")
    print(f"Converted: {len(results)} file(s)")
    print(f"Success: {sum(1 for r in results if r['status'] == 'success')}")
    print(f"Failed: {sum(1 for r in results if r['status'] == 'error')}")
    print(f"Output saved to: {output_path}")

    return results


def main():
    parser = argparse.ArgumentParser(
        description='Convert documents to Markdown using Gemini API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Convert single PDF to markdown (default name)
  %(prog)s --input document.pdf

  # Auto-generate meaningful filename
  %(prog)s --input testpdf.pdf --auto-name
  # Output: docs/assets/testpdf-extraction.md

  # Convert multiple files
  %(prog)s --input doc1.pdf doc2.docx image.png

  # Specify custom output location
  %(prog)s --input document.pdf --output ./output.md

  # Use custom prompt
  %(prog)s --input document.pdf --prompt "Extract only the tables as markdown"

  # Batch convert directory
  %(prog)s --input ./documents/*.pdf --verbose

Supported formats:
  - PDF documents (up to 1,000 pages)
  - Images (JPEG, PNG, WEBP, HEIC)
  - Office documents (DOCX, XLSX, PPTX)
  - Text formats (TXT, HTML, Markdown, CSV)

Default output: <project-root>/docs/assets/document-extraction.md
"""
    )

    parser.add_argument('--input', '-i', nargs='+', required=True,
                        help='Input file(s) to convert')
    parser.add_argument('--output', '-o',
                        help='Output markdown file (default: docs/assets/document-extraction.md)')
    parser.add_argument('--auto-name', '-a', action='store_true',
                        help='Auto-generate meaningful output filename from input (e.g., document.pdf -> document-extraction.md)')
    parser.add_argument('--model', default='gemini-2.5-flash',
                        help='Gemini model to use (default: gemini-2.5-flash)')
    parser.add_argument('--prompt', '-p',
                        help='Custom prompt for conversion')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')

    args = parser.parse_args()

    # Validate input files
    files = []
    for file_pattern in args.input:
        file_path = Path(file_pattern)
        if file_path.exists() and file_path.is_file():
            files.append(str(file_path))
        else:
            # Try glob pattern
            import glob
            matched = glob.glob(file_pattern)
            files.extend([f for f in matched if Path(f).is_file()])

    if not files:
        print("Error: No valid input files found")
        sys.exit(1)

    # Convert files
    batch_convert(
        files=files,
        output_file=args.output,
        auto_name=args.auto_name,
        model=args.model,
        custom_prompt=args.prompt,
        verbose=args.verbose
    )


if __name__ == '__main__':
    main()
1211
.opencode/skills/ai-multimodal/scripts/gemini_batch_process.py
Executable file
File diff suppressed because it is too large
506
.opencode/skills/ai-multimodal/scripts/media_optimizer.py
Executable file
@@ -0,0 +1,506 @@
#!/usr/bin/env python3
"""
Optimize media files for Gemini API processing.

Features:
- Compress videos/audio for size limits
- Resize images appropriately
- Split long videos into chunks
- Format conversion
- Quality vs size optimization
- Validation before upload
"""

import argparse
import json
import os
import subprocess
import sys
from pathlib import Path
from typing import Optional, Dict, Any, List

try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None


def load_env_files():
    """Load .env files in correct priority order.

    Priority order (highest to lowest):
    1. process.env (runtime environment variables)
    2. .opencode/skills/ai-multimodal/.env (skill-specific config)
    3. .opencode/skills/.env (shared skills config)
    4. .opencode/.env (Claude global config)
    """
    if not load_dotenv:
        return

    # Determine base paths
    script_dir = Path(__file__).parent
    skill_dir = script_dir.parent  # .opencode/skills/ai-multimodal
    skills_dir = skill_dir.parent  # .opencode/skills
    claude_dir = skills_dir.parent  # .opencode

    # Priority 2: Skill-specific .env
    env_file = skill_dir / '.env'
    if env_file.exists():
        load_dotenv(env_file)

    # Priority 3: Shared skills .env
    env_file = skills_dir / '.env'
    if env_file.exists():
        load_dotenv(env_file)

    # Priority 4: Claude global .env
    env_file = claude_dir / '.env'
    if env_file.exists():
        load_dotenv(env_file)


# Load environment variables at module level
load_env_files()


def check_ffmpeg() -> bool:
    """Check if ffmpeg is installed."""
    try:
        subprocess.run(['ffmpeg', '-version'],
                       stdout=subprocess.DEVNULL,
                       stderr=subprocess.DEVNULL,
                       check=True)
        return True
    except Exception:
        return False


def get_media_info(file_path: str) -> Dict[str, Any]:
    """Get media file information using ffprobe."""
    if not check_ffmpeg():
        return {}

    try:
        cmd = [
            'ffprobe',
            '-v', 'quiet',
            '-print_format', 'json',
            '-show_format',
            '-show_streams',
            file_path
        ]

        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)

        info = {
            'size': int(data['format'].get('size', 0)),
            'duration': float(data['format'].get('duration', 0)),
            'bit_rate': int(data['format'].get('bit_rate', 0)),
        }

        # Get video/audio specific info
        for stream in data.get('streams', []):
            if stream['codec_type'] == 'video':
                info['width'] = stream.get('width', 0)
                info['height'] = stream.get('height', 0)
                # Parse the "num/den" frame rate safely instead of eval()
                num, _, den = stream.get('r_frame_rate', '0/1').partition('/')
                info['fps'] = float(num) / float(den) if den and float(den) else 0.0
            elif stream['codec_type'] == 'audio':
                info['sample_rate'] = int(stream.get('sample_rate', 0))
                info['channels'] = stream.get('channels', 0)

        return info

    except Exception:
        return {}


def optimize_video(
    input_path: str,
    output_path: str,
    target_size_mb: Optional[int] = None,
    max_duration: Optional[int] = None,
    quality: int = 23,
    resolution: Optional[str] = None,
    verbose: bool = False
) -> bool:
    """Optimize video file for Gemini API."""
    if not check_ffmpeg():
        print("Error: ffmpeg not installed")
        print("Install: apt-get install ffmpeg (Linux) or brew install ffmpeg (Mac)")
        return False

    info = get_media_info(input_path)
    if not info:
        print(f"Error: Could not read media info from {input_path}")
        return False

    if verbose:
        print(f"Input: {Path(input_path).name}")
        print(f"  Size: {info['size'] / (1024*1024):.2f} MB")
        print(f"  Duration: {info['duration']:.2f}s")
        if 'width' in info:
            print(f"  Resolution: {info['width']}x{info['height']}")
        print(f"  Bit rate: {info['bit_rate'] / 1000:.0f} kbps")

    # Build ffmpeg command
    cmd = ['ffmpeg', '-i', input_path, '-y']

    # Video codec
    cmd.extend(['-c:v', 'libx264'])

    # Resolution
    if resolution:
        cmd.extend(['-vf', f'scale={resolution}'])
    elif 'width' in info and info['width'] > 1920:
        cmd.extend(['-vf', 'scale=1920:-2'])  # Max 1080p

    # Audio codec
    cmd.extend(['-c:a', 'aac', '-b:a', '128k', '-ac', '2'])

    # Duration limit
    if max_duration and info['duration'] > max_duration:
        cmd.extend(['-t', str(max_duration)])

    # Rate control: bitrate mode when a target size is given, CRF otherwise
    # (libx264 ignores -b:v when -crf is also set, so exactly one is passed)
    if target_size_mb:
        target_bits = target_size_mb * 8 * 1024 * 1024
        duration = min(info['duration'], max_duration) if max_duration else info['duration']
        target_bitrate = int(target_bits / duration)
        # Reserve some for audio (128kbps)
        video_bitrate = max(target_bitrate - 128000, 500000)
        cmd.extend(['-b:v', str(video_bitrate)])
    else:
        # Guard against image-scale quality values: CRF is only valid 0-51
        crf = quality if 0 <= quality <= 51 else 23
        cmd.extend(['-crf', str(crf)])

    cmd.append(output_path)

    if verbose:
        print("\nOptimizing...")
        print(f"  Command: {' '.join(cmd)}")

    try:
        subprocess.run(cmd, check=True, capture_output=not verbose)

        # Check output
        output_info = get_media_info(output_path)
        if output_info and verbose:
            print(f"\nOutput: {Path(output_path).name}")
            print(f"  Size: {output_info['size'] / (1024*1024):.2f} MB")
            print(f"  Duration: {output_info['duration']:.2f}s")
            if 'width' in output_info:
                print(f"  Resolution: {output_info['width']}x{output_info['height']}")
            compression = (1 - output_info['size'] / info['size']) * 100
            print(f"  Compression: {compression:.1f}%")

        return True

    except subprocess.CalledProcessError as e:
        print(f"Error optimizing video: {e}")
        return False


def optimize_audio(
    input_path: str,
    output_path: str,
    target_size_mb: Optional[int] = None,
    bitrate: str = '64k',
    sample_rate: int = 16000,
    verbose: bool = False
) -> bool:
    """Optimize audio file for Gemini API."""
    if not check_ffmpeg():
        print("Error: ffmpeg not installed")
        return False

    info = get_media_info(input_path)
    if not info:
        print(f"Error: Could not read media info from {input_path}")
        return False

    if verbose:
        print(f"Input: {Path(input_path).name}")
        print(f"  Size: {info['size'] / (1024*1024):.2f} MB")
        print(f"  Duration: {info['duration']:.2f}s")

    # Build command
    cmd = [
        'ffmpeg', '-i', input_path, '-y',
        '-c:a', 'aac',
        '-b:a', bitrate,
        '-ar', str(sample_rate),
        '-ac', '1',  # Mono (Gemini uses mono anyway)
        output_path
    ]

    if verbose:
        print("\nOptimizing...")

    try:
        subprocess.run(cmd, check=True, capture_output=not verbose)

        output_info = get_media_info(output_path)
        if output_info and verbose:
            print(f"\nOutput: {Path(output_path).name}")
            print(f"  Size: {output_info['size'] / (1024*1024):.2f} MB")
            compression = (1 - output_info['size'] / info['size']) * 100
            print(f"  Compression: {compression:.1f}%")

        return True

    except subprocess.CalledProcessError as e:
        print(f"Error optimizing audio: {e}")
        return False


def optimize_image(
    input_path: str,
    output_path: str,
    max_width: int = 1920,
    quality: int = 85,
    verbose: bool = False
) -> bool:
    """Optimize image file for Gemini API."""
    try:
        from PIL import Image
    except ImportError:
        print("Error: Pillow not installed")
        print("Install with: pip install pillow")
        return False

    try:
        img = Image.open(input_path)

        if verbose:
            print(f"Input: {Path(input_path).name}")
            print(f"  Size: {Path(input_path).stat().st_size / 1024:.2f} KB")
            print(f"  Resolution: {img.width}x{img.height}")

        # Resize if needed
        if img.width > max_width:
            ratio = max_width / img.width
            new_height = int(img.height * ratio)
            img = img.resize((max_width, new_height), Image.Resampling.LANCZOS)
            if verbose:
                print(f"  Resized to: {img.width}x{img.height}")

        # Convert RGBA to RGB if saving as JPEG
        if output_path.lower().endswith('.jpg') or output_path.lower().endswith('.jpeg'):
            if img.mode == 'RGBA':
                rgb_img = Image.new('RGB', img.size, (255, 255, 255))
                rgb_img.paste(img, mask=img.split()[3])
                img = rgb_img

        # Save
        img.save(output_path, quality=quality, optimize=True)

        if verbose:
            print(f"\nOutput: {Path(output_path).name}")
            print(f"  Size: {Path(output_path).stat().st_size / 1024:.2f} KB")
            compression = (1 - Path(output_path).stat().st_size / Path(input_path).stat().st_size) * 100
            print(f"  Compression: {compression:.1f}%")

        return True

    except Exception as e:
        print(f"Error optimizing image: {e}")
        return False


def split_video(
    input_path: str,
    output_dir: str,
    chunk_duration: int = 3600,
    verbose: bool = False
) -> List[str]:
    """Split long video into chunks."""
    if not check_ffmpeg():
        print("Error: ffmpeg not installed")
        return []

    info = get_media_info(input_path)
    if not info:
        return []

    total_duration = info['duration']
    num_chunks = int(total_duration / chunk_duration) + 1

    if num_chunks == 1:
        if verbose:
            print("Video is short enough, no splitting needed")
        return [input_path]

    Path(output_dir).mkdir(parents=True, exist_ok=True)
    output_files = []

    for i in range(num_chunks):
        start_time = i * chunk_duration
        output_file = Path(output_dir) / f"{Path(input_path).stem}_chunk_{i+1}.mp4"

        cmd = [
            'ffmpeg', '-i', input_path, '-y',
            '-ss', str(start_time),
            '-t', str(chunk_duration),
            '-c', 'copy',
            str(output_file)
        ]

        if verbose:
            print(f"Creating chunk {i+1}/{num_chunks}...")

        try:
            subprocess.run(cmd, check=True, capture_output=not verbose)
            output_files.append(str(output_file))
        except subprocess.CalledProcessError as e:
            print(f"Error creating chunk {i+1}: {e}")

    return output_files


def main():
    parser = argparse.ArgumentParser(
        description='Optimize media files for Gemini API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Optimize video to 100MB
  %(prog)s --input video.mp4 --output optimized.mp4 --target-size 100

  # Optimize audio
  %(prog)s --input audio.mp3 --output optimized.m4a --bitrate 64k

  # Resize image
  %(prog)s --input image.jpg --output resized.jpg --max-width 1920

  # Split long video
  %(prog)s --input long-video.mp4 --split --chunk-duration 3600 --output-dir ./chunks

  # Batch optimize directory
  %(prog)s --input-dir ./videos --output-dir ./optimized --quality 85
"""
    )

    parser.add_argument('--input', help='Input file')
    parser.add_argument('--output', help='Output file')
    parser.add_argument('--input-dir', help='Input directory for batch processing')
    parser.add_argument('--output-dir', help='Output directory for batch processing')
    parser.add_argument('--target-size', type=int, help='Target size in MB')
    parser.add_argument('--quality', type=int, default=85,
                        help='Quality (video: 0-51 CRF, image: 1-100) (default: 85)')
    parser.add_argument('--max-width', type=int, default=1920,
                        help='Max image width (default: 1920)')
    parser.add_argument('--bitrate', default='64k',
                        help='Audio bitrate (default: 64k)')
    parser.add_argument('--resolution', help='Video resolution (e.g., 1920x1080)')
    parser.add_argument('--split', action='store_true', help='Split long video into chunks')
    parser.add_argument('--chunk-duration', type=int, default=3600,
                        help='Chunk duration in seconds (default: 3600 = 1 hour)')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')

    args = parser.parse_args()

    # Validate arguments
    if not args.input and not args.input_dir:
        parser.error("Either --input or --input-dir required")

    # Single file processing
    if args.input:
        input_path = Path(args.input)
        if not input_path.exists():
            print(f"Error: Input file not found: {input_path}")
            sys.exit(1)

        if args.split:
            output_dir = args.output_dir or './chunks'
            chunks = split_video(str(input_path), output_dir, args.chunk_duration, args.verbose)
            print(f"\nCreated {len(chunks)} chunks in {output_dir}")
            sys.exit(0)

        if not args.output:
            parser.error("--output required for single file processing")

        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Determine file type
        ext = input_path.suffix.lower()

        if ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv']:
            success = optimize_video(
                str(input_path),
                str(output_path),
                target_size_mb=args.target_size,
                quality=args.quality,
                resolution=args.resolution,
                verbose=args.verbose
            )
        elif ext in ['.mp3', '.wav', '.m4a', '.flac', '.aac']:
            success = optimize_audio(
                str(input_path),
                str(output_path),
                target_size_mb=args.target_size,
                bitrate=args.bitrate,
                verbose=args.verbose
            )
        elif ext in ['.jpg', '.jpeg', '.png', '.webp']:
            success = optimize_image(
                str(input_path),
                str(output_path),
                max_width=args.max_width,
                quality=args.quality,
                verbose=args.verbose
            )
        else:
            print(f"Error: Unsupported file type: {ext}")
            sys.exit(1)

        sys.exit(0 if success else 1)

    # Batch processing
    if args.input_dir:
        if not args.output_dir:
            parser.error("--output-dir required for batch processing")

        input_dir = Path(args.input_dir)
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Find all media files
        patterns = ['*.mp4', '*.mov', '*.avi', '*.mkv', '*.webm',
                    '*.mp3', '*.wav', '*.m4a', '*.flac',
                    '*.jpg', '*.jpeg', '*.png', '*.webp']

        files = []
        for pattern in patterns:
            files.extend(input_dir.glob(pattern))

        if not files:
            print(f"No media files found in {input_dir}")
            sys.exit(1)

        print(f"Found {len(files)} files to process")

        success_count = 0
        for input_file in files:
            output_file = output_dir / input_file.name

            ext = input_file.suffix.lower()
            success = False

            if ext in ['.mp4', '.mov', '.avi', '.mkv', '.webm', '.flv']:
                success = optimize_video(str(input_file), str(output_file),
                                         quality=args.quality, verbose=args.verbose)
            elif ext in ['.mp3', '.wav', '.m4a', '.flac', '.aac']:
                success = optimize_audio(str(input_file), str(output_file),
                                         bitrate=args.bitrate, verbose=args.verbose)
            elif ext in ['.jpg', '.jpeg', '.png', '.webp']:
                success = optimize_image(str(input_file), str(output_file),
                                         max_width=args.max_width, quality=args.quality,
                                         verbose=args.verbose)

            if success:
                success_count += 1

        print(f"\nProcessed: {success_count}/{len(files)} files")


if __name__ == '__main__':
    main()
189
.opencode/skills/ai-multimodal/scripts/minimax_api_client.py
Normal file
@@ -0,0 +1,189 @@
#!/usr/bin/env python3
"""
MiniMax API client - shared HTTP utilities for all MiniMax generation tasks.

Handles authentication, API calls, async task polling, and file downloads.
Base URL: https://api.minimax.io/v1
Auth: Bearer token via MINIMAX_API_KEY environment variable.
"""

import json
import os
import sys
import time
from pathlib import Path
from typing import Dict, Any, Optional

try:
    import requests
except ImportError:
    print("Error: requests package not installed")
    print("Install with: pip install requests")
    sys.exit(1)

# Import centralized environment resolver
CLAUDE_ROOT = Path(__file__).parent.parent.parent.parent
sys.path.insert(0, str(CLAUDE_ROOT / 'scripts'))
try:
    from resolve_env import resolve_env
    CENTRALIZED_RESOLVER_AVAILABLE = True
except ImportError:
    CENTRALIZED_RESOLVER_AVAILABLE = False

BASE_URL = "https://api.minimax.io/v1"


def find_minimax_api_key() -> Optional[str]:
    """Find MINIMAX_API_KEY using centralized resolver or environment."""
    if CENTRALIZED_RESOLVER_AVAILABLE:
        return resolve_env('MINIMAX_API_KEY', skill='ai-multimodal')

    # Fallback: check environment and .env files
    api_key = os.getenv('MINIMAX_API_KEY')
    if api_key:
        return api_key

    # Check .env files in skill directory hierarchy
    try:
        from dotenv import load_dotenv
        skill_dir = Path(__file__).parent.parent
        for env_path in [skill_dir / '.env', skill_dir.parent / '.env']:
            if env_path.exists():
                load_dotenv(env_path, override=True)
                api_key = os.getenv('MINIMAX_API_KEY')
                if api_key:
                    return api_key
    except ImportError:
        pass

    return None


def get_headers(api_key: str) -> Dict[str, str]:
    """Build authorization headers for MiniMax API."""
    return {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }


def api_post(endpoint: str, payload: Dict[str, Any], api_key: str,
             verbose: bool = False, timeout: int = 120) -> Dict[str, Any]:
    """Make POST request to MiniMax API with error handling."""
    url = f"{BASE_URL}/{endpoint}"
    headers = get_headers(api_key)

    if verbose:
        print(f"  POST {url}", file=sys.stderr)

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)

    if response.status_code != 200:
        raise Exception(
            f"MiniMax API error (HTTP {response.status_code}): {response.text}"
        )

    data = response.json()

    # Check MiniMax-specific error codes
    base_resp = data.get("base_resp", {})
    status_code = base_resp.get("status_code", 0)
    if status_code != 0:
        raise Exception(
            f"MiniMax API error (code {status_code}): "
            f"{base_resp.get('status_msg', 'Unknown error')}"
        )

    return data


def api_get(endpoint: str, params: Dict[str, str], api_key: str,
            verbose: bool = False) -> Dict[str, Any]:
    """Make GET request to MiniMax API."""
    url = f"{BASE_URL}/{endpoint}"
    headers = get_headers(api_key)

    if verbose:
        print(f"  GET {url}", file=sys.stderr)

    response = requests.get(url, headers=headers, params=params, timeout=60)

    if response.status_code != 200:
        raise Exception(
            f"MiniMax API error (HTTP {response.status_code}): {response.text}"
        )

    return response.json()


def poll_async_task(task_id: str, task_type: str, api_key: str,
                    poll_interval: int = 10, max_wait: int = 600,
                    verbose: bool = False) -> Dict[str, Any]:
    """Poll async task (video/music) until completion.

    Args:
        task_id: The task ID returned from creation endpoint
        task_type: 'video_generation' or 'music_generation'
        poll_interval: Seconds between polls (default 10)
        max_wait: Maximum wait time in seconds (default 600)
    """
    elapsed = 0
    while elapsed < max_wait:
        result = api_get(
            f"query/{task_type}",
            {"task_id": task_id},
            api_key,
            verbose=False
        )

        status = result.get("status", "Unknown")
        if verbose and elapsed > 0 and elapsed % 30 == 0:
            print(f"  Polling... {elapsed}s elapsed, status: {status}",
                  file=sys.stderr)

        if status == "Success":
            return result
        elif status in ("Failed", "Error"):
            raise Exception(f"Task failed: {json.dumps(result)}")

        time.sleep(poll_interval)
        elapsed += poll_interval

    raise TimeoutError(f"Task {task_id} timed out after {max_wait}s")


def download_file(file_id: str, api_key: str, output_path: str,
                  verbose: bool = False) -> str:
    """Download file from MiniMax file service."""
    result = api_get("files/retrieve", {"file_id": file_id}, api_key, verbose)

    download_url = result.get("file", {}).get("download_url")
    if not download_url:
        raise Exception(f"No download URL in response: {json.dumps(result)}")

    if verbose:
        print(f"  Downloading to: {output_path}", file=sys.stderr)

    response = requests.get(download_url, stream=True, timeout=300)
    response.raise_for_status()

    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)

    return output_path


def get_output_dir() -> Path:
    """Get project output directory for generated assets."""
    script_dir = Path(__file__).parent
    for parent in [script_dir] + list(script_dir.parents):
        if (parent / '.git').exists() or (parent / '.claude').exists():
            output_dir = parent / 'docs' / 'assets'
            output_dir.mkdir(parents=True, exist_ok=True)
            return output_dir
    # Fallback
    output_dir = script_dir.parent / 'assets'
    output_dir.mkdir(parents=True, exist_ok=True)
    return output_dir
178
.opencode/skills/ai-multimodal/scripts/minimax_cli.py
Normal file
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
MiniMax CLI entry point - standalone CLI for MiniMax generation tasks.
|
||||
|
||||
Can be called directly or delegated to from gemini_batch_process.py
|
||||
when MiniMax models are detected.
|
||||
|
||||
Usage:
|
||||
    python minimax_cli.py --task generate --prompt "A cat" --model image-01
    python minimax_cli.py --task generate-video --prompt "A dancer" --model MiniMax-Hailuo-2.3
    python minimax_cli.py --task generate-speech --text "Hello" --model speech-2.8-hd --voice English_Warm_Bestie
    python minimax_cli.py --task generate-music --lyrics "La la la" --prompt "pop song" --model music-2.5
"""

import argparse
import sys

from minimax_api_client import find_minimax_api_key
from minimax_generate import (
    generate_image, generate_video, generate_speech, generate_music
)

# Default model per task, used when --model is not given
TASK_DEFAULTS = {
    'generate': 'image-01',
    'generate-video': 'MiniMax-Hailuo-2.3',
    'generate-speech': 'speech-2.8-hd',
    'generate-music': 'music-2.5'
}


def main():
    parser = argparse.ArgumentParser(
        description='MiniMax AI generation CLI (image/video/speech/music)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate image
  %(prog)s --task generate --prompt "A cyberpunk city at night" --model image-01 --aspect-ratio 16:9

  # Generate video (async, ~30-60s)
  %(prog)s --task generate-video --prompt "A dancer performing" --model MiniMax-Hailuo-2.3

  # Generate speech
  %(prog)s --task generate-speech --text "Welcome to the show" --model speech-2.8-hd --voice English_Warm_Bestie

  # Generate music with lyrics
  %(prog)s --task generate-music --lyrics "Verse 1\\nHello world" --prompt "upbeat pop" --model music-2.5
"""
    )

    parser.add_argument('--task', required=True,
                        choices=['generate', 'generate-video',
                                 'generate-speech', 'generate-music'],
                        help='Generation task type')
    parser.add_argument('--prompt', help='Text prompt for generation')
    parser.add_argument('--text', help='Text for speech generation')
    parser.add_argument('--lyrics', help='Lyrics for music generation')
    parser.add_argument('--model', help='Model name (auto-detected from task)')
    parser.add_argument('--aspect-ratio', default='1:1',
                        choices=['1:1', '16:9', '4:3', '3:2', '2:3',
                                 '3:4', '9:16', '21:9'],
                        help='Aspect ratio for image generation')
    parser.add_argument('--num-images', type=int, default=1,
                        help='Number of images (1-9, default: 1)')
    parser.add_argument('--duration', type=int, default=6,
                        choices=[6, 10],
                        help='Video duration in seconds (6 or 10)')
    parser.add_argument('--resolution', default='1080P',
                        choices=['720P', '1080P'],
                        help='Video resolution')
    parser.add_argument('--voice', default='English_expressive_narrator',
                        help='Voice ID for speech (default: English_expressive_narrator)')
    parser.add_argument('--emotion', default='neutral',
                        choices=['happy', 'sad', 'angry', 'fearful',
                                 'disgusted', 'surprised', 'neutral'],
                        help='Emotion for speech')
    parser.add_argument('--output-format', default='mp3',
                        choices=['mp3', 'wav', 'flac', 'pcm'],
                        help='Audio output format')
    parser.add_argument('--first-frame', help='Image URL for video first frame')
    parser.add_argument('--output', '-o', help='Output file path')
    parser.add_argument('--verbose', '-v', action='store_true')

    args = parser.parse_args()

    # Auto-detect model from task
    if not args.model:
        args.model = TASK_DEFAULTS.get(args.task, 'image-01')
        if args.verbose:
            print(f"Auto-detected model: {args.model}")

    # Find API key
    api_key = find_minimax_api_key()
    if not api_key:
        print("Error: MINIMAX_API_KEY not found")
        print("\nSetup:")
        print("1. export MINIMAX_API_KEY='your-key'")
        print("2. Or add to .env: MINIMAX_API_KEY=your-key")
        print("\nGet key at: https://platform.minimax.io/user-center/basic-information/interface-key")
        sys.exit(1)

    # Dispatch to task handler
    try:
        if args.task == 'generate':
            if not args.prompt:
                parser.error("--prompt required for image generation")
            result = generate_image(
                api_key, args.prompt, args.model,
                args.aspect_ratio, args.num_images,
                args.output, args.verbose
            )
        elif args.task == 'generate-video':
            if not args.prompt:
                parser.error("--prompt required for video generation")
            result = generate_video(
                api_key, args.prompt, args.model,
                args.duration, args.resolution,
                args.first_frame, args.output, args.verbose
            )
        elif args.task == 'generate-speech':
            text = args.text or args.prompt
            if not text:
                parser.error("--text or --prompt required for speech")
            result = generate_speech(
                api_key, text, args.model,
                args.voice, args.emotion, args.output_format,
                output=args.output, verbose=args.verbose
            )
        elif args.task == 'generate-music':
            if not args.lyrics and not args.prompt:
                parser.error("--lyrics or --prompt required for music")
            result = generate_music(
                api_key, args.lyrics or '', args.prompt or '',
                args.model, args.output_format,
                args.output, args.verbose
            )
        else:
            # Unreachable in practice: argparse choices reject unknown tasks
            parser.error(f"Unknown task: {args.task}")

        # Print results
        print_result(result, args.task)

    except Exception as e:
        print(f"\nError: {e}", file=sys.stderr)
        sys.exit(1)


def print_result(result: dict, task: str):
    """Print generation result in LLM-friendly format."""
    print("\n=== RESULTS ===\n")
    print(f"[{task}]")
    print(f"Status: {result.get('status', 'unknown')}")

    if result.get('status') == 'success':
        if 'generated_images' in result:
            for img in result['generated_images']:
                print(f"Generated image: {img}")
        if 'generated_video' in result:
            print(f"Generated video: {result['generated_video']}")
        if 'generation_time' in result:
            print(f"Generation time: {result['generation_time']:.1f}s")
        if 'generated_audio' in result:
            print(f"Generated audio: {result['generated_audio']}")
        if 'duration_ms' in result:
            dur = result['duration_ms'] / 1000
            print(f"Duration: {dur:.1f}s")
    elif result.get('error'):
        print(f"Error: {result['error']}")

    print(f"\nModel: {result.get('model', 'unknown')}")


if __name__ == '__main__':
    main()
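The dispatch above can also be exercised in-process, the same way the unit tests later in this commit drive it: patch `sys.argv` and call `main()`. A minimal sketch, assuming `MINIMAX_API_KEY` is exported and the scripts directory is on `sys.path` (the output path is hypothetical, and running it unmocked makes a real API call):

```python
# Sketch only: drives minimax_cli.main() in-process, mirroring the
# patching style used by test_minimax_cli.py in this same commit.
import sys
from unittest.mock import patch

import minimax_cli

argv = ['minimax_cli', '--task', 'generate-speech',
        '--text', 'Hello', '--output', 'hello.mp3']  # hello.mp3: hypothetical path
with patch('sys.argv', argv):
    minimax_cli.main()  # prints the "=== RESULTS ===" block on success
```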
278
.opencode/skills/ai-multimodal/scripts/minimax_generate.py
Normal file
@@ -0,0 +1,278 @@
#!/usr/bin/env python3
"""
MiniMax generation CLI - image, video, speech, and music generation.

Models:
- Image: image-01, image-01-live
- Video: MiniMax-Hailuo-2.3, MiniMax-Hailuo-2.3-Fast, MiniMax-Hailuo-02, S2V-01
- Speech: speech-2.8-hd, speech-2.8-turbo, speech-2.6-hd, speech-2.6-turbo
- Music: music-2.5

Usage:
    python minimax_generate.py --task generate --prompt "A cat in space" --model image-01
    python minimax_generate.py --task generate-video --prompt "A dancer" --model MiniMax-Hailuo-2.3
    python minimax_generate.py --task generate-speech --text "Hello world" --model speech-2.8-hd
    python minimax_generate.py --task generate-music --lyrics "Verse 1..." --model music-2.5
"""

import argparse
import base64
import json
import shutil
import sys
import time
from pathlib import Path

from minimax_api_client import (
    find_minimax_api_key, api_post, poll_async_task,
    download_file, get_output_dir
)

# Model registries
MINIMAX_IMAGE_MODELS = {'image-01', 'image-01-live'}
MINIMAX_VIDEO_MODELS = {
    'MiniMax-Hailuo-2.3', 'MiniMax-Hailuo-2.3-Fast',
    'MiniMax-Hailuo-02', 'S2V-01'
}
MINIMAX_SPEECH_MODELS = {
    'speech-2.8-hd', 'speech-2.8-turbo',
    'speech-2.6-hd', 'speech-2.6-turbo',
    'speech-02-hd', 'speech-02-turbo'
}
MINIMAX_MUSIC_MODELS = {'music-2.5', 'music-2.0'}

ALL_MINIMAX_MODELS = (
    MINIMAX_IMAGE_MODELS | MINIMAX_VIDEO_MODELS |
    MINIMAX_SPEECH_MODELS | MINIMAX_MUSIC_MODELS
)


def is_minimax_model(model: str) -> bool:
    """Check if model is a MiniMax model."""
    return (
        model in ALL_MINIMAX_MODELS or
        model.startswith('MiniMax-') or
        model.startswith('image-01') or
        model.startswith('speech-') or
        model.startswith('music-') or
        model.startswith('S2V-')
    )


def generate_image(api_key: str, prompt: str, model: str = 'image-01',
                   aspect_ratio: str = '1:1', num_images: int = 1,
                   output: str = None, verbose: bool = False) -> dict:
    """Generate image using MiniMax image-01 model."""
    payload = {
        "model": model,
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
        "n": min(num_images, 9),
        "response_format": "url",
        "prompt_optimizer": True
    }

    if verbose:
        print(f"Generating {num_images} image(s) with {model}...")

    result = api_post("image_generation", payload, api_key, verbose)

    # Download images
    image_urls = result.get("data", {}).get("image_urls", [])
    if not image_urls:
        return {"status": "error", "error": "No images in response"}

    output_dir = get_output_dir()
    saved_files = []
    import requests as req

    for i, url in enumerate(image_urls):
        ts = int(time.time())
        fname = f"minimax_image_{ts}_{i}.png"
        fpath = output_dir / fname

        resp = req.get(url, timeout=60)
        resp.raise_for_status()
        with open(fpath, 'wb') as f:
            f.write(resp.content)
        saved_files.append(str(fpath))

        if verbose:
            print(f" Saved: {fpath}")

    # Copy first image to output if specified
    if output and saved_files:
        Path(output).parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(saved_files[0], output)

    return {"status": "success", "generated_images": saved_files, "model": model}


def generate_video(api_key: str, prompt: str, model: str = 'MiniMax-Hailuo-2.3',
                   duration: int = 6, resolution: str = '1080P',
                   first_frame: str = None, output: str = None,
                   verbose: bool = False) -> dict:
    """Generate video using MiniMax Hailuo models (async)."""
    payload = {
        "prompt": prompt,
        "model": model,
        "duration": duration,
        "resolution": resolution
    }
    if first_frame:
        payload["first_frame_image"] = first_frame

    if verbose:
        print(f"Submitting video generation with {model}...")

    result = api_post("video_generation", payload, api_key, verbose)
    task_id = result.get("task_id")
    if not task_id:
        return {"status": "error", "error": f"No task_id: {json.dumps(result)}"}

    if verbose:
        print(f" Task ID: {task_id}, polling...")

    start = time.time()
    poll_result = poll_async_task(task_id, "video_generation", api_key,
                                  poll_interval=10, verbose=verbose)

    file_id = poll_result.get("file_id")
    if not file_id:
        return {"status": "error", "error": f"No file_id: {json.dumps(poll_result)}"}

    output_dir = get_output_dir()
    ts = int(time.time())
    output_path = str(output_dir / f"minimax_video_{ts}.mp4")
    download_file(file_id, api_key, output_path, verbose)

    elapsed = time.time() - start
    file_size = Path(output_path).stat().st_size / (1024 * 1024)

    if output:
        Path(output).parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(output_path, output)

    if verbose:
        print(f" Generated in {elapsed:.1f}s, size: {file_size:.2f} MB")

    return {
        "status": "success", "generated_video": output_path,
        "generation_time": elapsed, "file_size_mb": file_size, "model": model
    }


def generate_speech(api_key: str, text: str, model: str = 'speech-2.8-hd',
                    voice: str = 'English_expressive_narrator',
                    emotion: str = 'neutral', output_format: str = 'mp3',
                    rate: float = 1.0, output: str = None,
                    verbose: bool = False) -> dict:
    """Generate speech using MiniMax TTS v2 API."""
    payload = {
        "model": model,
        "text": text[:10000],
        "stream": False,
        "language_boost": "auto",
        "output_format": "hex",
        "voice_setting": {
            "voice_id": voice,
            "speed": rate,
            "vol": 1.0,
            "pitch": 0
        },
        "audio_setting": {
            "sample_rate": 32000,
            "bitrate": 128000,
            "format": output_format,
            "channel": 1
        }
    }

    if verbose:
        print(f"Generating speech with {model}, voice: {voice}...")

    result = api_post("t2a_v2", payload, api_key, verbose)

    audio_data = result.get("data", {}).get("audio")
    if not audio_data:
        return {"status": "error", "error": "No audio in response"}

    output_dir = get_output_dir()
    ts = int(time.time())
    ext = output_format if output_format in ('mp3', 'wav', 'flac') else 'mp3'
    output_path = str(output_dir / f"minimax_speech_{ts}.{ext}")

    # Audio returned as hex-encoded string from t2a_v2
    audio_bytes = bytes.fromhex(audio_data)
    with open(output_path, 'wb') as f:
        f.write(audio_bytes)

    if output:
        Path(output).parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(output_path, output)

    if verbose:
        size_kb = len(audio_bytes) / 1024
        print(f" Saved: {output_path} ({size_kb:.1f} KB)")

    return {"status": "success", "generated_audio": output_path, "model": model}


def generate_music(api_key: str, lyrics: str = '', prompt: str = '',
                   model: str = 'music-2.5', output_format: str = 'mp3',
                   output: str = None, verbose: bool = False) -> dict:
    """Generate music using MiniMax music models."""
    payload = {
        "model": model,
        "output_format": "url",
        "audio_setting": {
            "sample_rate": 44100,
            "bitrate": 128000,
            "format": output_format
        }
    }
    if lyrics:
        payload["lyrics"] = lyrics[:3500]
    if prompt:
        payload["prompt"] = prompt[:2000]

    if verbose:
        print(f"Generating music with {model}...")

    result = api_post("music_generation", payload, api_key, verbose, timeout=300)

    audio_data = result.get("data", {}).get("audio")
    extra = result.get("extra_info", {})
    duration_ms = extra.get("music_duration", 0)

    if not audio_data:
        return {"status": "error", "error": "No audio in response"}

    output_dir = get_output_dir()
    ts = int(time.time())
    output_path = str(output_dir / f"minimax_music_{ts}.{output_format}")

    # Download from URL or decode hex
    if audio_data.startswith("http"):
        import requests as req
        resp = req.get(audio_data, timeout=120)
        resp.raise_for_status()
        with open(output_path, 'wb') as f:
            f.write(resp.content)
    else:
        audio_bytes = bytes.fromhex(audio_data)
        with open(output_path, 'wb') as f:
            f.write(audio_bytes)

    if output:
        Path(output).parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(output_path, output)

    if verbose:
        dur_s = duration_ms / 1000 if duration_ms else 0
        print(f" Saved: {output_path} ({dur_s:.1f}s)")

    return {
        "status": "success", "generated_audio": output_path,
        "duration_ms": duration_ms, "model": model
    }
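The generators above can also be called directly, bypassing the CLI; each returns a plain dict with a `status` key, which is exactly what the dispatch code in `minimax_cli.py` relies on. A minimal sketch, assuming a valid key is discoverable (the prompt is illustrative):

```python
# Sketch only: programmatic use of generate_image() above.
from minimax_api_client import find_minimax_api_key
from minimax_generate import generate_image

api_key = find_minimax_api_key()
result = generate_image(api_key, "A lighthouse at dusk",
                        aspect_ratio="16:9", num_images=2, verbose=True)
if result["status"] == "success":
    print(result["generated_images"])  # paths of the saved PNG files
else:
    print(result["error"])
```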
26
.opencode/skills/ai-multimodal/scripts/requirements.txt
Normal file
@@ -0,0 +1,26 @@
# AI Multimodal Skill Dependencies
# Python 3.10+ required

# Google Gemini API
google-genai>=0.1.0

# HTTP client (the minimax_*.py scripts import requests)
requests>=2.31.0

# PDF processing
pypdf>=4.0.0

# Document conversion
python-docx>=1.0.0
docx2pdf>=0.1.8  # Windows only, optional on Linux/macOS

# Markdown processing
markdown>=3.5.0

# Image processing
Pillow>=10.0.0

# Environment variable management
python-dotenv>=1.0.0

# Testing dependencies (dev)
pytest>=8.0.0
pytest-cov>=4.1.0
pytest-mock>=3.12.0
BIN
.opencode/skills/ai-multimodal/scripts/tests/.coverage
Normal file
Binary file not shown.
@@ -0,0 +1,20 @@
# Core dependencies
google-genai>=0.2.0
python-dotenv>=1.0.0

# Image processing
pillow>=10.0.0

# PDF processing
pypdf>=3.0.0

# Document conversion
markdown>=3.5

# Testing
pytest>=7.4.0
pytest-cov>=4.1.0
pytest-mock>=3.12.0

# Optional dependencies for full functionality
# ffmpeg-python>=0.2.0  # For media optimization (requires ffmpeg installed)
@@ -0,0 +1,74 @@
"""
Tests for document_converter.py
"""

import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock, mock_open

sys.path.insert(0, str(Path(__file__).parent.parent))

import document_converter as dc


class TestAPIKeyFinder:
    """Test API key finding logic."""

    @patch.dict('os.environ', {'GEMINI_API_KEY': 'test-key-from-env'})
    def test_find_api_key_from_env(self):
        """Test finding API key from environment."""
        api_key = dc.find_api_key()
        assert api_key == 'test-key-from-env'

    @patch.dict('os.environ', {}, clear=True)
    @patch('document_converter.load_dotenv', None)
    def test_find_api_key_no_key(self):
        """Test when no API key is available."""
        api_key = dc.find_api_key()
        assert api_key is None


class TestProjectRoot:
    """Test project root finding."""

    @patch('pathlib.Path.exists')
    def test_find_project_root_with_git(self, mock_exists):
        """Test finding project root with .git directory."""
        root = dc.find_project_root()
        assert isinstance(root, Path)


class TestMimeType:
    """Test MIME type detection."""

    def test_pdf_mime_type(self):
        """Test PDF MIME type."""
        assert dc.get_mime_type('document.pdf') == 'application/pdf'

    def test_image_mime_types(self):
        """Test image MIME types."""
        assert dc.get_mime_type('image.jpg') == 'image/jpeg'
        assert dc.get_mime_type('image.png') == 'image/png'

    def test_unknown_mime_type(self):
        """Test unknown file extension."""
        assert dc.get_mime_type('file.unknown') == 'application/octet-stream'


class TestIntegration:
    """Integration tests."""

    def test_mime_type_integration(self):
        """Test MIME type detection with various extensions."""
        test_cases = [
            ('document.pdf', 'application/pdf'),
            ('image.jpg', 'image/jpeg'),
            ('unknown.xyz', 'application/octet-stream'),
        ]
        for file_path, expected_mime in test_cases:
            assert dc.get_mime_type(file_path) == expected_mime


if __name__ == '__main__':
    pytest.main([__file__, '-v', '--cov=document_converter', '--cov-report=term-missing'])
@@ -0,0 +1,362 @@
"""
Tests for gemini_batch_process.py
"""

import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

import gemini_batch_process as gbp


class TestAPIKeyFinder:
    """Test API key detection."""

    def test_find_api_key_from_env(self, monkeypatch):
        """Test finding API key from environment variable."""
        monkeypatch.setenv('GEMINI_API_KEY', 'test_key_123')
        assert gbp.find_api_key() == 'test_key_123'

    @patch('gemini_batch_process.load_dotenv')
    def test_find_api_key_not_found(self, mock_load_dotenv, monkeypatch):
        """Test when API key is not found."""
        monkeypatch.delenv('GEMINI_API_KEY', raising=False)
        # Mock load_dotenv to not actually load any files
        mock_load_dotenv.return_value = None
        assert gbp.find_api_key() is None


class TestMimeTypeDetection:
    """Test MIME type detection."""

    def test_audio_mime_types(self):
        """Test audio file MIME types."""
        assert gbp.get_mime_type('test.mp3') == 'audio/mp3'
        assert gbp.get_mime_type('test.wav') == 'audio/wav'
        assert gbp.get_mime_type('test.aac') == 'audio/aac'
        assert gbp.get_mime_type('test.flac') == 'audio/flac'

    def test_image_mime_types(self):
        """Test image file MIME types."""
        assert gbp.get_mime_type('test.jpg') == 'image/jpeg'
        assert gbp.get_mime_type('test.jpeg') == 'image/jpeg'
        assert gbp.get_mime_type('test.png') == 'image/png'
        assert gbp.get_mime_type('test.webp') == 'image/webp'

    def test_video_mime_types(self):
        """Test video file MIME types."""
        assert gbp.get_mime_type('test.mp4') == 'video/mp4'
        assert gbp.get_mime_type('test.mov') == 'video/quicktime'
        assert gbp.get_mime_type('test.avi') == 'video/x-msvideo'

    def test_document_mime_types(self):
        """Test document file MIME types."""
        assert gbp.get_mime_type('test.pdf') == 'application/pdf'
        assert gbp.get_mime_type('test.txt') == 'text/plain'

    def test_unknown_mime_type(self):
        """Test unknown file extension."""
        assert gbp.get_mime_type('test.xyz') == 'application/octet-stream'

    def test_case_insensitive(self):
        """Test case-insensitive extension matching."""
        assert gbp.get_mime_type('TEST.MP3') == 'audio/mp3'
        assert gbp.get_mime_type('Test.JPG') == 'image/jpeg'


class TestFileUpload:
    """Test file upload functionality."""

    @patch('gemini_batch_process.genai.Client')
    def test_upload_file_success(self, mock_client_class):
        """Test successful file upload."""
        # Mock client and file
        mock_client = Mock()
        mock_file = Mock()
        mock_file.state.name = 'ACTIVE'
        mock_file.name = 'test_file'
        mock_client.files.upload.return_value = mock_file

        result = gbp.upload_file(mock_client, 'test.jpg', verbose=False)

        assert result == mock_file
        mock_client.files.upload.assert_called_once_with(file='test.jpg')

    @patch('gemini_batch_process.genai.Client')
    @patch('gemini_batch_process.time.sleep')
    def test_upload_video_with_processing(self, mock_sleep, mock_client_class):
        """Test video upload with processing wait."""
        mock_client = Mock()

        # First call: PROCESSING, second call: ACTIVE
        mock_file_processing = Mock()
        mock_file_processing.state.name = 'PROCESSING'
        mock_file_processing.name = 'test_video'

        mock_file_active = Mock()
        mock_file_active.state.name = 'ACTIVE'
        mock_file_active.name = 'test_video'

        mock_client.files.upload.return_value = mock_file_processing
        mock_client.files.get.return_value = mock_file_active

        result = gbp.upload_file(mock_client, 'test.mp4', verbose=False)

        assert result.state.name == 'ACTIVE'

    @patch('gemini_batch_process.genai.Client')
    def test_upload_file_failed(self, mock_client_class):
        """Test failed file upload."""
        mock_client = Mock()
        mock_file = Mock()
        mock_file.state.name = 'FAILED'
        mock_client.files.upload.return_value = mock_file
        mock_client.files.get.return_value = mock_file

        with pytest.raises(ValueError, match="File processing failed"):
            gbp.upload_file(mock_client, 'test.mp4', verbose=False)


class TestProcessFile:
    """Test file processing functionality."""

    @patch('gemini_batch_process.genai.Client')
    @patch('builtins.open', create=True)
    @patch('pathlib.Path.stat')
    def test_process_small_file_inline(self, mock_stat, mock_open, mock_client_class):
        """Test processing small file with inline data."""
        # Mock small file
        mock_stat.return_value.st_size = 10 * 1024 * 1024  # 10MB

        # Mock file content
        mock_open.return_value.__enter__.return_value.read.return_value = b'test_data'

        # Mock client and response
        mock_client = Mock()
        mock_response = Mock()
        mock_response.text = 'Test response'
        mock_client.models.generate_content.return_value = mock_response

        result = gbp.process_file(
            client=mock_client,
            file_path='test.jpg',
            prompt='Describe this image',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False
        )

        assert result['status'] == 'success'
        assert result['response'] == 'Test response'

    @patch('gemini_batch_process.upload_file')
    @patch('gemini_batch_process.genai.Client')
    @patch('pathlib.Path.stat')
    def test_process_large_file_api(self, mock_stat, mock_client_class, mock_upload):
        """Test processing large file with File API."""
        # Mock large file
        mock_stat.return_value.st_size = 50 * 1024 * 1024  # 50MB

        # Mock upload and response
        mock_file = Mock()
        mock_upload.return_value = mock_file

        mock_client = Mock()
        mock_response = Mock()
        mock_response.text = 'Test response'
        mock_client.models.generate_content.return_value = mock_response

        result = gbp.process_file(
            client=mock_client,
            file_path='test.mp4',
            prompt='Summarize this video',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False
        )

        assert result['status'] == 'success'
        mock_upload.assert_called_once()

    @patch('gemini_batch_process.genai.Client')
    @patch('builtins.open', create=True)
    @patch('pathlib.Path.stat')
    def test_process_file_error_handling(self, mock_stat, mock_open, mock_client_class):
        """Test error handling in file processing."""
        mock_stat.return_value.st_size = 1024

        # Mock file read
        mock_file = MagicMock()
        mock_file.__enter__.return_value.read.return_value = b'test_data'
        mock_open.return_value = mock_file

        mock_client = Mock()
        mock_client.models.generate_content.side_effect = Exception("API Error")

        result = gbp.process_file(
            client=mock_client,
            file_path='test.jpg',
            prompt='Test',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False,
            max_retries=1
        )

        assert result['status'] == 'error'
        assert 'API Error' in result['error']

    @patch('gemini_batch_process.genai.Client')
    @patch('builtins.open', create=True)
    @patch('pathlib.Path.stat')
    def test_image_generation_with_aspect_ratio(self, mock_stat, mock_open, mock_client_class):
        """Test image generation with aspect ratio config."""
        mock_stat.return_value.st_size = 1024

        # Mock file read
        mock_file = MagicMock()
        mock_file.__enter__.return_value.read.return_value = b'test'
        mock_open.return_value = mock_file

        mock_client = Mock()
        mock_response = Mock()
        mock_response.candidates = [Mock()]
        mock_response.candidates[0].content.parts = [
            Mock(inline_data=Mock(data=b'fake_image_data'))
        ]
        mock_client.models.generate_content.return_value = mock_response

        result = gbp.process_file(
            client=mock_client,
            file_path='test.txt',
            prompt='Generate mountain landscape',
            model='gemini-2.5-flash-image',
            task='generate',
            format_output='text',
            aspect_ratio='16:9',
            verbose=False
        )

        # Verify config was called with correct structure
        call_args = mock_client.models.generate_content.call_args
        config = call_args.kwargs.get('config')
        assert config is not None
        assert result['status'] == 'success'
        assert 'generated_image' in result


class TestBatchProcessing:
    """Test batch processing functionality."""

    @patch('gemini_batch_process.find_api_key')
    @patch('gemini_batch_process.process_file')
    @patch('gemini_batch_process.genai.Client')
    def test_batch_process_success(self, mock_client_class, mock_process, mock_find_key):
        """Test successful batch processing."""
        mock_find_key.return_value = 'test_key'
        mock_process.return_value = {'status': 'success', 'response': 'Test'}

        results = gbp.batch_process(
            files=['test1.jpg', 'test2.jpg'],
            prompt='Analyze',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False,
            dry_run=False
        )

        assert len(results) == 2
        assert all(r['status'] == 'success' for r in results)

    @patch('gemini_batch_process.find_api_key')
    def test_batch_process_no_api_key(self, mock_find_key):
        """Test batch processing without API key."""
        mock_find_key.return_value = None

        with pytest.raises(SystemExit):
            gbp.batch_process(
                files=['test.jpg'],
                prompt='Test',
                model='gemini-2.5-flash',
                task='analyze',
                format_output='text',
                verbose=False,
                dry_run=False
            )

    @patch('gemini_batch_process.find_api_key')
    def test_batch_process_dry_run(self, mock_find_key):
        """Test dry run mode."""
        # API key not needed for dry run, but we mock it to avoid sys.exit
        mock_find_key.return_value = 'test_key'

        results = gbp.batch_process(
            files=['test1.jpg', 'test2.jpg'],
            prompt='Test',
            model='gemini-2.5-flash',
            task='analyze',
            format_output='text',
            verbose=False,
            dry_run=True
        )

        assert results == []


class TestResultsSaving:
    """Test results saving functionality."""

    @patch('builtins.open', create=True)
    @patch('json.dump')
    def test_save_results_json(self, mock_json_dump, mock_open):
        """Test saving results as JSON."""
        results = [
            {'file': 'test1.jpg', 'status': 'success', 'response': 'Test1'},
            {'file': 'test2.jpg', 'status': 'success', 'response': 'Test2'}
        ]

        gbp.save_results(results, 'output.json', 'json')

        mock_json_dump.assert_called_once()

    @patch('builtins.open', create=True)
    @patch('csv.DictWriter')
    def test_save_results_csv(self, mock_csv_writer, mock_open):
        """Test saving results as CSV."""
        results = [
            {'file': 'test1.jpg', 'status': 'success', 'response': 'Test1'},
            {'file': 'test2.jpg', 'status': 'success', 'response': 'Test2'}
        ]

        gbp.save_results(results, 'output.csv', 'csv')

        # Verify CSV writer was used
        mock_csv_writer.assert_called_once()

    @patch('builtins.open', create=True)
    def test_save_results_markdown(self, mock_open):
        """Test saving results as Markdown."""
        mock_file = MagicMock()
        mock_open.return_value.__enter__.return_value = mock_file

        results = [
            {'file': 'test1.jpg', 'status': 'success', 'response': 'Test1'},
            {'file': 'test2.jpg', 'status': 'error', 'error': 'Failed'}
        ]

        gbp.save_results(results, 'output.md', 'markdown')

        # Verify write was called
        assert mock_file.write.call_count > 0


if __name__ == '__main__':
    pytest.main([__file__, '-v', '--cov=gemini_batch_process', '--cov-report=term-missing'])
@@ -0,0 +1,373 @@
"""
Tests for media_optimizer.py
"""

import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock
import json

sys.path.insert(0, str(Path(__file__).parent.parent))

import media_optimizer as mo


class TestEnvLoading:
    """Test environment variable loading."""

    @patch('media_optimizer.load_dotenv')
    @patch('pathlib.Path.exists')
    def test_load_env_files_success(self, mock_exists, mock_load_dotenv):
        """Test successful .env file loading."""
        mock_exists.return_value = True
        mo.load_env_files()
        # Should be called for skill, skills, and claude dirs
        assert mock_load_dotenv.call_count >= 1

    @patch('media_optimizer.load_dotenv', None)
    def test_load_env_files_no_dotenv(self):
        """Test when dotenv is not available."""
        # Should not raise an error
        mo.load_env_files()


class TestFFmpegCheck:
    """Test ffmpeg availability checking."""

    @patch('subprocess.run')
    def test_ffmpeg_installed(self, mock_run):
        """Test when ffmpeg is installed."""
        mock_run.return_value = Mock()
        assert mo.check_ffmpeg() is True

    @patch('subprocess.run')
    def test_ffmpeg_not_installed(self, mock_run):
        """Test when ffmpeg is not installed."""
        mock_run.side_effect = FileNotFoundError()
        assert mo.check_ffmpeg() is False

    @patch('subprocess.run')
    def test_ffmpeg_error(self, mock_run):
        """Test ffmpeg command error."""
        mock_run.side_effect = Exception("Error")
        assert mo.check_ffmpeg() is False


class TestMediaInfo:
    """Test media information extraction."""

    @patch('media_optimizer.check_ffmpeg')
    @patch('subprocess.run')
    def test_get_video_info(self, mock_run, mock_check):
        """Test extracting video information."""
        mock_check.return_value = True

        mock_result = Mock()
        mock_result.stdout = json.dumps({
            'format': {
                'size': '10485760',
                'duration': '120.5',
                'bit_rate': '691200'
            },
            'streams': [
                {
                    'codec_type': 'video',
                    'width': 1920,
                    'height': 1080,
                    'r_frame_rate': '30/1'
                },
                {
                    'codec_type': 'audio',
                    'sample_rate': '48000',
                    'channels': 2
                }
            ]
        })
        mock_run.return_value = mock_result

        info = mo.get_media_info('test.mp4')

        assert info['size'] == 10485760
        assert info['duration'] == 120.5
        assert info['width'] == 1920
        assert info['height'] == 1080
        assert info['sample_rate'] == 48000

    @patch('media_optimizer.check_ffmpeg')
    def test_get_media_info_no_ffmpeg(self, mock_check):
        """Test when ffmpeg is not available."""
        mock_check.return_value = False
        info = mo.get_media_info('test.mp4')
        assert info == {}

    @patch('media_optimizer.check_ffmpeg')
    @patch('subprocess.run')
    def test_get_media_info_error(self, mock_run, mock_check):
        """Test error handling in media info extraction."""
        mock_check.return_value = True
        mock_run.side_effect = Exception("Error")

        info = mo.get_media_info('test.mp4')
        assert info == {}


class TestVideoOptimization:
    """Test video optimization functionality."""

    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_video_success(self, mock_run, mock_info, mock_check):
        """Test successful video optimization."""
        mock_check.return_value = True
        mock_info.side_effect = [
            # Input info
            {
                'size': 50 * 1024 * 1024,
                'duration': 120.0,
                'bit_rate': 3500000,
                'width': 1920,
                'height': 1080
            },
            # Output info
            {
                'size': 25 * 1024 * 1024,
                'duration': 120.0,
                'width': 1920,
                'height': 1080
            }
        ]

        result = mo.optimize_video(
            'input.mp4',
            'output.mp4',
            quality=23,
            verbose=False
        )

        assert result is True
        mock_run.assert_called_once()

    @patch('media_optimizer.check_ffmpeg')
    def test_optimize_video_no_ffmpeg(self, mock_check):
        """Test video optimization without ffmpeg."""
        mock_check.return_value = False

        result = mo.optimize_video('input.mp4', 'output.mp4')
        assert result is False

    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    def test_optimize_video_no_info(self, mock_info, mock_check):
        """Test video optimization when info cannot be read."""
        mock_check.return_value = True
        mock_info.return_value = {}

        result = mo.optimize_video('input.mp4', 'output.mp4')
        assert result is False

    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_video_with_target_size(self, mock_run, mock_info, mock_check):
        """Test video optimization with target size."""
        mock_check.return_value = True
        mock_info.side_effect = [
            {'size': 100 * 1024 * 1024, 'duration': 60.0, 'bit_rate': 3500000},
            {'size': 50 * 1024 * 1024, 'duration': 60.0}
        ]

        result = mo.optimize_video(
            'input.mp4',
            'output.mp4',
            target_size_mb=50,
            verbose=False
        )

        assert result is True

    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_video_with_resolution(self, mock_run, mock_info, mock_check):
        """Test video optimization with custom resolution."""
        mock_check.return_value = True
        mock_info.side_effect = [
            {'size': 50 * 1024 * 1024, 'duration': 120.0, 'bit_rate': 3500000},
            {'size': 25 * 1024 * 1024, 'duration': 120.0}
        ]

        result = mo.optimize_video(
            'input.mp4',
            'output.mp4',
            resolution='1280x720',
            verbose=False
        )

        assert result is True


class TestAudioOptimization:
    """Test audio optimization functionality."""

    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    def test_optimize_audio_success(self, mock_run, mock_info, mock_check):
        """Test successful audio optimization."""
        mock_check.return_value = True
        mock_info.side_effect = [
            {'size': 10 * 1024 * 1024, 'duration': 300.0},
            {'size': 5 * 1024 * 1024, 'duration': 300.0}
        ]

        result = mo.optimize_audio(
            'input.mp3',
            'output.m4a',
            bitrate='64k',
            verbose=False
        )

        assert result is True
        mock_run.assert_called_once()

    @patch('media_optimizer.check_ffmpeg')
    def test_optimize_audio_no_ffmpeg(self, mock_check):
        """Test audio optimization without ffmpeg."""
        mock_check.return_value = False

        result = mo.optimize_audio('input.mp3', 'output.m4a')
        assert result is False


class TestImageOptimization:
    """Test image optimization functionality."""

    @patch('PIL.Image.open')
    @patch('pathlib.Path.stat')
    def test_optimize_image_success(self, mock_stat, mock_image_open):
        """Test successful image optimization."""
        # Mock image
        mock_resized = Mock()
        mock_resized.mode = 'RGB'

        mock_img = Mock()
        mock_img.width = 3840
        mock_img.height = 2160
        mock_img.mode = 'RGB'
        mock_img.resize.return_value = mock_resized
        mock_image_open.return_value = mock_img

        # Mock file sizes
        mock_stat.return_value.st_size = 5 * 1024 * 1024

        result = mo.optimize_image(
            'input.jpg',
            'output.jpg',
            max_width=1920,
            quality=85,
            verbose=False
        )

        assert result is True
        # Since image is resized, save is called on the resized image
        mock_resized.save.assert_called_once()

    @patch('PIL.Image.open')
    @patch('pathlib.Path.stat')
    def test_optimize_image_resize(self, mock_stat, mock_image_open):
        """Test image resizing during optimization."""
        mock_img = Mock()
        mock_img.width = 3840
        mock_img.height = 2160
        mock_img.mode = 'RGB'
        mock_resized = Mock()
        mock_img.resize.return_value = mock_resized
        mock_image_open.return_value = mock_img

        mock_stat.return_value.st_size = 5 * 1024 * 1024

        mo.optimize_image('input.jpg', 'output.jpg', max_width=1920, verbose=False)

        mock_img.resize.assert_called_once()

    @patch('PIL.Image.open')
    @patch('pathlib.Path.stat')
    def test_optimize_image_rgba_to_jpg(self, mock_stat, mock_image_open):
        """Test converting RGBA to RGB for JPEG."""
        mock_img = Mock()
        mock_img.width = 1920
        mock_img.height = 1080
        mock_img.mode = 'RGBA'
        mock_img.split.return_value = [Mock(), Mock(), Mock(), Mock()]
        mock_image_open.return_value = mock_img

        mock_stat.return_value.st_size = 1024 * 1024

        with patch('PIL.Image.new') as mock_new:
            mock_rgb = Mock()
            mock_new.return_value = mock_rgb

            mo.optimize_image('input.png', 'output.jpg', verbose=False)

            mock_new.assert_called_once()

    def test_optimize_image_no_pillow(self):
        """Test image optimization without Pillow."""
        with patch.dict('sys.modules', {'PIL': None}):
            result = mo.optimize_image('input.jpg', 'output.jpg')
            # Will fail to import but function handles it
            assert result is False


class TestVideoSplitting:
    """Test video splitting functionality."""

    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    @patch('subprocess.run')
    @patch('pathlib.Path.mkdir')
    def test_split_video_success(self, mock_mkdir, mock_run, mock_info, mock_check):
        """Test successful video splitting."""
        mock_check.return_value = True
        mock_info.return_value = {'duration': 7200.0}  # 2 hours

        result = mo.split_video(
            'input.mp4',
            './chunks',
            chunk_duration=3600,  # 1 hour chunks
            verbose=False
        )

        # Duration 7200s / 3600s = 2, +1 for safety = 3 chunks
        assert len(result) == 3
        assert mock_run.call_count == 3

    @patch('media_optimizer.check_ffmpeg')
    @patch('media_optimizer.get_media_info')
    def test_split_video_short_duration(self, mock_info, mock_check):
        """Test splitting video shorter than chunk duration."""
        mock_check.return_value = True
        mock_info.return_value = {'duration': 1800.0}  # 30 minutes

        result = mo.split_video(
            'input.mp4',
            './chunks',
            chunk_duration=3600,  # 1 hour
            verbose=False
        )

        assert result == ['input.mp4']

    @patch('media_optimizer.check_ffmpeg')
    def test_split_video_no_ffmpeg(self, mock_check):
        """Test video splitting without ffmpeg."""
        mock_check.return_value = False

        result = mo.split_video('input.mp4', './chunks')
        assert result == []


if __name__ == '__main__':
    pytest.main([__file__, '-v', '--cov=media_optimizer', '--cov-report=term-missing'])
@@ -0,0 +1,232 @@
"""
Tests for minimax_api_client.py - HTTP utilities, auth, polling, downloads.
"""

import json
import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock

sys.path.insert(0, str(Path(__file__).parent.parent))

import minimax_api_client as mac


class TestFindMinimaxApiKey:
    """Test API key discovery."""

    def test_find_key_from_env(self, monkeypatch):
        monkeypatch.setenv('MINIMAX_API_KEY', 'test-minimax-key')
        with patch.object(mac, 'CENTRALIZED_RESOLVER_AVAILABLE', False):
            assert mac.find_minimax_api_key() == 'test-minimax-key'

    def test_find_key_not_found(self, monkeypatch):
        monkeypatch.delenv('MINIMAX_API_KEY', raising=False)
        with patch.object(mac, 'CENTRALIZED_RESOLVER_AVAILABLE', False):
            result = mac.find_minimax_api_key()
        assert result is None

    def test_find_key_via_centralized_resolver(self, monkeypatch):
        mock_resolve = Mock(return_value='resolved-key')
        with patch.object(mac, 'CENTRALIZED_RESOLVER_AVAILABLE', True), \
             patch.object(mac, 'resolve_env', mock_resolve, create=True):
            result = mac.find_minimax_api_key()
        assert result == 'resolved-key'
        mock_resolve.assert_called_once_with(
            'MINIMAX_API_KEY', skill='ai-multimodal'
        )


class TestGetHeaders:
    """Test header generation."""

    def test_headers_contain_bearer_token(self):
        headers = mac.get_headers('my-api-key')
        assert headers['Authorization'] == 'Bearer my-api-key'
        assert headers['Content-Type'] == 'application/json'

    def test_headers_with_different_key(self):
        headers = mac.get_headers('another-key-123')
        assert 'another-key-123' in headers['Authorization']


class TestApiPost:
    """Test POST request handling."""

    @patch('minimax_api_client.requests.post')
    def test_successful_post(self, mock_post):
        mock_resp = Mock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {
            "base_resp": {"status_code": 0},
            "data": {"result": "ok"}
        }
        mock_post.return_value = mock_resp

        result = mac.api_post("test_endpoint", {"key": "val"}, "api-key")
        assert result["data"]["result"] == "ok"
        mock_post.assert_called_once()

    @patch('minimax_api_client.requests.post')
    def test_http_error_raises(self, mock_post):
        mock_resp = Mock()
        mock_resp.status_code = 401
        mock_resp.text = "Unauthorized"
        mock_post.return_value = mock_resp

        with pytest.raises(Exception, match="HTTP 401"):
            mac.api_post("endpoint", {}, "bad-key")

    @patch('minimax_api_client.requests.post')
    def test_minimax_error_code_raises(self, mock_post):
        mock_resp = Mock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {
            "base_resp": {"status_code": 1002, "status_msg": "Rate limit"}
        }
        mock_post.return_value = mock_resp

        with pytest.raises(Exception, match="code 1002.*Rate limit"):
            mac.api_post("endpoint", {}, "api-key")

    @patch('minimax_api_client.requests.post')
    def test_custom_timeout(self, mock_post):
        mock_resp = Mock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {"base_resp": {"status_code": 0}}
        mock_post.return_value = mock_resp

        mac.api_post("endpoint", {}, "key", timeout=300)
        _, kwargs = mock_post.call_args
        assert kwargs['timeout'] == 300

    @patch('minimax_api_client.requests.post')
    def test_default_timeout_is_120(self, mock_post):
        mock_resp = Mock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {"base_resp": {"status_code": 0}}
        mock_post.return_value = mock_resp

        mac.api_post("endpoint", {}, "key")
        _, kwargs = mock_post.call_args
        assert kwargs['timeout'] == 120

    @patch('minimax_api_client.requests.post')
    def test_verbose_prints_url(self, mock_post, capsys):
        mock_resp = Mock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {"base_resp": {"status_code": 0}}
        mock_post.return_value = mock_resp

        mac.api_post("image_generation", {}, "key", verbose=True)
        captured = capsys.readouterr()
        assert "image_generation" in captured.err


class TestApiGet:
    """Test GET request handling."""

    @patch('minimax_api_client.requests.get')
    def test_successful_get(self, mock_get):
        mock_resp = Mock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {"status": "Success", "file_id": "abc"}
        mock_get.return_value = mock_resp

        result = mac.api_get("query/video_generation", {"task_id": "t1"}, "key")
        assert result["status"] == "Success"

    @patch('minimax_api_client.requests.get')
    def test_get_http_error(self, mock_get):
        mock_resp = Mock()
        mock_resp.status_code = 500
        mock_resp.text = "Server Error"
        mock_get.return_value = mock_resp

        with pytest.raises(Exception, match="HTTP 500"):
            mac.api_get("endpoint", {}, "key")


class TestPollAsyncTask:
    """Test async task polling."""

    @patch('minimax_api_client.time.sleep')
    @patch('minimax_api_client.api_get')
    def test_poll_success_first_try(self, mock_get, mock_sleep):
        mock_get.return_value = {"status": "Success", "file_id": "f123"}

        result = mac.poll_async_task("task1", "video_generation", "key")
        assert result["file_id"] == "f123"
        mock_sleep.assert_not_called()

    @patch('minimax_api_client.time.sleep')
    @patch('minimax_api_client.api_get')
    def test_poll_success_after_processing(self, mock_get, mock_sleep):
        mock_get.side_effect = [
            {"status": "Processing"},
            {"status": "Processing"},
            {"status": "Success", "file_id": "f456"}
        ]

        result = mac.poll_async_task("task2", "video_generation", "key",
                                     poll_interval=1)
        assert result["file_id"] == "f456"
        assert mock_sleep.call_count == 2

    @patch('minimax_api_client.time.sleep')
    @patch('minimax_api_client.api_get')
    def test_poll_task_failed(self, mock_get, mock_sleep):
        mock_get.return_value = {"status": "Failed", "error": "bad input"}

        with pytest.raises(Exception, match="Task failed"):
            mac.poll_async_task("task3", "video_generation", "key")

    @patch('minimax_api_client.time.sleep')
    @patch('minimax_api_client.api_get')
    def test_poll_timeout(self, mock_get, mock_sleep):
        mock_get.return_value = {"status": "Processing"}

        with pytest.raises(TimeoutError, match="timed out"):
            mac.poll_async_task("task4", "video_generation", "key",
                                poll_interval=1, max_wait=3)


class TestDownloadFile:
    """Test file download."""

    @patch('minimax_api_client.requests.get')
    @patch('minimax_api_client.api_get')
    def test_download_success(self, mock_api_get, mock_req_get, tmp_path):
        mock_api_get.return_value = {
            "file": {"download_url": "https://cdn.minimax.io/video.mp4"}
        }
        mock_resp = Mock()
        mock_resp.raise_for_status = Mock()
        mock_resp.iter_content.return_value = [b"video_data"]
        mock_req_get.return_value = mock_resp

        output = str(tmp_path / "test.mp4")
        result = mac.download_file("file123", "key", output)
        assert result == output
        assert Path(output).exists()

    @patch('minimax_api_client.api_get')
    def test_download_no_url_raises(self, mock_api_get):
        mock_api_get.return_value = {"file": {}}

        with pytest.raises(Exception, match="No download URL"):
            mac.download_file("file123", "key", "/tmp/test.mp4")


class TestGetOutputDir:
    """Test output directory resolution."""

    def test_returns_path_object(self):
        result = mac.get_output_dir()
        assert isinstance(result, Path)

    def test_directory_exists(self):
        result = mac.get_output_dir()
        assert result.exists()
        assert result.is_dir()
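Taken together, the polling and download tests above pin down the async contract the client is expected to expose. A minimal sketch of that flow, assuming the same signatures the tests exercise ("your-api-key" is a placeholder):

```python
# Sketch only: the submit -> poll -> download flow implied by the tests above,
# the same sequence generate_video() in minimax_generate.py performs.
from minimax_api_client import api_post, poll_async_task, download_file

task_id = api_post("video_generation",
                   {"prompt": "A dancer", "model": "MiniMax-Hailuo-2.3"},
                   "your-api-key")["task_id"]
done = poll_async_task(task_id, "video_generation", "your-api-key",
                       poll_interval=10)
download_file(done["file_id"], "your-api-key", "out.mp4")
```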
185
.opencode/skills/ai-multimodal/scripts/tests/test_minimax_cli.py
Normal file
@@ -0,0 +1,185 @@
"""
Tests for minimax_cli.py - CLI argument parsing and task dispatch.
"""

import pytest
import sys
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock

sys.path.insert(0, str(Path(__file__).parent.parent))

import minimax_cli as cli


class TestTaskDefaults:
    """Test task-to-model default mapping."""

    def test_generate_defaults_to_image_01(self):
        assert cli.TASK_DEFAULTS['generate'] == 'image-01'

    def test_generate_video_defaults_to_hailuo(self):
        assert cli.TASK_DEFAULTS['generate-video'] == 'MiniMax-Hailuo-2.3'

    def test_generate_speech_defaults_to_speech_28_hd(self):
        assert cli.TASK_DEFAULTS['generate-speech'] == 'speech-2.8-hd'

    def test_generate_music_defaults_to_music_25(self):
        assert cli.TASK_DEFAULTS['generate-music'] == 'music-2.5'


class TestPrintResult:
    """Test result formatting."""

    def test_success_image(self, capsys):
        result = {
            "status": "success",
            "generated_images": ["/path/to/img.png"],
            "model": "image-01"
        }
        cli.print_result(result, "generate")
        output = capsys.readouterr().out
        assert "success" in output.lower()
        assert "/path/to/img.png" in output
        assert "image-01" in output

    def test_success_video(self, capsys):
        result = {
            "status": "success",
            "generated_video": "/path/to/vid.mp4",
            "generation_time": 45.2,
            "model": "MiniMax-Hailuo-2.3"
        }
        cli.print_result(result, "generate-video")
        output = capsys.readouterr().out
        assert "/path/to/vid.mp4" in output
        assert "45.2s" in output

    def test_success_audio(self, capsys):
        result = {
            "status": "success",
            "generated_audio": "/path/to/audio.mp3",
            "duration_ms": 140000,
            "model": "music-2.5"
        }
        cli.print_result(result, "generate-music")
        output = capsys.readouterr().out
        assert "/path/to/audio.mp3" in output
        assert "140.0s" in output

    def test_error_result(self, capsys):
        result = {"status": "error", "error": "Rate limit exceeded"}
        cli.print_result(result, "generate")
        output = capsys.readouterr().out
        assert "Rate limit exceeded" in output

    def test_unknown_status(self, capsys):
        result = {"model": "image-01"}
        cli.print_result(result, "generate")
        output = capsys.readouterr().out
        assert "unknown" in output.lower()


class TestMainCLI:
    """Test CLI main() argument parsing and dispatch."""

    @patch('minimax_cli.find_minimax_api_key', return_value=None)
    def test_no_api_key_exits(self, mock_key, capsys):
        with patch('sys.argv', ['cli', '--task', 'generate', '--prompt', 'x']):
            with pytest.raises(SystemExit) as exc_info:
                cli.main()
            assert exc_info.value.code == 1

    @patch('minimax_cli.generate_image')
    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_generate_image_dispatch(self, mock_key, mock_gen):
        mock_gen.return_value = {"status": "success", "generated_images": [],
                                 "model": "image-01"}
        with patch('sys.argv', ['cli', '--task', 'generate',
                                '--prompt', 'A cat']):
            cli.main()
        mock_gen.assert_called_once()
        args = mock_gen.call_args
        assert args[0][0] == 'test-key'
        assert args[0][1] == 'A cat'

    @patch('minimax_cli.generate_speech')
    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_generate_speech_dispatch(self, mock_key, mock_gen):
        mock_gen.return_value = {"status": "success",
                                 "generated_audio": "/x.mp3",
                                 "model": "speech-2.8-hd"}
        with patch('sys.argv', ['cli', '--task', 'generate-speech',
                                '--text', 'Hello world']):
            cli.main()
        mock_gen.assert_called_once()

    @patch('minimax_cli.generate_speech')
    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_speech_uses_text_or_prompt(self, mock_key, mock_gen):
        mock_gen.return_value = {"status": "success",
                                 "generated_audio": "/x.mp3",
                                 "model": "speech-2.8-hd"}
        # --prompt should work as fallback for --text
        with patch('sys.argv', ['cli', '--task', 'generate-speech',
                                '--prompt', 'Fallback text']):
            cli.main()
        call_args = mock_gen.call_args
        assert call_args[0][1] == 'Fallback text'

    @patch('minimax_cli.generate_music')
    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_generate_music_dispatch(self, mock_key, mock_gen):
        mock_gen.return_value = {"status": "success",
                                 "generated_audio": "/x.mp3",
                                 "duration_ms": 60000,
                                 "model": "music-2.5"}
        with patch('sys.argv', ['cli', '--task', 'generate-music',
                                '--lyrics', 'La la la']):
            cli.main()
        mock_gen.assert_called_once()

    @patch('minimax_cli.generate_video')
    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_generate_video_dispatch(self, mock_key, mock_gen):
        mock_gen.return_value = {"status": "success",
                                 "generated_video": "/x.mp4",
                                 "generation_time": 30.0,
                                 "model": "MiniMax-Hailuo-2.3"}
        with patch('sys.argv', ['cli', '--task', 'generate-video',
                                '--prompt', 'A dancer']):
            cli.main()
        mock_gen.assert_called_once()

    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_auto_model_detection(self, mock_key):
        with patch('sys.argv', ['cli', '--task', 'generate-speech',
                                '--text', 'hi']):
            with patch('minimax_cli.generate_speech') as mock_gen:
                mock_gen.return_value = {"status": "success",
                                         "generated_audio": "/x.mp3",
                                         "model": "speech-2.8-hd"}
                cli.main()
                # Model should be auto-detected
                assert mock_gen.call_args[0][2] == 'speech-2.8-hd'

    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_explicit_model_override(self, mock_key):
        with patch('sys.argv', ['cli', '--task', 'generate-speech',
                                '--text', 'hi', '--model', 'speech-2.8-turbo']):
            with patch('minimax_cli.generate_speech') as mock_gen:
                mock_gen.return_value = {"status": "success",
                                         "generated_audio": "/x.mp3",
                                         "model": "speech-2.8-turbo"}
                cli.main()
                assert mock_gen.call_args[0][2] == 'speech-2.8-turbo'

    @patch('minimax_cli.generate_image')
    @patch('minimax_cli.find_minimax_api_key', return_value='test-key')
    def test_exception_exits_with_1(self, mock_key, mock_gen):
        mock_gen.side_effect = Exception("API timeout")
        with patch('sys.argv', ['cli', '--task', 'generate',
                                '--prompt', 'test']):
            with pytest.raises(SystemExit) as exc_info:
                cli.main()
            assert exc_info.value.code == 1
@@ -0,0 +1,393 @@
"""
Tests for minimax_generate.py - generation functions for image, video, speech, music.
"""

import json
import pytest
import sys
import time
from pathlib import Path
from unittest.mock import Mock, patch, MagicMock, call

sys.path.insert(0, str(Path(__file__).parent.parent))

import minimax_generate as mg


class TestModelRegistries:
    """Test model set definitions."""

    def test_image_models(self):
        assert 'image-01' in mg.MINIMAX_IMAGE_MODELS
        assert 'image-01-live' in mg.MINIMAX_IMAGE_MODELS

    def test_video_models(self):
        assert 'MiniMax-Hailuo-2.3' in mg.MINIMAX_VIDEO_MODELS
        assert 'MiniMax-Hailuo-2.3-Fast' in mg.MINIMAX_VIDEO_MODELS
        assert 'S2V-01' in mg.MINIMAX_VIDEO_MODELS

    def test_speech_models(self):
        assert 'speech-2.8-hd' in mg.MINIMAX_SPEECH_MODELS
        assert 'speech-2.8-turbo' in mg.MINIMAX_SPEECH_MODELS

    def test_music_models(self):
        assert 'music-2.5' in mg.MINIMAX_MUSIC_MODELS

    def test_all_models_is_union(self):
        expected = (mg.MINIMAX_IMAGE_MODELS | mg.MINIMAX_VIDEO_MODELS |
                    mg.MINIMAX_SPEECH_MODELS | mg.MINIMAX_MUSIC_MODELS)
        assert mg.ALL_MINIMAX_MODELS == expected


class TestIsMinimaxModel:
    """Test model detection."""

    def test_known_image_model(self):
        assert mg.is_minimax_model('image-01') is True

    def test_known_video_model(self):
        assert mg.is_minimax_model('MiniMax-Hailuo-2.3') is True

    def test_known_speech_model(self):
        assert mg.is_minimax_model('speech-2.8-hd') is True

    def test_known_music_model(self):
        assert mg.is_minimax_model('music-2.5') is True

    def test_prefix_minimax(self):
        assert mg.is_minimax_model('MiniMax-Future-Model') is True

    def test_prefix_speech(self):
        assert mg.is_minimax_model('speech-3.0-ultra') is True

    def test_prefix_s2v(self):
        assert mg.is_minimax_model('S2V-02') is True

    def test_non_minimax_model(self):
        assert mg.is_minimax_model('gemini-2.5-flash') is False

    def test_non_minimax_imagen(self):
        assert mg.is_minimax_model('imagen-4.0-generate-001') is False


class TestGenerateImage:
    """Test image generation."""

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_success(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {
            "data": {"image_urls": ["https://cdn.minimax.io/img1.png"]}
        }

        with patch('requests.get') as mock_req_get:
            mock_resp = Mock()
            mock_resp.content = b'\x89PNG\r\n\x1a\n'
            mock_resp.raise_for_status = Mock()
            mock_req_get.return_value = mock_resp

            result = mg.generate_image("key", "A cat", "image-01")

        assert result["status"] == "success"
        assert len(result["generated_images"]) == 1
        assert result["model"] == "image-01"

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_no_images_returns_error(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {"data": {"image_urls": []}}

        result = mg.generate_image("key", "A cat", "image-01")
        assert result["status"] == "error"

    @patch('minimax_generate.api_post')
    def test_payload_structure(self, mock_post):
        mock_post.return_value = {"data": {"image_urls": []}}

        mg.generate_image("key", "A dog", "image-01", "16:9", 3)

        payload = mock_post.call_args[0][1]
        assert payload["model"] == "image-01"
        assert payload["prompt"] == "A dog"
        assert payload["aspect_ratio"] == "16:9"
        assert payload["n"] == 3
        assert payload["response_format"] == "url"
        assert payload["prompt_optimizer"] is True

    @patch('minimax_generate.api_post')
    def test_num_images_capped_at_9(self, mock_post):
        mock_post.return_value = {"data": {"image_urls": []}}

        mg.generate_image("key", "test", "image-01", num_images=15)

        payload = mock_post.call_args[0][1]
        assert payload["n"] == 9

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_output_copy(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {
            "data": {"image_urls": ["https://cdn.minimax.io/img.png"]}
        }

        with patch('requests.get') as mock_req_get:
            mock_resp = Mock()
            mock_resp.content = b'image_bytes'
            mock_resp.raise_for_status = Mock()
            mock_req_get.return_value = mock_resp

            output_path = str(tmp_path / "custom_output.png")
            result = mg.generate_image("key", "test", output=output_path)

        assert Path(output_path).exists()


class TestGenerateVideo:
    """Test video generation (async workflow)."""

    @patch('minimax_generate.download_file')
    @patch('minimax_generate.poll_async_task')
    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_success(self, mock_post, mock_dir, mock_poll, mock_dl, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {"task_id": "vid-task-123"}
        mock_poll.return_value = {"file_id": "file-456"}
        # Create a fake video file so stat() works
        mock_dl.side_effect = lambda fid, key, path, v: (
            Path(path).write_bytes(b'fake_video') or path
        )

        result = mg.generate_video("key", "A dancer")

        assert result["status"] == "success"
        assert "generated_video" in result
        assert result["model"] == "MiniMax-Hailuo-2.3"
        mock_poll.assert_called_once()

    @patch('minimax_generate.api_post')
    def test_no_task_id_error(self, mock_post):
        mock_post.return_value = {"error": "bad request"}

        result = mg.generate_video("key", "test")
        assert result["status"] == "error"
        assert "No task_id" in result["error"]

    @patch('minimax_generate.poll_async_task')
    @patch('minimax_generate.api_post')
    def test_no_file_id_error(self, mock_post, mock_poll):
        mock_post.return_value = {"task_id": "t1"}
        mock_poll.return_value = {"status": "Success"}

        result = mg.generate_video("key", "test")
        assert result["status"] == "error"
        assert "No file_id" in result["error"]

    @patch('minimax_generate.api_post')
    def test_payload_with_first_frame(self, mock_post):
        mock_post.return_value = {"task_id": None}

        mg.generate_video("key", "test", first_frame="https://img.url/frame.png")

        payload = mock_post.call_args[0][1]
        assert payload["first_frame_image"] == "https://img.url/frame.png"

    @patch('minimax_generate.api_post')
    def test_payload_duration_resolution(self, mock_post):
        mock_post.return_value = {"task_id": None}

        mg.generate_video("key", "test", duration=10, resolution="720P")

        payload = mock_post.call_args[0][1]
        assert payload["duration"] == 10
        assert payload["resolution"] == "720P"


class TestGenerateSpeech:
    """Test speech/TTS generation."""

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_success(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        # hex-encoded audio bytes
        mock_post.return_value = {
            "data": {"audio": "48656c6c6f"}  # "Hello" in hex
        }

        result = mg.generate_speech("key", "Hello world")

        assert result["status"] == "success"
        assert "generated_audio" in result
        assert result["model"] == "speech-2.8-hd"
        # Verify file was written
        audio_path = Path(result["generated_audio"])
        assert audio_path.exists()
        assert audio_path.read_bytes() == bytes.fromhex("48656c6c6f")

    @patch('minimax_generate.api_post')
    def test_no_audio_returns_error(self, mock_post):
        mock_post.return_value = {"data": {}}

        result = mg.generate_speech("key", "test")
        assert result["status"] == "error"

    @patch('minimax_generate.api_post')
    def test_payload_structure(self, mock_post):
        mock_post.return_value = {"data": {}}

        mg.generate_speech("key", "Test text", "speech-2.8-turbo",
                           voice="English_Warm_Bestie", emotion="happy",
                           output_format="wav", rate=1.5)

        payload = mock_post.call_args[0][1]
        assert payload["model"] == "speech-2.8-turbo"
        assert payload["text"] == "Test text"
        assert payload["stream"] is False
        assert payload["output_format"] == "hex"
        assert payload["voice_setting"]["voice_id"] == "English_Warm_Bestie"
        assert payload["voice_setting"]["speed"] == 1.5
        assert payload["audio_setting"]["format"] == "wav"
        assert payload["audio_setting"]["sample_rate"] == 32000

    @patch('minimax_generate.api_post')
    def test_text_truncated_at_10000(self, mock_post):
        mock_post.return_value = {"data": {}}
        long_text = "x" * 15000

        mg.generate_speech("key", long_text)

        payload = mock_post.call_args[0][1]
        assert len(payload["text"]) == 10000

    @patch('minimax_generate.api_post')
    def test_uses_t2a_v2_endpoint(self, mock_post):
        mock_post.return_value = {"data": {}}

        mg.generate_speech("key", "test")

        endpoint = mock_post.call_args[0][0]
        assert endpoint == "t2a_v2"

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_wav_extension(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {"data": {"audio": "aabb"}}

        result = mg.generate_speech("key", "test", output_format="wav")
        assert result["generated_audio"].endswith(".wav")

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_pcm_defaults_to_mp3_ext(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {"data": {"audio": "aabb"}}

        result = mg.generate_speech("key", "test", output_format="pcm")
        assert result["generated_audio"].endswith(".mp3")


class TestGenerateMusic:
    """Test music generation."""

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_success_with_url(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {
            "data": {"audio": "https://cdn.minimax.io/music.mp3"},
            "extra_info": {"music_duration": 120000}
        }

        with patch('requests.get') as mock_req_get:
            mock_resp = Mock()
            mock_resp.content = b'music_data'
            mock_resp.raise_for_status = Mock()
            mock_req_get.return_value = mock_resp

            result = mg.generate_music("key", lyrics="La la la",
                                       prompt="pop")

        assert result["status"] == "success"
        assert result["duration_ms"] == 120000
        assert result["model"] == "music-2.5"

    @patch('minimax_generate.get_output_dir')
    @patch('minimax_generate.api_post')
    def test_success_with_hex(self, mock_post, mock_dir, tmp_path):
        mock_dir.return_value = tmp_path
        mock_post.return_value = {
            "data": {"audio": "deadbeef"},
            "extra_info": {"music_duration": 60000}
        }

        result = mg.generate_music("key", lyrics="test")

        assert result["status"] == "success"
        audio_path = Path(result["generated_audio"])
        assert audio_path.read_bytes() == bytes.fromhex("deadbeef")

    @patch('minimax_generate.api_post')
    def test_no_audio_returns_error(self, mock_post):
        mock_post.return_value = {"data": {}, "extra_info": {}}

        result = mg.generate_music("key", lyrics="test")
        assert result["status"] == "error"

    @patch('minimax_generate.api_post')
    def test_payload_structure(self, mock_post):
        mock_post.return_value = {"data": {}, "extra_info": {}}

        mg.generate_music("key", lyrics="Verse 1\nHello",
                          prompt="upbeat pop", model="music-2.5",
                          output_format="wav")

        payload = mock_post.call_args[0][1]
        assert payload["model"] == "music-2.5"
        assert payload["lyrics"] == "Verse 1\nHello"
        assert payload["prompt"] == "upbeat pop"
        assert payload["output_format"] == "url"
        assert payload["audio_setting"]["format"] == "wav"
        assert payload["audio_setting"]["sample_rate"] == 44100

    @patch('minimax_generate.api_post')
    def test_lyrics_truncated_at_3500(self, mock_post):
        mock_post.return_value = {"data": {}, "extra_info": {}}

        mg.generate_music("key", lyrics="x" * 5000)

        payload = mock_post.call_args[0][1]
        assert len(payload["lyrics"]) == 3500

    @patch('minimax_generate.api_post')
    def test_prompt_truncated_at_2000(self, mock_post):
        mock_post.return_value = {"data": {}, "extra_info": {}}

        mg.generate_music("key", prompt="y" * 3000)

        payload = mock_post.call_args[0][1]
        assert len(payload["prompt"]) == 2000

    @patch('minimax_generate.api_post')
    def test_uses_300s_timeout(self, mock_post):
        mock_post.return_value = {"data": {}, "extra_info": {}}

        mg.generate_music("key", lyrics="test")

        # Check timeout kwarg passed to api_post
        _, kwargs = mock_post.call_args
        assert kwargs.get('timeout') == 300

    @patch('minimax_generate.api_post')
    def test_empty_lyrics_omitted(self, mock_post):
        mock_post.return_value = {"data": {}, "extra_info": {}}

        mg.generate_music("key", lyrics="", prompt="jazz")

        payload = mock_post.call_args[0][1]
        assert "lyrics" not in payload
        assert payload["prompt"] == "jazz"
61
.opencode/skills/ask/SKILL.md
Normal file
61
.opencode/skills/ask/SKILL.md
Normal file
@@ -0,0 +1,61 @@
---
name: ck:ask
description: "Answer technical and architectural questions with expert consultation."
argument-hint: "[technical-question]"
metadata:
  author: claudekit
  version: "1.0.0"
---

# Technical Consultation

Technical question or architecture challenge:
<questions>$ARGUMENTS</questions>

Current development workflows, system constraints, scale requirements, and business context will be considered:
- Primary workflow: `./.opencode/rules/primary-workflow.md`
- Development rules: `./.opencode/rules/development-rules.md`
- Orchestration protocols: `./.opencode/rules/orchestration-protocol.md`
- Documentation management: `./.opencode/rules/documentation-management.md`

**Project Documentation:**
```
./docs
├── project-overview-pdr.md
├── code-standards.md
├── codebase-summary.md
├── design-guidelines.md
├── deployment-guide.md
├── system-architecture.md
└── project-roadmap.md
```

## Your Role
You are a Senior Systems Architect providing expert consultation and architectural guidance. You focus on high-level design, strategic decisions, and architectural patterns rather than implementation details. You orchestrate four specialized architectural advisors:
1. **Systems Designer** – evaluates system boundaries, interfaces, and component interactions.
2. **Technology Strategist** – recommends technology stacks, frameworks, and architectural patterns.
3. **Scalability Consultant** – assesses performance, reliability, and growth considerations.
4. **Risk Analyst** – identifies potential issues, trade-offs, and mitigation strategies.
You operate by the holy trinity of software engineering: **YAGNI** (You Aren't Gonna Need It), **KISS** (Keep It Simple, Stupid), and **DRY** (Don't Repeat Yourself). Every solution you propose must honor these principles.

## Process
1. **Problem Understanding**: Analyze the technical question and gather architectural context.
   - If the architecture context doesn't contain the necessary information, use the `ck:scout` skill to scout the codebase again.
2. **Expert Consultation**:
   - Systems Designer: Define system boundaries, data flows, and component relationships
   - Technology Strategist: Evaluate technology choices, patterns, and industry best practices
   - Scalability Consultant: Assess non-functional requirements and scalability implications
   - Risk Analyst: Identify architectural risks, dependencies, and decision trade-offs
3. **Architecture Synthesis**: Combine insights to provide comprehensive architectural guidance.
4. **Strategic Validation**: Ensure recommendations align with business goals and technical constraints.

## Output Format
**Be honest, be brutal, straight to the point, and be concise.**
1. **Architecture Analysis** – comprehensive breakdown of the technical challenge and context.
2. **Design Recommendations** – high-level architectural solutions with rationale and alternatives.
3. **Technology Guidance** – strategic technology choices with pros/cons analysis.
4. **Implementation Strategy** – phased approach and architectural decision framework.
5. **Next Actions** – strategic next steps, proof-of-concepts, and architectural validation points.

## Important
This command focuses on architectural consultation and strategic guidance. Do not start implementing anything.
98
.opencode/skills/backend-development/SKILL.md
Normal file
98
.opencode/skills/backend-development/SKILL.md
Normal file
@@ -0,0 +1,98 @@
---
name: ck:backend-development
description: Build backends with Node.js, Python, Go (NestJS, FastAPI, Django). Use for REST/GraphQL/gRPC APIs, auth (OAuth, JWT), databases, microservices, security (OWASP), Docker/K8s.
license: MIT
argument-hint: "[framework] [task]"
metadata:
  author: claudekit
  version: "1.0.0"
---

# Backend Development Skill

Production-ready backend development with modern technologies, best practices, and proven patterns.

## When to Use

- Designing RESTful, GraphQL, or gRPC APIs
- Building authentication/authorization systems
- Optimizing database queries and schemas
- Implementing caching and performance optimization
- OWASP Top 10 security mitigation
- Designing scalable microservices
- Testing strategies (unit, integration, E2E)
- CI/CD pipelines and deployment
- Monitoring and debugging production systems

## Technology Selection Guide

**Languages:** Node.js/TypeScript (full-stack), Python (data/ML), Go (concurrency), Rust (performance)
**Frameworks:** NestJS, FastAPI, Django, Express, Gin
**Databases:** PostgreSQL (ACID), MongoDB (flexible schema), Redis (caching)
**APIs:** REST (simple), GraphQL (flexible), gRPC (performance)

See: `references/backend-technologies.md` for detailed comparisons

## Reference Navigation

**Core Technologies:**
- `backend-technologies.md` - Languages, frameworks, databases, message queues, ORMs
- `backend-api-design.md` - REST, GraphQL, gRPC patterns and best practices

**Security & Authentication:**
- `backend-security.md` - OWASP Top 10 2025, security best practices, input validation
- `backend-authentication.md` - OAuth 2.1, JWT, RBAC, MFA, session management

**Performance & Architecture:**
- `backend-performance.md` - Caching, query optimization, load balancing, scaling
- `backend-architecture.md` - Microservices, event-driven, CQRS, saga patterns

**Quality & Operations:**
- `backend-testing.md` - Testing strategies, frameworks, tools, CI/CD testing
- `backend-code-quality.md` - SOLID principles, design patterns, clean code
- `backend-devops.md` - Docker, Kubernetes, deployment strategies, monitoring
- `backend-debugging.md` - Debugging strategies, profiling, logging, production debugging
- `backend-mindset.md` - Problem-solving, architectural thinking, collaboration

## Key Best Practices (2025)

**Security:** Argon2id passwords, parameterized queries (98% SQL injection reduction), OAuth 2.1 + PKCE, rate limiting, security headers

**Performance:** Redis caching (90% DB load reduction), database indexing (30% I/O reduction), CDN (50%+ latency cut), connection pooling

**Testing:** 70-20-10 pyramid (unit-integration-E2E), Vitest 50% faster than Jest, contract testing for microservices, 83% of migrations fail without tests

**DevOps:** Blue-green/canary deployments, feature flags (90% fewer failures), Kubernetes at 84% adoption, Prometheus/Grafana monitoring, OpenTelemetry tracing

## Quick Decision Matrix

| Need | Choose |
|------|--------|
| Fast development | Node.js + NestJS |
| Data/ML integration | Python + FastAPI |
| High concurrency | Go + Gin |
| Max performance | Rust + Axum |
| ACID transactions | PostgreSQL |
| Flexible schema | MongoDB |
| Caching | Redis |
| Internal services | gRPC |
| Public APIs | GraphQL/REST |
| Real-time events | Kafka |

## Implementation Checklist

**API:** Choose style → Design schema → Validate input → Add auth → Rate limiting → Documentation → Error handling

**Database:** Choose DB → Design schema → Create indexes → Connection pooling → Migration strategy → Backup/restore → Test performance

**Security:** OWASP Top 10 → Parameterized queries → OAuth 2.1 + JWT → Security headers → Rate limiting → Input validation → Argon2id passwords

**Testing:** Unit 70% → Integration 20% → E2E 10% → Load tests → Migration tests → Contract tests (microservices)

**Deployment:** Docker → CI/CD → Blue-green/canary → Feature flags → Monitoring → Logging → Health checks

## Resources

- OWASP Top 10: https://owasp.org/www-project-top-ten/
- OAuth 2.1: https://oauth.net/2.1/
- OpenTelemetry: https://opentelemetry.io/
@@ -0,0 +1,495 @@
# Backend API Design

Comprehensive guide to designing RESTful, GraphQL, and gRPC APIs with best practices (2025).

## REST API Design

### Resource-Based URLs

**Good:**
```
GET    /api/v1/users            # List users
GET    /api/v1/users/:id        # Get specific user
POST   /api/v1/users            # Create user
PUT    /api/v1/users/:id        # Update user (full)
PATCH  /api/v1/users/:id        # Update user (partial)
DELETE /api/v1/users/:id        # Delete user

GET    /api/v1/users/:id/posts  # Get user's posts
POST   /api/v1/users/:id/posts  # Create post for user
```

**Bad (Avoid):**
```
GET  /api/v1/getUser?id=123     # RPC-style, not RESTful
POST /api/v1/createUser         # Verb in URL
GET  /api/v1/user-posts         # Unclear relationship
```

### HTTP Status Codes (Meaningful Responses)

**Success:**
- `200 OK` - Successful GET, PUT, PATCH
- `201 Created` - Successful POST (resource created)
- `204 No Content` - Successful DELETE

**Client Errors:**
- `400 Bad Request` - Invalid input/validation error
- `401 Unauthorized` - Missing or invalid authentication
- `403 Forbidden` - Authenticated but not authorized
- `404 Not Found` - Resource doesn't exist
- `409 Conflict` - Resource conflict (duplicate email)
- `422 Unprocessable Entity` - Validation error (detailed)
- `429 Too Many Requests` - Rate limit exceeded

**Server Errors:**
- `500 Internal Server Error` - Generic server error
- `502 Bad Gateway` - Upstream service error
- `503 Service Unavailable` - Temporary downtime
- `504 Gateway Timeout` - Upstream service timeout

### Request/Response Format

**Request:**
```typescript
POST /api/v1/users
Content-Type: application/json

{
  "email": "user@example.com",
  "name": "John Doe",
  "age": 30
}
```

**Success Response:**
```typescript
HTTP/1.1 201 Created
Content-Type: application/json
Location: /api/v1/users/123

{
  "id": "123",
  "email": "user@example.com",
  "name": "John Doe",
  "age": 30,
  "createdAt": "2025-01-09T12:00:00Z",
  "updatedAt": "2025-01-09T12:00:00Z"
}
```

**Error Response:**
```typescript
HTTP/1.1 400 Bad Request
Content-Type: application/json

{
  "error": {
    "code": "VALIDATION_ERROR",
    "message": "Invalid input data",
    "details": [
      {
        "field": "email",
        "message": "Invalid email format",
        "value": "invalid-email"
      },
      {
        "field": "age",
        "message": "Age must be between 18 and 120",
        "value": 15
      }
    ],
    "timestamp": "2025-01-09T12:00:00Z",
    "path": "/api/v1/users"
  }
}
```

### Pagination

```typescript
// Request
GET /api/v1/users?page=2&limit=50

// Response
{
  "data": [...],
  "pagination": {
    "page": 2,
    "limit": 50,
    "total": 1234,
    "totalPages": 25,
    "hasNext": true,
    "hasPrev": true
  },
  "links": {
    "first": "/api/v1/users?page=1&limit=50",
    "prev": "/api/v1/users?page=1&limit=50",
    "next": "/api/v1/users?page=3&limit=50",
    "last": "/api/v1/users?page=25&limit=50"
  }
}
```

### Filtering and Sorting

```
GET /api/v1/users?status=active&role=admin&sort=-createdAt,name&limit=20

# Filters: status=active AND role=admin
# Sort: createdAt DESC, name ASC
# Limit: 20 results
```
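
A small parsing sketch for the `sort` convention above (`parseSort` is a hypothetical helper, not a framework API): a leading `-` means descending, otherwise ascending.

```typescript
// Turn "sort=-createdAt,name" into ORDER BY pairs.
type SortOrder = [field: string, direction: 'ASC' | 'DESC'];

function parseSort(sortParam: string): SortOrder[] {
  return sortParam
    .split(',')
    .filter(Boolean)
    .map((field): SortOrder =>
      field.startsWith('-') ? [field.slice(1), 'DESC'] : [field, 'ASC'],
    );
}

// parseSort('-createdAt,name') → [['createdAt', 'DESC'], ['name', 'ASC']]
```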

### API Versioning Strategies

**URL Versioning (Most Common):**
```
/api/v1/users
/api/v2/users
```

**Header Versioning:**
```
GET /api/users
Accept: application/vnd.myapi.v2+json
```

**Query Parameter:**
```
/api/users?version=2
```

**Recommendation:** URL versioning for simplicity and discoverability
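
With URL versioning, routing stays trivial; in Express it is just one router mounted per version (the router modules here are hypothetical):

```typescript
import express from 'express';
// v1Router / v2Router are hypothetical router modules for illustration.
import { v1Router } from './routes/v1';
import { v2Router } from './routes/v2';

const app = express();

// Old clients keep working on /api/v1 while new features ship on /api/v2.
app.use('/api/v1', v1Router);
app.use('/api/v2', v2Router);
```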

## GraphQL API Design

### Schema Definition

```graphql
type User {
  id: ID!
  email: String!
  name: String!
  posts: [Post!]!
  createdAt: DateTime!
}

type Post {
  id: ID!
  title: String!
  content: String!
  author: User!
  published: Boolean!
  createdAt: DateTime!
}

type Query {
  user(id: ID!): User
  users(limit: Int = 50, offset: Int = 0): [User!]!
  post(id: ID!): Post
  posts(authorId: ID, published: Boolean): [Post!]!
}

type Mutation {
  createUser(input: CreateUserInput!): User!
  updateUser(id: ID!, input: UpdateUserInput!): User!
  deleteUser(id: ID!): Boolean!

  createPost(input: CreatePostInput!): Post!
  publishPost(id: ID!): Post!
}

input CreateUserInput {
  email: String!
  name: String!
  password: String!
}

input UpdateUserInput {
  email: String
  name: String
}
```

### Queries

```graphql
# Flexible data fetching - client specifies exactly what they need
query {
  user(id: "123") {
    id
    name
    email
    posts {
      id
      title
      published
    }
  }
}

# With variables
query GetUser($userId: ID!) {
  user(id: $userId) {
    id
    name
    posts(published: true) {
      title
    }
  }
}
```

### Mutations

```graphql
mutation CreateUser($input: CreateUserInput!) {
  createUser(input: $input) {
    id
    email
    name
    createdAt
  }
}

# Variables
{
  "input": {
    "email": "user@example.com",
    "name": "John Doe",
    "password": "SecurePass123!"
  }
}
```

### Resolvers (NestJS Example)

```typescript
@Resolver(() => User)
export class UserResolver {
  constructor(
    private userService: UserService,
    private postService: PostService,
  ) {}

  @Query(() => User, { nullable: true })
  async user(@Args('id') id: string) {
    return this.userService.findById(id);
  }

  @Query(() => [User])
  async users(
    @Args('limit', { defaultValue: 50 }) limit: number,
    @Args('offset', { defaultValue: 0 }) offset: number,
  ) {
    return this.userService.findAll({ limit, offset });
  }

  @Mutation(() => User)
  async createUser(@Args('input') input: CreateUserInput) {
    return this.userService.create(input);
  }

  // Field resolver - lazy load posts
  @ResolveField(() => [Post])
  async posts(@Parent() user: User) {
    return this.postService.findByAuthorId(user.id);
  }
}
```

### GraphQL Best Practices

1. **Avoid N+1 Problem** - Use DataLoader
```typescript
import DataLoader from 'dataloader';

const postLoader = new DataLoader(async (authorIds: string[]) => {
  const posts = await db.posts.findAll({ where: { authorId: authorIds } });
  return authorIds.map(id => posts.filter(p => p.authorId === id));
});

// In resolver
@ResolveField(() => [Post])
async posts(@Parent() user: User) {
  return this.postLoader.load(user.id);
}
```

2. **Pagination** - Relay-style cursor pagination
3. **Error Handling** - Return errors in response
4. **Depth Limiting** - Prevent deeply nested queries (see the sketch below)
5. **Query Complexity Analysis** - Limit expensive queries
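
A sketch of practices 4-5, assuming Apollo Server with the `graphql-depth-limit` package (exact wiring may vary by version); complexity analysis plugs in the same way as an additional validation rule:

```typescript
import { ApolloServer } from '@apollo/server';
import depthLimit from 'graphql-depth-limit';
// Hypothetical module holding the typeDefs/resolvers shown above.
import { typeDefs, resolvers } from './schema';

const server = new ApolloServer({
  typeDefs,
  resolvers,
  // Reject queries nested deeper than 5 levels before they execute;
  // a cost-based rule (e.g. graphql-query-complexity) slots in alongside it.
  validationRules: [depthLimit(5)],
});
```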

## gRPC API Design

### Protocol Buffers Schema

```protobuf
syntax = "proto3";

package user;

service UserService {
  rpc GetUser (GetUserRequest) returns (User);
  rpc ListUsers (ListUsersRequest) returns (ListUsersResponse);
  rpc CreateUser (CreateUserRequest) returns (User);
  rpc UpdateUser (UpdateUserRequest) returns (User);
  rpc DeleteUser (DeleteUserRequest) returns (DeleteUserResponse);

  // Streaming
  rpc StreamUsers (StreamUsersRequest) returns (stream User);
}

message User {
  string id = 1;
  string email = 2;
  string name = 3;
  int64 created_at = 4;
}

message GetUserRequest {
  string id = 1;
}

message ListUsersRequest {
  int32 limit = 1;
  int32 offset = 2;
}

message ListUsersResponse {
  repeated User users = 1;
  int32 total = 2;
}

message CreateUserRequest {
  string email = 1;
  string name = 2;
  string password = 3;
}
```

### Implementation (Node.js)

```typescript
import * as grpc from '@grpc/grpc-js';
import * as protoLoader from '@grpc/proto-loader';

const packageDefinition = protoLoader.loadSync('user.proto');
const userProto = grpc.loadPackageDefinition(packageDefinition).user;

// Server implementation
const server = new grpc.Server();

server.addService(userProto.UserService.service, {
  async getUser(call, callback) {
    const user = await userService.findById(call.request.id);
    callback(null, user);
  },

  async createUser(call, callback) {
    const user = await userService.create(call.request);
    callback(null, user);
  },

  async streamUsers(call) {
    const users = await userService.findAll();
    for (const user of users) {
      call.write(user);
    }
    call.end();
  },
});

server.bindAsync(
  '0.0.0.0:50051',
  grpc.ServerCredentials.createInsecure(),
  () => server.start()
);
```

### gRPC Benefits

- **Performance:** 7-10x faster than REST (binary protocol)
- **Streaming:** Bi-directional streaming
- **Type Safety:** Strong typing via Protocol Buffers
- **Code Generation:** Auto-generate client/server code
- **Best For:** Internal microservices, high-performance systems

## API Design Decision Matrix

| Feature | REST | GraphQL | gRPC |
|---------|------|---------|------|
| **Use Case** | Public APIs, CRUD | Flexible data fetching | Microservices, performance |
| **Performance** | Moderate | Moderate | Fastest (7-10x REST) |
| **Caching** | HTTP caching built-in | Complex | No built-in caching |
| **Browser Support** | Native | Native | Requires gRPC-Web |
| **Learning Curve** | Easy | Moderate | Steep |
| **Streaming** | Limited (SSE) | Subscriptions | Bi-directional |
| **Tooling** | Excellent | Excellent | Good |
| **Documentation** | OpenAPI/Swagger | Schema introspection | Protobuf definition |

## API Security Checklist

- [ ] HTTPS/TLS only (no HTTP)
- [ ] Authentication (OAuth 2.1, JWT, API keys)
- [ ] Authorization (RBAC, check permissions)
- [ ] Rate limiting (prevent abuse)
- [ ] Input validation (all endpoints)
- [ ] CORS configured properly
- [ ] Security headers (CSP, HSTS, X-Frame-Options)
- [ ] API versioning implemented
- [ ] Error messages don't leak system info
- [ ] Audit logging (who did what, when)

## API Documentation

### OpenAPI/Swagger (REST)

```yaml
openapi: 3.0.0
info:
  title: User API
  version: 1.0.0
paths:
  /api/v1/users:
    get:
      summary: List users
      parameters:
        - name: limit
          in: query
          schema:
            type: integer
            default: 50
      responses:
        '200':
          description: Successful response
          content:
            application/json:
              schema:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      $ref: '#/components/schemas/User'
components:
  schemas:
    User:
      type: object
      properties:
        id:
          type: string
        email:
          type: string
        name:
          type: string
```

## Resources

- **REST Best Practices:** https://restfulapi.net/
- **GraphQL:** https://graphql.org/learn/
- **gRPC:** https://grpc.io/docs/
- **OpenAPI:** https://swagger.io/specification/
@@ -0,0 +1,454 @@
# Backend Architecture Patterns

Microservices, event-driven architecture, and scalability patterns (2025).

## Monolith vs Microservices

### Monolithic Architecture

```
┌─────────────────────────────────┐
│       Single Application        │
│                                 │
│   ┌─────────┐    ┌──────────┐   │
│   │  Users  │    │ Products │   │
│   └─────────┘    └──────────┘   │
│   ┌─────────┐    ┌──────────┐   │
│   │ Orders  │    │ Payments │   │
│   └─────────┘    └──────────┘   │
│                                 │
│        Single Database          │
└─────────────────────────────────┘
```

**Pros:**
- Simple to develop and deploy
- Easy local testing
- Single codebase
- Strong consistency (ACID transactions)

**Cons:**
- Tight coupling
- Scaling limitations
- Deployment risk (all-or-nothing)
- Tech stack lock-in

**When to Use:** Startups, MVPs, small teams, unclear domain boundaries

### Microservices Architecture

```
┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐
│   User   │  │ Product  │  │  Order   │  │ Payment  │
│ Service  │  │ Service  │  │ Service  │  │ Service  │
└────┬─────┘  └────┬─────┘  └────┬─────┘  └────┬─────┘
     │             │             │             │
  ┌──▼──┐       ┌──▼──┐       ┌──▼──┐       ┌──▼──┐
  │ DB  │       │ DB  │       │ DB  │       │ DB  │
  └─────┘       └─────┘       └─────┘       └─────┘
```

**Pros:**
- Independent deployment
- Technology flexibility
- Fault isolation
- Easier scaling (scale services independently)

**Cons:**
- Complex deployment
- Distributed system challenges (network latency, partial failures)
- Data consistency (eventual consistency)
- Operational overhead

**When to Use:** Large teams, clear domain boundaries, need independent scaling, tech diversity

## Microservices Patterns

### Database per Service Pattern

**Concept:** Each service owns its database

```
User Service    → User DB (PostgreSQL)
Product Service → Product DB (MongoDB)
Order Service   → Order DB (PostgreSQL)
```

**Benefits:**
- Service independence
- Technology choice per service
- Fault isolation

**Challenges:**
- No joins across services
- Distributed transactions
- Data duplication

### API Gateway Pattern

```
         Client
           │
           ▼
  ┌─────────────────┐
  │   API Gateway   │  - Authentication
  │   (Kong/NGINX)  │  - Rate limiting
  └────────┬────────┘  - Request routing
           │
    ┌──────┴──┬────────┬────────┐
    ▼         ▼        ▼        ▼
  User     Product   Order   Payment
 Service   Service  Service  Service
```

**Responsibilities:**
- Request routing
- Authentication/authorization
- Rate limiting
- Request/response transformation
- Caching

**Implementation (Kong):**
```yaml
services:
  - name: user-service
    url: http://user-service:3000
    routes:
      - name: user-route
        paths:
          - /api/users

  - name: product-service
    url: http://product-service:3001
    routes:
      - name: product-route
        paths:
          - /api/products

plugins:
  - name: rate-limiting
    config:
      minute: 100
  - name: jwt
```

### Service Discovery

**Concept:** Services find each other dynamically

```typescript
// Consul service discovery
import Consul from 'consul';

const consul = new Consul();

// Register service
await consul.agent.service.register({
  name: 'user-service',
  address: '192.168.1.10',
  port: 3000,
  check: {
    http: 'http://192.168.1.10:3000/health',
    interval: '10s',
  },
});

// Discover service
const services = await consul.catalog.service.nodes('product-service');
const productServiceUrl = `http://${services[0].ServiceAddress}:${services[0].ServicePort}`;
```

### Circuit Breaker Pattern

**Concept:** Stop calling failing service, prevent cascade failures

```typescript
import CircuitBreaker from 'opossum';

const breaker = new CircuitBreaker(callExternalService, {
  timeout: 3000, // 3s timeout
  errorThresholdPercentage: 50, // Open circuit after 50% failures
  resetTimeout: 30000, // Try again after 30s
});

breaker.on('open', () => {
  console.log('Circuit breaker opened!');
});

breaker.fallback(() => ({
  data: 'fallback-response',
  source: 'cache',
}));

const result = await breaker.fire(requestParams);
```

**States:**
- **Closed:** Normal operation, requests go through
- **Open:** Too many failures, requests fail immediately
- **Half-Open:** Testing if service recovered

### Saga Pattern (Distributed Transactions)

**Choreography-Based Saga:**
```
Order Service: Create Order → Publish "OrderCreated"
                                    ↓
Payment Service: Reserve Payment → Publish "PaymentReserved"
                                    ↓
Inventory Service: Reserve Stock → Publish "StockReserved"
                                    ↓
Shipping Service: Create Shipment → Publish "ShipmentCreated"

If any step fails → Compensating transactions (rollback)
```

**Orchestration-Based Saga:**
```
Saga Orchestrator
  ↓ Create Order
Order Service
  ↓ Reserve Payment
Payment Service
  ↓ Reserve Stock
Inventory Service
  ↓ Create Shipment
Shipping Service
```
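
Below is a minimal orchestration sketch (the four service clients are hypothetical stand-ins): each completed step registers a compensating action, and a failure unwinds them in reverse order.

```typescript
// Hypothetical service clients, declared only so the sketch type-checks.
declare const orderService: { create(o: object): Promise<void>; cancel(id: string): Promise<void> };
declare const paymentService: { reserve(userId: string, total: number): Promise<void>; release(id: string): Promise<void> };
declare const inventoryService: { reserveStock(id: string): Promise<void>; releaseStock(id: string): Promise<void> };
declare const shippingService: { createShipment(id: string): Promise<void> };

async function createOrderSaga(order: { id: string; userId: string; total: number }) {
  const compensations: Array<() => Promise<void>> = [];
  try {
    await orderService.create(order);
    compensations.push(() => orderService.cancel(order.id));

    await paymentService.reserve(order.userId, order.total);
    compensations.push(() => paymentService.release(order.id));

    await inventoryService.reserveStock(order.id);
    compensations.push(() => inventoryService.releaseStock(order.id));

    await shippingService.createShipment(order.id);
  } catch (err) {
    // A failed step triggers the compensating transactions in reverse order.
    for (const compensate of compensations.reverse()) {
      await compensate();
    }
    throw err;
  }
}
```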

## Event-Driven Architecture

**Impact:** 85% of organizations recognize business value

### Event Sourcing

**Concept:** Store events, not current state

```typescript
// Traditional: Store current state
{
  userId: '123',
  balance: 500
}

// Event Sourcing: Store events
[
  { type: 'AccountCreated', userId: '123', timestamp: '...' },
  { type: 'MoneyDeposited', amount: 1000, timestamp: '...' },
  { type: 'MoneyWithdrawn', amount: 500, timestamp: '...' },
]

// Reconstruct state by replaying events
const balance = events
  .filter(e => e.userId === '123')
  .reduce((acc, event) => {
    if (event.type === 'MoneyDeposited') return acc + event.amount;
    if (event.type === 'MoneyWithdrawn') return acc - event.amount;
    return acc;
  }, 0);
```

**Benefits:**
- Complete audit trail
- Temporal queries (state at any point in time)
- Event replay for debugging
- Flexible projections

### Message Broker Patterns

**Kafka (Event Streaming):**
```typescript
import { Kafka } from 'kafkajs';

const kafka = new Kafka({
  clientId: 'order-service',
  brokers: ['kafka:9092'],
});

// Producer (connect before sending)
const producer = kafka.producer();
await producer.connect();
await producer.send({
  topic: 'order-events',
  messages: [
    {
      key: order.id,
      value: JSON.stringify({
        type: 'OrderCreated',
        orderId: order.id,
        userId: order.userId,
        total: order.total,
      }),
    },
  ],
});

// Consumer (connect before subscribing)
const consumer = kafka.consumer({ groupId: 'inventory-service' });
await consumer.connect();
await consumer.subscribe({ topic: 'order-events' });
await consumer.run({
  eachMessage: async ({ topic, partition, message }) => {
    const event = JSON.parse(message.value.toString());
    if (event.type === 'OrderCreated') {
      await reserveInventory(event.orderId);
    }
  },
});
```

**RabbitMQ (Task Queues):**
```typescript
import amqp from 'amqplib';

const connection = await amqp.connect('amqp://localhost');
const channel = await connection.createChannel();

// Producer
await channel.assertQueue('email-queue', { durable: true });
channel.sendToQueue('email-queue', Buffer.from(JSON.stringify({
  to: user.email,
  subject: 'Welcome!',
  body: 'Thank you for signing up',
})));

// Consumer
await channel.consume('email-queue', async (msg) => {
  const emailData = JSON.parse(msg.content.toString());
  await sendEmail(emailData);
  channel.ack(msg);
});
```

## CQRS (Command Query Responsibility Segregation)

**Concept:** Separate read and write models

```
Write Side (Commands):       Read Side (Queries):
CreateOrder                  GetOrderById
UpdateOrder                  GetUserOrders
     ↓                            ↑
┌──────────┐               ┌──────────┐
│  Write   │  → Events →   │   Read   │
│    DB    │    (sync)     │    DB    │
│(Postgres)│               │(MongoDB) │
└──────────┘               └──────────┘
```

**Benefits:**
- Optimized read models
- Scalable (scale reads independently)
- Flexible (different DB for reads/writes)

**Implementation:**
```typescript
// Command (Write)
class CreateOrderCommand {
  constructor(public userId: string, public items: OrderItem[]) {}
}

class CreateOrderHandler {
  async execute(command: CreateOrderCommand) {
    const order = await Order.create(command);
    await eventBus.publish(new OrderCreatedEvent(order));
    return order.id;
  }
}

// Query (Read)
class GetOrderQuery {
  constructor(public orderId: string) {}
}

class GetOrderHandler {
  async execute(query: GetOrderQuery) {
    // Read from optimized read model
    return await OrderReadModel.findById(query.orderId);
  }
}
```

## Scalability Patterns

### Horizontal Scaling (Scale Out)

```
       Load Balancer
            ↓
  ┌───┬───────┬───────┬───┐
  │ App 1 │ App 2 │ App 3 │ ... App N
  └───┬───┴───┬───┴───┬───┘
      └───────┴───────┘
            ↓
     Shared Database
   (with read replicas)
```

### Database Sharding

**Range-Based Sharding:**
```
Users 1-1M   → Shard 1
Users 1M-2M  → Shard 2
Users 2M-3M  → Shard 3
```

**Hash-Based Sharding:**
```typescript
function getShardId(userId: string): number {
  const hash = crypto.createHash('md5').update(userId).digest('hex');
  return parseInt(hash.substring(0, 8), 16) % SHARD_COUNT;
}

const shardId = getShardId(userId);
const db = shards[shardId];
const user = await db.users.findById(userId);
```

### Caching Layers

```
Client
  → CDN (static assets)
  → API Gateway Cache (public endpoints)
  → Application Cache (Redis - user sessions, hot data)
  → Database Query Cache
  → Database
```
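
The application-cache layer above is usually implemented cache-aside: try Redis, fall back to the database, then repopulate with a TTL. A sketch with node-redis (the `db` accessor is a hypothetical placeholder):

```typescript
import { createClient } from 'redis';

const redis = createClient();
await redis.connect();

// Hypothetical database accessor for illustration.
declare const db: { users: { findById(id: string): Promise<object | null> } };

async function getUser(id: string): Promise<object | null> {
  const cacheKey = `user:${id}`;

  // 1. Try the cache first.
  const cached = await redis.get(cacheKey);
  if (cached) return JSON.parse(cached);

  // 2. Cache miss: read from the database.
  const user = await db.users.findById(id);

  // 3. Populate the cache with a TTL so stale entries expire.
  if (user) await redis.set(cacheKey, JSON.stringify(user), { EX: 60 });
  return user;
}
```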

## Architecture Decision Matrix

| Pattern | When to Use | Complexity | Benefits |
|---------|-------------|------------|----------|
| **Monolith** | Small team, MVP, unclear boundaries | Low | Simple, fast development |
| **Microservices** | Large team, clear domains, need scaling | High | Independent deployment, fault isolation |
| **Event-Driven** | Async workflows, audit trail needed | Moderate | Decoupling, scalability |
| **CQRS** | Different read/write patterns | High | Optimized queries, scalability |
| **Serverless** | Spiky traffic, event-driven | Low | Auto-scaling, pay-per-use |

## Anti-Patterns to Avoid

1. **Distributed Monolith** - Microservices that all depend on each other
2. **Chatty Services** - Too many inter-service calls (network overhead)
3. **Shared Database** - Microservices sharing same DB (tight coupling)
4. **Over-Engineering** - Using microservices for small apps
5. **No Circuit Breakers** - Cascade failures in distributed systems

## Architecture Checklist

- [ ] Clear service boundaries (domain-driven design)
- [ ] Database per service (no shared databases)
- [ ] API Gateway for client requests
- [ ] Service discovery configured
- [ ] Circuit breakers for resilience
- [ ] Event-driven communication (Kafka/RabbitMQ)
- [ ] CQRS for read-heavy systems
- [ ] Distributed tracing (Jaeger/OpenTelemetry)
- [ ] Health checks for all services
- [ ] Horizontal scaling capability

## Resources

- **Microservices Patterns:** https://microservices.io/patterns/
- **Martin Fowler - Microservices:** https://martinfowler.com/articles/microservices.html
- **Event-Driven Architecture:** https://aws.amazon.com/event-driven-architecture/
- **CQRS Pattern:** https://martinfowler.com/bliki/CQRS.html
@@ -0,0 +1,338 @@
# Backend Authentication & Authorization

Modern authentication patterns including OAuth 2.1, JWT, RBAC, and MFA (2025 standards).

## OAuth 2.1 (2025 Standard)

### Key Changes from OAuth 2.0

**Mandatory:**
- PKCE (Proof Key for Code Exchange) for all clients
- Exact redirect URI matching
- State parameter for CSRF protection

**Deprecated:**
- Implicit grant flow (security risk)
- Resource owner password credentials grant
- Bearer token in query strings

### Authorization Code Flow with PKCE

```typescript
// Step 1: Generate code verifier and challenge
import crypto from 'crypto';

const codeVerifier = crypto.randomBytes(32).toString('base64url');
const codeChallenge = crypto
  .createHash('sha256')
  .update(codeVerifier)
  .digest('base64url');

// Step 2: Redirect to authorization endpoint
const authUrl = new URL('https://auth.example.com/authorize');
authUrl.searchParams.set('client_id', 'your-client-id');
authUrl.searchParams.set('redirect_uri', 'https://app.example.com/callback');
authUrl.searchParams.set('response_type', 'code');
authUrl.searchParams.set('scope', 'openid profile email');
authUrl.searchParams.set('state', crypto.randomBytes(16).toString('hex'));
authUrl.searchParams.set('code_challenge', codeChallenge);
authUrl.searchParams.set('code_challenge_method', 'S256');

// Step 3: Exchange code for token (with code_verifier)
const tokenResponse = await fetch('https://auth.example.com/token', {
  method: 'POST',
  headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
  body: new URLSearchParams({
    grant_type: 'authorization_code',
    code: authCode,
    redirect_uri: redirectUri,
    client_id: clientId,
    code_verifier: codeVerifier,
  }),
});
```

## JWT (JSON Web Tokens)

### Structure

```
Header.Payload.Signature
eyJhbGciOi... . eyJzdWIiOi... . SflKxwRJ...
```

### Best Practices (2025)

1. **Short expiration** - Access tokens: 15 minutes, Refresh tokens: 7 days
2. **Use RS256** - Asymmetric signing (not HS256 for public APIs)
3. **Validate everything** - Signature, issuer, audience, expiration
4. **Include minimal claims** - Don't include sensitive data
5. **Refresh token rotation** - Issue new refresh token on each use (see the sketch after the implementation below)

### Implementation

```typescript
import jwt from 'jsonwebtoken';

// Generate JWT
const accessToken = jwt.sign(
  {
    sub: user.id,
    email: user.email,
    roles: user.roles,
  },
  process.env.JWT_PRIVATE_KEY,
  {
    algorithm: 'RS256',
    expiresIn: '15m',
    issuer: 'https://api.example.com',
    audience: 'https://app.example.com',
  }
);

// Verify JWT
const decoded = jwt.verify(token, process.env.JWT_PUBLIC_KEY, {
  algorithms: ['RS256'],
  issuer: 'https://api.example.com',
  audience: 'https://app.example.com',
});
```
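
Best practice 5 (refresh token rotation) is sketched below, assuming a persistent store of valid token IDs: each refresh revokes the presented token and issues a new one, so a stolen refresh token works at most once.

```typescript
import jwt from 'jsonwebtoken';
import crypto from 'crypto';

// Hypothetical persistent store of valid refresh-token IDs.
declare const refreshStore: {
  isValid(jti: string): Promise<boolean>;
  revoke(jti: string): Promise<void>;
  save(jti: string, userId: string): Promise<void>;
};

async function rotateRefreshToken(oldToken: string): Promise<string> {
  const payload = jwt.verify(oldToken, process.env.JWT_PUBLIC_KEY, {
    algorithms: ['RS256'],
  }) as { sub: string; jti: string };

  // Reuse of an already-rotated token is treated as theft.
  if (!(await refreshStore.isValid(payload.jti))) {
    throw new Error('Refresh token reuse detected');
  }
  await refreshStore.revoke(payload.jti);

  // Issue a replacement with a fresh token ID.
  const jti = crypto.randomUUID();
  await refreshStore.save(jti, payload.sub);

  return jwt.sign({ sub: payload.sub, jti }, process.env.JWT_PRIVATE_KEY, {
    algorithm: 'RS256',
    expiresIn: '7d',
  });
}
```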
|
||||
|
||||
## Role-Based Access Control (RBAC)
|
||||
|
||||
### RBAC Model
|
||||
|
||||
```
|
||||
Users → Roles → Permissions → Resources
|
||||
```
|
||||
|
||||
### Implementation (NestJS Example)
|
||||
|
||||
```typescript
|
||||
// Define roles
|
||||
export enum Role {
|
||||
ADMIN = 'admin',
|
||||
EDITOR = 'editor',
|
||||
VIEWER = 'viewer',
|
||||
}
|
||||
|
||||
// Role decorator
|
||||
export const Roles = (...roles: Role[]) => SetMetadata('roles', roles);
|
||||
|
||||
// Guard implementation
|
||||
@Injectable()
|
||||
export class RolesGuard implements CanActivate {
|
||||
constructor(private reflector: Reflector) {}
|
||||
|
||||
canActivate(context: ExecutionContext): boolean {
|
||||
const requiredRoles = this.reflector.get<Role[]>('roles', context.getHandler());
|
||||
if (!requiredRoles) return true;
|
||||
|
||||
const request = context.switchToHttp().getRequest();
|
||||
const user = request.user;
|
||||
|
||||
return requiredRoles.some((role) => user.roles?.includes(role));
|
||||
}
|
||||
}
|
||||
|
||||
// Usage
|
||||
@Post()
|
||||
@UseGuards(JwtAuthGuard, RolesGuard)
|
||||
@Roles(Role.ADMIN, Role.EDITOR)
|
||||
async createPost(@Body() createPostDto: CreatePostDto) {
|
||||
return this.postsService.create(createPostDto);
|
||||
}
|
||||
```
|
||||
|
||||
### RBAC Best Practices
|
||||
|
||||
1. **Deny by default** - Explicitly grant permissions
|
||||
2. **Least privilege** - Minimum permissions needed
|
||||
3. **Role hierarchy** - Admin inherits Editor inherits Viewer
|
||||
4. **Separate roles and permissions** - Flexible permission assignment
|
||||
5. **Audit trail** - Log role changes and access
|
||||
|
||||
## Multi-Factor Authentication (MFA)

### TOTP (Time-Based One-Time Password)

```typescript
import speakeasy from 'speakeasy';
import QRCode from 'qrcode';

// Generate secret
const secret = speakeasy.generateSecret({
  name: 'MyApp',
  issuer: 'MyCompany',
});

// Generate QR code for user
const qrCode = await QRCode.toDataURL(secret.otpauth_url);

// Verify TOTP token
const verified = speakeasy.totp.verify({
  secret: secret.base32,
  encoding: 'base32',
  token: userToken,
  window: 2, // Allow 2 time steps drift
});
```

### FIDO2/WebAuthn (Passwordless - 2025 Standard)

**Benefits:**
- Phishing-resistant
- No shared secrets
- Hardware-backed security
- Better UX (biometrics, security keys)

**Implementation:**
```typescript
import crypto from 'node:crypto';

// Registration
const publicKeyCredentialCreationOptions = {
  challenge: crypto.randomBytes(32),
  rp: { name: 'MyApp', id: 'example.com' },
  user: {
    id: Buffer.from(user.id),
    name: user.email,
    displayName: user.name,
  },
  pubKeyCredParams: [{ alg: -7, type: 'public-key' }], // ES256
  authenticatorSelection: {
    authenticatorAttachment: 'platform', // 'platform' or 'cross-platform'
    userVerification: 'required',
  },
  timeout: 60000,
  attestation: 'direct',
};

// Use @simplewebauthn/server library
import { verifyRegistrationResponse, verifyAuthenticationResponse } from '@simplewebauthn/server';
```

## Session Management

### Best Practices

1. **Secure cookies** - HttpOnly, Secure, SameSite=Strict
2. **Session timeout** - Idle: 15 minutes, Absolute: 8 hours
3. **Regenerate session ID** - After login, privilege elevation (see the sketch after the snippet below)
4. **Server-side storage** - Redis for distributed systems
5. **CSRF protection** - SameSite cookies + CSRF tokens

### Implementation

```typescript
import session from 'express-session';
import RedisStore from 'connect-redis';
import { createClient } from 'redis';

const redisClient = createClient();
await redisClient.connect();

app.use(
  session({
    store: new RedisStore({ client: redisClient }),
    secret: process.env.SESSION_SECRET,
    resave: false,
    saveUninitialized: false,
    cookie: {
      secure: true, // HTTPS only
      httpOnly: true, // No JavaScript access
      sameSite: 'strict', // CSRF protection
      maxAge: 1000 * 60 * 15, // 15 minutes
    },
  })
);
```

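Best practice 3 (regenerating the session ID) with express-session — a minimal login-handler sketch; `authenticate` is an assumed helper:

```typescript
app.post('/login', async (req, res) => {
  const user = await authenticate(req.body.email, req.body.password); // assumed helper
  if (!user) return res.status(401).json({ error: 'Invalid credentials' });

  // Issue a fresh session ID so a pre-login (possibly attacker-known) session ID
  // cannot be promoted to an authenticated one (session fixation)
  req.session.regenerate((err) => {
    if (err) return res.status(500).json({ error: 'Session error' });
    req.session.userId = user.id;
    res.json({ ok: true });
  });
});
```
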
## Password Security

### Argon2id (2025 Standard - Replaces bcrypt)

**Why Argon2id:**
- Winner of Password Hashing Competition (2015)
- Memory-hard (resistant to GPU/ASIC attacks)
- Configurable CPU and memory cost
- Combines Argon2i (data-independent) + Argon2d (data-dependent)

```typescript
import argon2 from 'argon2';

// Hash password
const hash = await argon2.hash('password123', {
  type: argon2.argon2id,
  memoryCost: 65536, // 64 MB
  timeCost: 3, // 3 iterations
  parallelism: 4, // 4 threads
});

// Verify password
const valid = await argon2.verify(hash, 'password123');
```

### Password Policy (2025 NIST Guidelines)

- **Minimum length:** 12 characters (not 8)
- **No composition rules** - Allow passphrases
- **Check against breach databases** - HaveIBeenPwned API (see the sketch below)
- **No periodic rotation** - Only on compromise
- **Allow all printable characters** - Including spaces, emojis

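A sketch of the breach-database check against the HaveIBeenPwned range API, which uses k-anonymity: only the first five characters of the password's SHA-1 hash ever leave your server.

```typescript
import crypto from 'node:crypto';

async function isPasswordBreached(password: string): Promise<boolean> {
  const sha1 = crypto.createHash('sha1').update(password).digest('hex').toUpperCase();
  const prefix = sha1.slice(0, 5);
  const suffix = sha1.slice(5);

  // Returns every breached hash suffix sharing this 5-character prefix
  const res = await fetch(`https://api.pwnedpasswords.com/range/${prefix}`);
  const body = await res.text();

  // Each response line is "SUFFIX:COUNT"
  return body.split('\n').some((line) => line.split(':')[0] === suffix);
}
```
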
## API Key Authentication

### Best Practices

1. **Prefix keys** - `sk_live_`, `pk_test_` (identify type/environment)
2. **Hash stored keys** - Store SHA-256 hash, not plaintext
3. **Key rotation** - Allow users to rotate keys
4. **Scope limiting** - Separate keys for read/write operations
5. **Rate limiting** - Per API key limits

```typescript
import crypto from 'node:crypto';

// Generate API key (env: 'live' or 'test')
const apiKey = `sk_${env}_${crypto.randomBytes(24).toString('base64url')}`;

// Store hashed version
const hashedKey = crypto.createHash('sha256').update(apiKey).digest('hex');
await db.apiKeys.create({ userId, hashedKey, scopes: ['read'] });

// Validate API key
const providedHash = crypto.createHash('sha256').update(providedKey).digest('hex');
const keyRecord = await db.apiKeys.findOne({ hashedKey: providedHash });
```

## Authentication Decision Matrix

| Use Case | Recommended Approach |
|----------|---------------------|
| Web application | OAuth 2.1 + JWT |
| Mobile app | OAuth 2.1 + PKCE |
| SPA (Single Page App) | OAuth 2.1 Authorization Code + PKCE |
| Server-to-server | Client credentials grant + mTLS |
| Third-party API access | API keys with scopes |
| High-security | WebAuthn/FIDO2 + MFA |
| Internal admin | JWT + RBAC + MFA |
| Microservices | Service mesh (mTLS) + JWT |

## Security Checklist

- [ ] OAuth 2.1 with PKCE implemented
- [ ] JWT tokens expire in 15 minutes
- [ ] Refresh token rotation enabled
- [ ] RBAC with deny-by-default
- [ ] MFA required for admin accounts
- [ ] Passwords hashed with Argon2id
- [ ] Session cookies: HttpOnly, Secure, SameSite
- [ ] Rate limiting on auth endpoints (10 attempts/15 min)
- [ ] Account lockout after failed attempts
- [ ] Password policy: 12+ chars, breach check
- [ ] Audit logging for authentication events

## Resources

- **OAuth 2.1:** https://oauth.net/2.1/
- **JWT Best Practices:** https://datatracker.ietf.org/doc/html/rfc8725
- **WebAuthn:** https://webauthn.guide/
- **NIST Password Guidelines:** https://pages.nist.gov/800-63-3/
- **OWASP Auth Cheat Sheet:** https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html

# Backend Code Quality

SOLID principles, design patterns, clean code practices, and refactoring strategies (2025).

## SOLID Principles

### Single Responsibility Principle (SRP)

**Concept:** Class/module should have one reason to change

**Bad:**
```typescript
class User {
  saveToDatabase() { /* ... */ }
  sendWelcomeEmail() { /* ... */ }
  generateReport() { /* ... */ }
  validateInput() { /* ... */ }
}
```

**Good:**
```typescript
class User {
  constructor(public id: string, public email: string, public name: string) {}
}

class UserRepository {
  async save(user: User) { /* ... */ }
  async findById(id: string) { /* ... */ }
}

class EmailService {
  async sendWelcomeEmail(user: User) { /* ... */ }
}

class UserValidator {
  validate(userData: any) { /* ... */ }
}

class ReportGenerator {
  generateUserReport(user: User) { /* ... */ }
}
```

### Open/Closed Principle (OCP)

**Concept:** Open for extension, closed for modification

**Bad:**
```typescript
class PaymentProcessor {
  process(amount: number, method: string) {
    if (method === 'stripe') {
      // Stripe logic
    } else if (method === 'paypal') {
      // PayPal logic
    }
    // Adding new payment method requires modifying this class
  }
}
```

**Good (Strategy Pattern):**
```typescript
interface PaymentResult {
  success: boolean;
  transactionId: string;
}

interface PaymentStrategy {
  process(amount: number): Promise<PaymentResult>;
}

class StripePayment implements PaymentStrategy {
  async process(amount: number) {
    // Stripe-specific logic
    return { success: true, transactionId: '...' };
  }
}

class PayPalPayment implements PaymentStrategy {
  async process(amount: number) {
    // PayPal-specific logic
    return { success: true, transactionId: '...' };
  }
}

class PaymentProcessor {
  constructor(private strategy: PaymentStrategy) {}

  async process(amount: number) {
    return this.strategy.process(amount);
  }
}

// Usage
const processor = new PaymentProcessor(new StripePayment());
await processor.process(100);
```

### Liskov Substitution Principle (LSP)

**Concept:** Subtypes must be substitutable for base types

**Bad:**
```typescript
class Bird {
  fly() { /* ... */ }
}

class Penguin extends Bird {
  fly() {
    throw new Error('Penguins cannot fly!');
  }
}

// Violates LSP - Penguin breaks Bird contract
```

**Good:**
```typescript
interface Bird {
  move(): void;
}

class FlyingBird implements Bird {
  move() {
    this.fly();
  }
  private fly() { /* ... */ }
}

class Penguin implements Bird {
  move() {
    this.swim();
  }
  private swim() { /* ... */ }
}
```

### Interface Segregation Principle (ISP)

**Concept:** Clients shouldn't depend on interfaces they don't use

**Bad:**
```typescript
interface Worker {
  work(): void;
  eat(): void;
  sleep(): void;
}

class Robot implements Worker {
  work() { /* ... */ }
  eat() { throw new Error("Robots don't eat"); }
  sleep() { throw new Error("Robots don't sleep"); }
}
```

**Good:**
```typescript
interface Workable {
  work(): void;
}

interface Eatable {
  eat(): void;
}

interface Sleepable {
  sleep(): void;
}

class Human implements Workable, Eatable, Sleepable {
  work() { /* ... */ }
  eat() { /* ... */ }
  sleep() { /* ... */ }
}

class Robot implements Workable {
  work() { /* ... */ }
}
```

### Dependency Inversion Principle (DIP)

**Concept:** Depend on abstractions, not concretions

**Bad:**
```typescript
class MySQLDatabase {
  query(sql: string) { /* ... */ }
}

class UserService {
  private db = new MySQLDatabase(); // Tight coupling

  async getUser(id: string) {
    return this.db.query(`SELECT * FROM users WHERE id = ${id}`); // also a SQL injection risk
  }
}
```

**Good (Dependency Injection):**
```typescript
interface Database {
  query(sql: string, params: any[]): Promise<any>;
}

class MySQLDatabase implements Database {
  async query(sql: string, params: any[]) { /* ... */ }
}

class PostgreSQLDatabase implements Database {
  async query(sql: string, params: any[]) { /* ... */ }
}

class UserService {
  constructor(private db: Database) {} // Injected dependency

  async getUser(id: string) {
    return this.db.query('SELECT * FROM users WHERE id = $1', [id]);
  }
}

// Usage
const db = new PostgreSQLDatabase();
const userService = new UserService(db);
```

## Design Patterns

### Repository Pattern

**Concept:** Abstraction layer between business logic and data access

```typescript
// Domain entity
class User {
  constructor(
    public id: string,
    public email: string,
    public name: string,
  ) {}
}

// Repository interface
interface UserRepository {
  findById(id: string): Promise<User | null>;
  findByEmail(email: string): Promise<User | null>;
  save(user: User): Promise<void>;
  delete(id: string): Promise<void>;
}

// Implementation
class PostgresUserRepository implements UserRepository {
  constructor(private db: Database) {}

  async findById(id: string): Promise<User | null> {
    const row = await this.db.query('SELECT * FROM users WHERE id = $1', [id]);
    return row ? new User(row.id, row.email, row.name) : null;
  }

  async save(user: User): Promise<void> {
    await this.db.query(
      'INSERT INTO users (id, email, name) VALUES ($1, $2, $3)',
      [user.id, user.email, user.name]
    );
  }

  // Other methods...
}

// Service layer uses repository
class UserService {
  constructor(private userRepo: UserRepository) {}

  async getUser(id: string) {
    return this.userRepo.findById(id);
  }
}
```

### Factory Pattern

**Concept:** Create objects without specifying exact class

```typescript
interface Notification {
  send(message: string): Promise<void>;
}

class EmailNotification implements Notification {
  async send(message: string) {
    console.log(`Email sent: ${message}`);
  }
}

class SMSNotification implements Notification {
  async send(message: string) {
    console.log(`SMS sent: ${message}`);
  }
}

class PushNotification implements Notification {
  async send(message: string) {
    console.log(`Push notification sent: ${message}`);
  }
}

class NotificationFactory {
  static create(type: 'email' | 'sms' | 'push'): Notification {
    switch (type) {
      case 'email':
        return new EmailNotification();
      case 'sms':
        return new SMSNotification();
      case 'push':
        return new PushNotification();
      default:
        throw new Error(`Unknown notification type: ${type}`);
    }
  }
}

// Usage
const notification = NotificationFactory.create('email');
await notification.send('Hello!');
```

### Decorator Pattern

**Concept:** Add behavior to objects dynamically

```typescript
interface Coffee {
  cost(): number;
  description(): string;
}

class SimpleCoffee implements Coffee {
  cost() {
    return 10;
  }

  description() {
    return 'Simple coffee';
  }
}

class MilkDecorator implements Coffee {
  constructor(private coffee: Coffee) {}

  cost() {
    return this.coffee.cost() + 2;
  }

  description() {
    return `${this.coffee.description()}, milk`;
  }
}

class SugarDecorator implements Coffee {
  constructor(private coffee: Coffee) {}

  cost() {
    return this.coffee.cost() + 1;
  }

  description() {
    return `${this.coffee.description()}, sugar`;
  }
}

// Usage
let coffee: Coffee = new SimpleCoffee();
coffee = new MilkDecorator(coffee);
coffee = new SugarDecorator(coffee);

console.log(coffee.description()); // "Simple coffee, milk, sugar"
console.log(coffee.cost()); // 13
```

### Observer Pattern (Pub/Sub)

**Concept:** Notify multiple objects about state changes

```typescript
interface Observer {
  update(event: any): void;
}

class EventEmitter {
  private observers: Map<string, Observer[]> = new Map();

  subscribe(event: string, observer: Observer) {
    if (!this.observers.has(event)) {
      this.observers.set(event, []);
    }
    this.observers.get(event)!.push(observer);
  }

  emit(event: string, data: any) {
    const observers = this.observers.get(event) || [];
    observers.forEach(observer => observer.update(data));
  }
}

// Observers
class EmailNotifier implements Observer {
  update(event: any) {
    console.log(`Sending email about: ${event.type}`);
  }
}

class LoggerObserver implements Observer {
  update(event: any) {
    console.log(`Logging event: ${JSON.stringify(event)}`);
  }
}

// Usage
const eventEmitter = new EventEmitter();
eventEmitter.subscribe('user.created', new EmailNotifier());
eventEmitter.subscribe('user.created', new LoggerObserver());

eventEmitter.emit('user.created', { type: 'user.created', userId: '123' });
```

## Clean Code Practices

### Meaningful Names

**Bad:**
```typescript
function d(a: number, b: number) {
  return a * b * 0.0254;
}
```

**Good:**
```typescript
function calculateAreaInSquareMeters(widthInInches: number, heightInInches: number) {
  const INCHES_TO_METERS = 0.0254;
  // Area needs the conversion factor applied to each dimension
  return (widthInInches * INCHES_TO_METERS) * (heightInInches * INCHES_TO_METERS);
}
```

### Small Functions

**Bad:**
```typescript
async function processOrder(orderId: string) {
  // 200 lines of code doing everything
  // - validate order
  // - check inventory
  // - process payment
  // - update database
  // - send notifications
  // - generate invoice
}
```

**Good:**
```typescript
async function processOrder(orderId: string) {
  const order = await validateOrder(orderId);
  await checkInventory(order);
  const payment = await processPayment(order);
  await updateOrderStatus(orderId, 'paid');
  await sendConfirmationEmail(order);
  await generateInvoice(order, payment);
}
```

### Avoid Magic Numbers

**Bad:**
```typescript
if (user.age < 18) {
  throw new Error('Too young');
}

setTimeout(fetchData, 86400000);
```

**Good:**
```typescript
const MINIMUM_AGE = 18;
if (user.age < MINIMUM_AGE) {
  throw new Error('Too young');
}

const ONE_DAY_IN_MS = 24 * 60 * 60 * 1000;
setTimeout(fetchData, ONE_DAY_IN_MS);
```

### Error Handling

**Bad:**
```typescript
try {
  const user = await db.findUser(id);
  return user;
} catch (e) {
  console.log(e);
  return null;
}
```

**Good:**
```typescript
try {
  const user = await db.findUser(id);
  if (!user) {
    throw new UserNotFoundError(id);
  }
  return user;
} catch (error) {
  logger.error('Failed to fetch user', {
    userId: id,
    error: error.message,
    stack: error.stack,
  });
  throw new DatabaseError('User fetch failed', { cause: error });
}
```

### Don't Repeat Yourself (DRY)

**Bad:**
```typescript
app.post('/api/users', async (req, res) => {
  if (!req.body.email || !req.body.email.includes('@')) {
    return res.status(400).json({ error: 'Invalid email' });
  }
  // ...
});

app.put('/api/users/:id', async (req, res) => {
  if (!req.body.email || !req.body.email.includes('@')) {
    return res.status(400).json({ error: 'Invalid email' });
  }
  // ...
});
```

**Good:**
```typescript
function validateEmail(email: string) {
  if (!email || !email.includes('@')) {
    throw new ValidationError('Invalid email');
  }
}

app.post('/api/users', async (req, res) => {
  validateEmail(req.body.email);
  // ...
});

app.put('/api/users/:id', async (req, res) => {
  validateEmail(req.body.email);
  // ...
});
```

## Code Refactoring Techniques

### Extract Method

**Before:**
```typescript
function renderOrder(order: Order) {
  console.log('Order Details:');
  console.log(`ID: ${order.id}`);
  console.log(`Total: $${order.total}`);

  console.log('Items:');
  order.items.forEach(item => {
    console.log(`- ${item.name}: $${item.price}`);
  });
}
```

**After:**
```typescript
function renderOrder(order: Order) {
  printOrderHeader(order);
  printOrderItems(order.items);
}

function printOrderHeader(order: Order) {
  console.log('Order Details:');
  console.log(`ID: ${order.id}`);
  console.log(`Total: $${order.total}`);
}

function printOrderItems(items: OrderItem[]) {
  console.log('Items:');
  items.forEach(item => {
    console.log(`- ${item.name}: $${item.price}`);
  });
}
```

### Replace Conditional with Polymorphism

**Before:**
```typescript
function getShippingCost(order: Order) {
  if (order.shippingMethod === 'standard') {
    return 5;
  } else if (order.shippingMethod === 'express') {
    return 15;
  } else if (order.shippingMethod === 'overnight') {
    return 30;
  }
}
```

**After:**
```typescript
interface ShippingMethod {
  getCost(): number;
}

class StandardShipping implements ShippingMethod {
  getCost() {
    return 5;
  }
}

class ExpressShipping implements ShippingMethod {
  getCost() {
    return 15;
  }
}

class OvernightShipping implements ShippingMethod {
  getCost() {
    return 30;
  }
}
```

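The refactoring above leaves out how a shipping method gets selected; one common completion is a lookup table at the boundary. A sketch — the `SHIPPING_METHODS` map is illustrative:

```typescript
const SHIPPING_METHODS: Record<string, ShippingMethod> = {
  standard: new StandardShipping(),
  express: new ExpressShipping(),
  overnight: new OvernightShipping(),
};

function getShippingCost(order: Order) {
  const method = SHIPPING_METHODS[order.shippingMethod];
  if (!method) throw new Error(`Unknown shipping method: ${order.shippingMethod}`);
  return method.getCost(); // No conditional chain; adding a method = adding a map entry
}
```
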
## Code Quality Checklist

- [ ] SOLID principles applied
- [ ] Functions are small (< 20 lines ideal)
- [ ] Meaningful variable/function names
- [ ] No magic numbers (use constants)
- [ ] Proper error handling (no silent failures)
- [ ] DRY (no code duplication)
- [ ] Comments explain "why", not "what"
- [ ] Design patterns used appropriately
- [ ] Dependency injection for testability
- [ ] Code is readable (readable > clever)

## Resources

- **Clean Code (Book):** Robert C. Martin
- **Refactoring (Book):** Martin Fowler
- **Design Patterns:** https://refactoring.guru/design-patterns
- **SOLID Principles:** https://en.wikipedia.org/wiki/SOLID

# Backend Debugging Strategies

Comprehensive debugging techniques, tools, and best practices for backend systems (2025).

## Debugging Mindset

### The Scientific Method for Debugging

1. **Observe** - Gather symptoms and data
2. **Hypothesize** - Form theories about the cause
3. **Test** - Verify or disprove theories
4. **Iterate** - Refine understanding
5. **Fix** - Apply solution
6. **Verify** - Confirm fix works

### Golden Rules

1. **Reproduce first** - Debugging without reproduction is guessing
2. **Simplify the problem** - Isolate variables
3. **Read the logs** - Error messages contain clues
4. **Check assumptions** - "It should work" isn't debugging
5. **Use scientific method** - Avoid random changes
6. **Document findings** - Future you will thank you

## Logging Best Practices

### Structured Logging

**Node.js (Pino - Fastest)**
```typescript
import pino from 'pino';

const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  transport: {
    target: 'pino-pretty',
    options: { colorize: true }
  }
});

// Structured logging with context
logger.info({ userId: '123', action: 'login' }, 'User logged in');

// Error logging with stack trace
try {
  await riskyOperation();
} catch (error) {
  logger.error({ err: error, userId: '123' }, 'Operation failed');
}
```

**Python (Structlog)**
```python
import structlog

logger = structlog.get_logger()

# Structured context
logger.info("user_login", user_id="123", ip="192.168.1.1")

# Error with exception
try:
    risky_operation()
except Exception as e:
    logger.error("operation_failed", user_id="123", exc_info=True)
```

**Go (Zap - High Performance)**
```go
import "go.uber.org/zap"

logger, _ := zap.NewProduction()
defer logger.Sync()

// Structured fields
logger.Info("user logged in",
    zap.String("user_id", "123"),
    zap.String("ip", "192.168.1.1"),
)

// Error logging
if err := riskyOperation(); err != nil {
    logger.Error("operation failed",
        zap.Error(err),
        zap.String("user_id", "123"),
    )
}
```

### Log Levels

| Level | Purpose | Example |
|-------|---------|---------|
| **TRACE** | Very detailed, dev only | Request/response bodies |
| **DEBUG** | Detailed info for debugging | SQL queries, cache hits |
| **INFO** | General informational | User login, API calls |
| **WARN** | Potential issues | Deprecated API usage |
| **ERROR** | Error conditions | Failed API calls, exceptions |
| **FATAL** | Critical failures | Database connection lost |

### What to Log

**✅ DO LOG:**
- Request/response metadata (not bodies in prod)
- Error messages with context
- Performance metrics (duration, size)
- Security events (login, permission changes)
- Business events (orders, payments)

**❌ DON'T LOG:**
- Passwords or secrets
- Credit card numbers
- Personal identifiable information (PII)
- Session tokens
- Full request bodies in production

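Part of the DON'T list can be enforced mechanically with pino's `redact` option; a sketch — the field paths are illustrative and depend on what your app actually logs:

```typescript
import pino from 'pino';

const logger = pino({
  redact: {
    paths: [
      'password',
      'creditCard',
      'req.headers.authorization',
      'req.headers.cookie',
      'user.email', // PII
    ],
    censor: '[REDACTED]',
  },
});

// The secret never reaches the log output
logger.info({ user: { id: '123', email: 'a@b.com' }, password: 'hunter2' }, 'login attempt');
```
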
## Debugging Tools by Language

### Node.js / TypeScript

**1. Chrome DevTools (Built-in)**
```bash
# Run with inspect flag
node --inspect-brk app.js

# Open chrome://inspect in Chrome
# Set breakpoints, step through code
```

**2. VS Code Debugger**
```json
// .vscode/launch.json
{
  "version": "0.2.0",
  "configurations": [
    {
      "type": "node",
      "request": "launch",
      "name": "Debug Server",
      "skipFiles": ["<node_internals>/**"],
      "program": "${workspaceFolder}/src/index.ts",
      "preLaunchTask": "npm: build",
      "outFiles": ["${workspaceFolder}/dist/**/*.js"]
    }
  ]
}
```

**3. Debug Module**
```typescript
import debug from 'debug';

const log = debug('app:server');
const error = debug('app:error');

log('Starting server on port %d', 3000);
error('Failed to connect to database');

// Run with: DEBUG=app:* node app.js
```

### Python

**1. PDB (Built-in Debugger)**
```python
import pdb

def problematic_function(data):
    # Set breakpoint
    pdb.set_trace()

    # Debugger commands:
    # l - list code
    # n - next line
    # s - step into
    # c - continue
    # p variable - print variable
    # q - quit
    result = process(data)
    return result
```

**2. IPython Debugger (Better)**
```python
from IPython import embed

def problematic_function(data):
    # Drop into IPython shell
    embed()

    result = process(data)
    return result
```

**3. VS Code Debugger**
```json
// .vscode/launch.json
{
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Python: FastAPI",
      "type": "python",
      "request": "launch",
      "module": "uvicorn",
      "args": ["main:app", "--reload"],
      "jinja": true
    }
  ]
}
```

### Go

**1. Delve (Standard Debugger)**
```bash
# Install
go install github.com/go-delve/delve/cmd/dlv@latest

# Debug
dlv debug main.go

# Commands:
# b main.main - set breakpoint
# c - continue
# n - next line
# s - step into
# p variable - print variable
# q - quit
```

**2. VS Code Debugger**
```json
// .vscode/launch.json
{
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Launch Package",
      "type": "go",
      "request": "launch",
      "mode": "debug",
      "program": "${workspaceFolder}"
    }
  ]
}
```

### Rust

**1. LLDB/GDB (Native Debuggers)**
```bash
# Build with debug info
cargo build

# Debug with LLDB
rust-lldb ./target/debug/myapp

# Debug with GDB
rust-gdb ./target/debug/myapp
```

**2. VS Code Debugger (CodeLLDB)**
```json
// .vscode/launch.json
{
  "version": "0.2.0",
  "configurations": [
    {
      "type": "lldb",
      "request": "launch",
      "name": "Debug",
      "program": "${workspaceFolder}/target/debug/myapp",
      "args": [],
      "cwd": "${workspaceFolder}"
    }
  ]
}
```

## Database Debugging

### SQL Query Debugging (PostgreSQL)

**1. EXPLAIN ANALYZE**
```sql
-- Show query execution plan and actual timings
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at > '2024-01-01'
GROUP BY u.id, u.name
ORDER BY order_count DESC
LIMIT 10;

-- Look for:
-- - Seq Scan on large tables (missing indexes)
-- - High execution time
-- - Row estimates that diverge widely from actual rows
```

**2. Enable Slow Query Logging**
```sql
-- PostgreSQL configuration
ALTER DATABASE mydb SET log_min_duration_statement = 1000; -- Log queries >1s

-- Check slow queries (requires the pg_stat_statements extension)
SELECT query, calls, total_exec_time, mean_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
```

**3. Active Query Monitoring**
```sql
-- See currently running queries
SELECT pid, now() - query_start as duration, query, state
FROM pg_stat_activity
WHERE state = 'active'
ORDER BY duration DESC;

-- Kill a long-running query
SELECT pg_terminate_backend(pid);
```

### MongoDB Debugging

**1. Explain Query Performance**
```javascript
db.users.find({ email: 'test@example.com' }).explain('executionStats')

// Look for:
// - totalDocsExamined vs nReturned (should be close)
// - COLLSCAN (collection scan - needs index)
// - executionTimeMillis (should be low)
```

**2. Profile Slow Queries**
```javascript
// Enable profiling for queries >100ms
db.setProfilingLevel(1, { slowms: 100 })

// View slow queries
db.system.profile.find().limit(5).sort({ ts: -1 }).pretty()

// Disable profiling
db.setProfilingLevel(0)
```

### Redis Debugging

**1. Monitor Commands**
```bash
# See all commands in real-time (high overhead; avoid on busy production instances)
redis-cli MONITOR

# Check slow log
redis-cli SLOWLOG GET 10

# Set slow log threshold (microseconds)
redis-cli CONFIG SET slowlog-log-slower-than 10000
```

**2. Memory Analysis**
```bash
# Find the biggest keys per data type
redis-cli --bigkeys

# Memory usage details
redis-cli INFO memory

# Analyze specific key
redis-cli MEMORY USAGE mykey
```

## API Debugging

### HTTP Request Debugging

**1. cURL Testing**
```bash
# Verbose output with headers
curl -v https://api.example.com/users

# Include response headers
curl -i https://api.example.com/users

# POST with JSON
curl -X POST https://api.example.com/users \
  -H "Content-Type: application/json" \
  -d '{"name":"John","email":"john@example.com"}' \
  -v

# Save response to file
curl https://api.example.com/users -o response.json
```

**2. HTTPie (User-Friendly)**
```bash
# Install
pip install httpie

# Simple GET
http GET https://api.example.com/users

# POST with JSON
http POST https://api.example.com/users name=John email=john@example.com

# Custom headers
http GET https://api.example.com/users Authorization:"Bearer token123"
```

**3. Request Logging Middleware**

**Express/Node.js:**
```typescript
import morgan from 'morgan';

// Development
app.use(morgan('dev'));

// Production (Apache combined format)
app.use(morgan('combined'));

// Custom format
app.use(morgan(':method :url :status :response-time ms - :res[content-length]'));
```

**FastAPI/Python:**
```python
from fastapi import Request
import time

@app.middleware("http")
async def log_requests(request: Request, call_next):
    start_time = time.time()
    response = await call_next(request)
    duration = time.time() - start_time

    logger.info(
        "request_processed",
        method=request.method,
        path=request.url.path,
        status_code=response.status_code,
        duration_ms=duration * 1000
    )
    return response
```

## Performance Debugging

### CPU Profiling

**Node.js (0x)**
```bash
# Install
npm install -g 0x

# Profile application
0x -- node app.js

# Open flamegraph in browser
# Identify hot spots (red areas)
```

**Node.js (Clinic.js)**
```bash
# Install
npm install -g clinic

# Overall health diagnosis (event loop, CPU, memory)
clinic doctor -- node app.js

# CPU flamegraph
clinic flame -- node app.js

# Heap profiling
clinic heapprofiler -- node app.js

# Event loop analysis
clinic bubbleprof -- node app.js
```

**Python (cProfile)**
```python
import cProfile
import pstats

# Profile function
profiler = cProfile.Profile()
profiler.enable()

# Your code
result = expensive_operation()

profiler.disable()
stats = pstats.Stats(profiler)
stats.sort_stats('cumulative')
stats.print_stats(10)  # Top 10 functions
```

**Go (pprof)**
```go
import (
    "net/http"
    _ "net/http/pprof"
)

func main() {
    // Enable profiling endpoint
    go func() {
        http.ListenAndServe("localhost:6060", nil)
    }()

    // Your application
    startServer()
}

// Profile CPU
// go tool pprof http://localhost:6060/debug/pprof/profile?seconds=30

// Profile heap
// go tool pprof http://localhost:6060/debug/pprof/heap
```

### Memory Debugging

**Node.js (Heap Snapshots)**
```typescript
// Take heap snapshot programmatically
import { writeHeapSnapshot } from 'v8';

app.get('/debug/heap', (req, res) => {
  const filename = writeHeapSnapshot();
  res.send(`Heap snapshot written to ${filename}`);
});

// Analyze in Chrome DevTools
// 1. Load heap snapshot
// 2. Compare snapshots to find memory leaks
// 3. Look for unexpectedly retained objects and large arrays
```

**Python (Memory Profiler)**
```python
from memory_profiler import profile

@profile
def memory_intensive_function():
    large_list = [i for i in range(1000000)]
    return sum(large_list)

# Run with: python -m memory_profiler script.py
# Shows line-by-line memory usage
```

## Production Debugging

### Application Performance Monitoring (APM)

**New Relic**
```typescript
// newrelic.js (loaded as CommonJS by the agent)
exports.config = {
  app_name: ['My Backend API'],
  license_key: process.env.NEW_RELIC_LICENSE_KEY,
  logging: { level: 'info' },
  distributed_tracing: { enabled: true },
};

// Import at app entry
import 'newrelic';
```

**DataDog**
```typescript
import tracer from 'dd-trace';

tracer.init({
  service: 'backend-api',
  env: process.env.NODE_ENV,
  version: '1.0.0',
  logInjection: true
});
```

**Sentry (Error Tracking)**
```typescript
import * as Sentry from '@sentry/node';

Sentry.init({
  dsn: process.env.SENTRY_DSN,
  environment: process.env.NODE_ENV,
  tracesSampleRate: 1.0,
});

// Capture errors
try {
  await riskyOperation();
} catch (error) {
  Sentry.captureException(error, {
    user: { id: userId },
    tags: { operation: 'payment' },
  });
}
```

### Distributed Tracing

**OpenTelemetry (Vendor-Agnostic)**
```typescript
import { NodeSDK } from '@opentelemetry/sdk-node';
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
import { JaegerExporter } from '@opentelemetry/exporter-jaeger';

const sdk = new NodeSDK({
  traceExporter: new JaegerExporter({
    endpoint: 'http://localhost:14268/api/traces',
  }),
  instrumentations: [getNodeAutoInstrumentations()],
});

sdk.start();

// Traces HTTP, database, Redis automatically
```

### Log Aggregation

**ELK Stack (Elasticsearch, Logstash, Kibana)**
```yaml
# docker-compose.yml
version: '3'
services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
    environment:
      - discovery.type=single-node
    ports:
      - 9200:9200

  logstash:
    image: docker.elastic.co/logstash/logstash:8.11.0
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf

  kibana:
    image: docker.elastic.co/kibana/kibana:8.11.0
    ports:
      - 5601:5601
```

**Loki + Grafana (Lightweight)**
```yaml
# promtail config for log shipping
server:
  http_listen_port: 9080

positions:
  filename: /tmp/positions.yaml

clients:
  - url: http://loki:3100/loki/api/v1/push

scrape_configs:
  - job_name: system
    static_configs:
      - targets:
          - localhost
        labels:
          job: backend-api
          __path__: /var/log/app/*.log
```

## Common Debugging Scenarios

### 1. High CPU Usage

**Steps:**
1. Profile CPU (flamegraph)
2. Identify hot functions
3. Check for:
   - Infinite loops
   - Heavy regex operations
   - Inefficient algorithms (O(n²))
   - Blocking operations in event loop (Node.js)

**Node.js Example:**
```typescript
// ❌ Bad: Blocking event loop
function fibonacci(n) {
  if (n <= 1) return n;
  return fibonacci(n - 1) + fibonacci(n - 2); // Exponential time
}

// ✅ Good: Memoized or iterative
const memo = new Map();
function fibonacciMemo(n) {
  if (n <= 1) return n;
  if (memo.has(n)) return memo.get(n);
  const result = fibonacciMemo(n - 1) + fibonacciMemo(n - 2);
  memo.set(n, result);
  return result;
}
```

### 2. Memory Leaks

**Symptoms:**
- Memory usage grows over time
- Eventually crashes (OOM)
- Performance degradation

**Common Causes:**
```typescript
// ❌ Memory leak: Event listeners not removed
class DataService {
  constructor(eventBus) {
    eventBus.on('data', (data) => this.processData(data));
    // Listener never removed, holds reference to DataService
  }
}

// ✅ Fix: Remove listeners
class DataService {
  constructor(eventBus) {
    this.eventBus = eventBus;
    this.handler = (data) => this.processData(data);
    eventBus.on('data', this.handler);
  }

  destroy() {
    this.eventBus.off('data', this.handler);
  }
}

// ❌ Memory leak: Global cache without limits
const cache = new Map();
function getCachedData(key) {
  if (!cache.has(key)) {
    cache.set(key, expensiveOperation(key)); // Grows forever
  }
  return cache.get(key);
}

// ✅ Fix: LRU cache with size limit
import LRU from 'lru-cache';
const cache = new LRU({ max: 1000, ttl: 1000 * 60 * 60 });
```

**Detection:**
```bash
# Node.js: Check heap size over time
node --expose-gc --max-old-space-size=4096 app.js

# Take periodic heap snapshots
# Compare snapshots in Chrome DevTools
```

### 3. Slow Database Queries

**Steps:**
1. Enable slow query log
2. Analyze with EXPLAIN
3. Add indexes
4. Optimize query

**PostgreSQL Example:**
```sql
-- Before: Slow full table scan
SELECT * FROM orders
WHERE user_id = 123
ORDER BY created_at DESC
LIMIT 10;

-- EXPLAIN shows: Seq Scan on orders

-- Fix: Add index
CREATE INDEX idx_orders_user_id_created_at
ON orders(user_id, created_at DESC);

-- After: Index Scan using idx_orders_user_id_created_at
-- Often orders of magnitude faster
```

### 4. Connection Pool Exhaustion

**Symptoms:**
- "Connection pool exhausted" errors
- Requests hang indefinitely
- Database connections at max

**Causes & Fixes:**
```typescript
// ❌ Bad: Connection leak
async function getUser(id) {
  const client = await pool.connect();
  const result = await client.query('SELECT * FROM users WHERE id = $1', [id]);
  return result.rows[0];
  // Connection never released!
}

// ✅ Good: Always release
async function getUser(id) {
  const client = await pool.connect();
  try {
    const result = await client.query('SELECT * FROM users WHERE id = $1', [id]);
    return result.rows[0];
  } finally {
    client.release(); // Always release
  }
}

// ✅ Better: Use pool directly
async function getUser(id) {
  const result = await pool.query('SELECT * FROM users WHERE id = $1', [id]);
  return result.rows[0];
  // Automatically releases
}
```

### 5. Race Conditions

**Example:**
```typescript
// ❌ Bad: Race condition
let counter = 0;

async function incrementCounter() {
  const current = counter; // Caller 1 reads 0
  await doSomethingAsync(); // Caller 2 also reads 0 while we await
  counter = current + 1; // Caller 1 writes 1, Caller 2 writes 1
  // Expected: 2, Actual: 1
}

// ✅ Fix: Atomic operations (Redis)
async function incrementCounter() {
  return await redis.incr('counter');
  // Atomic, safe under concurrency
}

// ✅ Fix: Database transactions
async function incrementCounter(userId) {
  await db.transaction(async (trx) => {
    const user = await trx('users')
      .where({ id: userId })
      .forUpdate() // Row-level lock
      .first();

    await trx('users')
      .where({ id: userId })
      .update({ counter: user.counter + 1 });
  });
}
```

## Debugging Checklist

**Before Diving Into Code:**
- [ ] Read error message completely
- [ ] Check logs for context
- [ ] Reproduce the issue reliably
- [ ] Isolate the problem (binary search)
- [ ] Verify assumptions

**Investigation:**
- [ ] Enable debug logging
- [ ] Add strategic log points
- [ ] Use debugger breakpoints
- [ ] Profile performance if slow
- [ ] Check database queries
- [ ] Monitor system resources

**Production Issues:**
- [ ] Check APM dashboards
- [ ] Review distributed traces
- [ ] Analyze error rates
- [ ] Compare with previous baseline
- [ ] Check for recent deployments
- [ ] Review infrastructure changes

**After Fix:**
- [ ] Verify fix in development
- [ ] Add regression test
- [ ] Document the issue
- [ ] Deploy with monitoring
- [ ] Confirm fix in production

## Debugging Resources

**Tools:**
- Node.js: https://nodejs.org/en/docs/guides/debugging-getting-started/
- Chrome DevTools: https://developer.chrome.com/docs/devtools/
- Clinic.js: https://clinicjs.org/
- Sentry: https://docs.sentry.io/
- DataDog: https://docs.datadoghq.com/
- New Relic: https://docs.newrelic.com/

**Best Practices:**
- 12 Factor App Logs: https://12factor.net/logs
- Google SRE Book: https://sre.google/sre-book/table-of-contents/
- OpenTelemetry: https://opentelemetry.io/docs/

**Database:**
- PostgreSQL EXPLAIN: https://www.postgresql.org/docs/current/using-explain.html
- MongoDB Performance: https://www.mongodb.com/docs/manual/administration/analyzing-mongodb-performance/

# Backend DevOps Practices

CI/CD pipelines, containerization, deployment strategies, and monitoring (2025).

## Deployment Strategies

### Blue-Green Deployment

**Concept:** Two identical environments (Blue = current, Green = new)

```
Production Traffic → Blue (v1.0)
                     Green (v2.0) ← Deploy & Test

Switch:
Production Traffic → Green (v2.0)
                     Blue (v1.0) ← Instant rollback available
```

**Pros:**
- Zero downtime
- Instant rollback
- Full environment testing before switch

**Cons:**
- Requires double infrastructure
- Database migrations complex

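On Kubernetes, the switch is often just repointing a Service's selector between two Deployments. A sketch, assuming Deployments labeled `version: blue` and `version: green` behind a Service named `api`:

```bash
# Deploy the green stack alongside blue, then verify it
kubectl apply -f deployment-green.yaml
kubectl rollout status deployment/api-green

# Switch all production traffic to green
kubectl patch service api -p '{"spec":{"selector":{"version":"green"}}}'

# Rollback is the same patch pointing back at blue
kubectl patch service api -p '{"spec":{"selector":{"version":"blue"}}}'
```
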
### Canary Deployment

**Concept:** Gradual rollout (1% → 5% → 25% → 100%)

```bash
# Kubernetes canary deployment
kubectl set image deployment/api api=myapp:v2
kubectl rollout pause deployment/api # Pause after the first new pods come up

# Monitor metrics, then continue
kubectl rollout resume deployment/api
```

**Pros:**
- Risk mitigation
- Early issue detection
- Real user feedback

**Cons:**
- Requires monitoring
- Longer deployment time

### Feature Flags (Progressive Delivery)

**Impact:** Teams pairing flags with canary releases report dramatically fewer deployment failures

```typescript
import LaunchDarkly from 'launchdarkly-node-server-sdk';

const client = LaunchDarkly.init(process.env.LD_SDK_KEY);
await client.waitForInitialization();

// Check feature flag
const showNewCheckout = await client.variation('new-checkout', user, false);

if (showNewCheckout) {
  return newCheckoutFlow(req, res);
} else {
  return oldCheckoutFlow(req, res);
}
```

**Use Cases:**
- Gradual feature rollout
- A/B testing
- Kill switch for problematic features
- Decouple deployment from release

## Containerization with Docker

### Multi-Stage Builds (Optimize Image Size)

```dockerfile
# Build stage
FROM node:20-alpine AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci               # Full install: build tools live in devDependencies
COPY . .
RUN npm run build
RUN npm prune --omit=dev # Drop dev dependencies before copying below

# Production stage
FROM node:20-alpine
WORKDIR /app

# Copy only necessary files
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY package.json ./

# Security: Run as non-root
RUN addgroup -g 1001 -S nodejs && \
    adduser -S nodejs -u 1001
USER nodejs

EXPOSE 3000
CMD ["node", "dist/main.js"]
```

**Benefits:**
- Smaller image size (50-90% reduction)
- Faster deployments
- Reduced attack surface

### Docker Compose (Local Development)

```yaml
version: '3.8'

services:
  api:
    build: .
    ports:
      - "3000:3000"
    environment:
      - DATABASE_URL=postgresql://postgres:password@db:5432/myapp
      - REDIS_URL=redis://redis:6379
    depends_on:
      - db
      - redis

  db:
    image: postgres:15-alpine
    environment:
      - POSTGRES_PASSWORD=password
      - POSTGRES_DB=myapp
    volumes:
      - postgres-data:/var/lib/postgresql/data

  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"

volumes:
  postgres-data:
```

## Kubernetes Orchestration

### Deployment Manifest

```yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: api-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: api
  template:
    metadata:
      labels:
        app: api
    spec:
      containers:
        - name: api
          image: myregistry/api:v1.0.0
          ports:
            - containerPort: 3000
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: db-secret
                  key: url
          resources:
            requests:
              memory: "256Mi"
              cpu: "250m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          livenessProbe:
            httpGet:
              path: /health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /ready
              port: 3000
            initialDelaySeconds: 5
            periodSeconds: 5
```

### Horizontal Pod Autoscaling

```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: api-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: api-deployment
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
```

## CI/CD Pipelines

### GitHub Actions (Modern, Integrated)

```yaml
name: CI/CD Pipeline

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Setup Node.js
        uses: actions/setup-node@v3
        with:
          node-version: '20'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      - name: Run linter
        run: npm run lint

      - name: Run tests
        run: npm run test:ci

      - name: Upload coverage
        uses: codecov/codecov-action@v3

  security:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Run Snyk scan
        uses: snyk/actions/node@master
        env:
          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}

      - name: Container scan
        run: |
          docker build -t myapp:${{ github.sha }} .
          docker scan myapp:${{ github.sha }}

  deploy:
    needs: [test, security]
    runs-on: ubuntu-latest
    if: github.ref == 'refs/heads/main'
    steps:
      - uses: actions/checkout@v3

      - name: Build and push Docker image
        run: |
          echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ github.actor }} --password-stdin
          docker build -t ghcr.io/${{ github.repository }}:${{ github.sha }} .
          docker push ghcr.io/${{ github.repository }}:${{ github.sha }}

      - name: Deploy to Kubernetes
        run: |
          kubectl set image deployment/api api=ghcr.io/${{ github.repository }}:${{ github.sha }}
          kubectl rollout status deployment/api
```

## Monitoring & Observability
|
||||
|
||||
### Three Pillars of Observability
|
||||
|
||||
**1. Metrics (Prometheus + Grafana)**
|
||||
|
||||
```typescript
|
||||
import { Counter, Histogram, register } from 'prom-client';
|
||||
|
||||
// Request counter
|
||||
const httpRequestTotal = new Counter({
|
||||
name: 'http_requests_total',
|
||||
help: 'Total HTTP requests',
|
||||
labelNames: ['method', 'route', 'status'],
|
||||
});
|
||||
|
||||
// Response time histogram
|
||||
const httpRequestDuration = new Histogram({
|
||||
name: 'http_request_duration_seconds',
|
||||
help: 'HTTP request duration',
|
||||
labelNames: ['method', 'route'],
|
||||
buckets: [0.1, 0.5, 1, 2, 5],
|
||||
});
|
||||
|
||||
// Middleware to track metrics
|
||||
app.use((req, res, next) => {
|
||||
const start = Date.now();
|
||||
|
||||
res.on('finish', () => {
|
||||
const duration = (Date.now() - start) / 1000;
|
||||
httpRequestTotal.inc({ method: req.method, route: req.route?.path, status: res.statusCode });
|
||||
httpRequestDuration.observe({ method: req.method, route: req.route?.path }, duration);
|
||||
});
|
||||
|
||||
next();
|
||||
});
|
||||
|
||||
// Metrics endpoint
|
||||
app.get('/metrics', async (req, res) => {
|
||||
res.set('Content-Type', register.contentType);
|
||||
res.end(await register.metrics());
|
||||
});
|
||||
```
|
||||
|
||||
**2. Logs (ELK Stack - Elasticsearch, Logstash, Kibana)**
|
||||
|
||||
```typescript
|
||||
import winston from 'winston';
|
||||
import { ElasticsearchTransport } from 'winston-elasticsearch';
|
||||
|
||||
const logger = winston.createLogger({
|
||||
level: 'info',
|
||||
format: winston.format.json(),
|
||||
transports: [
|
||||
new winston.transports.Console(),
|
||||
new ElasticsearchTransport({
|
||||
level: 'info',
|
||||
clientOpts: { node: 'http://localhost:9200' },
|
||||
index: 'logs',
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
// Structured logging
|
||||
logger.info('User created', {
|
||||
userId: user.id,
|
||||
email: user.email,
|
||||
ipAddress: req.ip,
|
||||
userAgent: req.headers['user-agent'],
|
||||
});
|
||||
```
|
||||
|
||||
**3. Traces (Jaeger/OpenTelemetry)**
|
||||
|
||||
```typescript
|
||||
import { NodeSDK } from '@opentelemetry/sdk-node';
|
||||
import { JaegerExporter } from '@opentelemetry/exporter-jaeger';
|
||||
|
||||
const sdk = new NodeSDK({
|
||||
traceExporter: new JaegerExporter({
|
||||
endpoint: 'http://localhost:14268/api/traces',
|
||||
}),
|
||||
serviceName: 'api-service',
|
||||
});
|
||||
|
||||
sdk.start();
|
||||
|
||||
// Traces automatically captured for HTTP requests, database queries, etc.
|
||||
```
|
||||
|
||||
### Health Checks
|
||||
|
||||
```typescript
|
||||
// Liveness probe - Is the app running?
|
||||
app.get('/health/liveness', (req, res) => {
|
||||
res.status(200).json({ status: 'ok', timestamp: Date.now() });
|
||||
});
|
||||
|
||||
// Readiness probe - Is the app ready to serve traffic?
|
||||
app.get('/health/readiness', async (req, res) => {
|
||||
const checks = {
|
||||
database: await checkDatabase(),
|
||||
redis: await checkRedis(),
|
||||
externalAPI: await checkExternalAPI(),
|
||||
};
|
||||
|
||||
const isReady = Object.values(checks).every(Boolean);
|
||||
res.status(isReady ? 200 : 503).json({
|
||||
status: isReady ? 'ready' : 'not ready',
|
||||
checks,
|
||||
});
|
||||
});
|
||||
|
||||
async function checkDatabase() {
|
||||
try {
|
||||
await db.query('SELECT 1');
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
```
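
The readiness probe above also references `checkRedis` and `checkExternalAPI`. A minimal sketch of those helpers, assuming an ioredis client and a hypothetical status URL for the external dependency:

```typescript
import Redis from 'ioredis';

const redis = new Redis(); // assumes Redis on localhost:6379

async function checkRedis(): Promise<boolean> {
  try {
    // PING returns "PONG" when the server is reachable
    return (await redis.ping()) === 'PONG';
  } catch {
    return false;
  }
}

async function checkExternalAPI(): Promise<boolean> {
  try {
    // The status endpoint is a placeholder - substitute the real dependency.
    // AbortSignal.timeout (Node 17.3+) keeps a slow dependency from blocking readiness.
    const res = await fetch('https://api.example.com/status', {
      signal: AbortSignal.timeout(2000),
    });
    return res.ok;
  } catch {
    return false;
  }
}
```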

## Secrets Management

### HashiCorp Vault

```bash
# Store secret
vault kv put secret/myapp/db password=super-secret

# Retrieve secret
vault kv get -field=password secret/myapp/db
```
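
Reading that secret from a Node service - a sketch using the node-vault client. The env variable names and secret path are assumptions, and note that KV v2 nests values under `data.data`:

```typescript
import vault from 'node-vault';

// VAULT_ADDR and VAULT_TOKEN are assumed to be provided by the environment
const client = vault({
  endpoint: process.env.VAULT_ADDR,
  token: process.env.VAULT_TOKEN,
});

async function getDbPassword(): Promise<string> {
  // KV v2 exposes secrets under secret/data/<path>
  const result = await client.read('secret/data/myapp/db');
  return result.data.data.password;
}
```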

### Kubernetes Secrets

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: db-secret
type: Opaque
stringData:
  url: postgresql://user:pass@host:5432/db
---
# Reference in deployment
env:
  - name: DATABASE_URL
    valueFrom:
      secretKeyRef:
        name: db-secret
        key: url
```

## Infrastructure as Code (Terraform)

```hcl
# main.tf
resource "aws_db_instance" "main" {
  identifier        = "myapp-db"
  engine            = "postgres"
  engine_version    = "15.3"
  instance_class    = "db.t3.micro"
  allocated_storage = 20
  username          = "admin"
  password          = var.db_password

  backup_retention_period = 7
  skip_final_snapshot     = false
}

resource "aws_elasticache_cluster" "redis" {
  cluster_id           = "myapp-redis"
  engine               = "redis"
  node_type            = "cache.t3.micro"
  num_cache_nodes      = 1
  parameter_group_name = "default.redis7"
}
```

## DevOps Checklist

- [ ] CI/CD pipeline configured (GitHub Actions/GitLab CI/Jenkins)
- [ ] Docker multi-stage builds implemented
- [ ] Kubernetes deployment manifests created
- [ ] Blue-green or canary deployment strategy
- [ ] Feature flags configured (LaunchDarkly/Unleash)
- [ ] Health checks (liveness + readiness probes)
- [ ] Monitoring: Prometheus + Grafana
- [ ] Logging: ELK Stack or similar
- [ ] Distributed tracing: Jaeger/OpenTelemetry
- [ ] Secrets management (Vault/AWS Secrets Manager)
- [ ] Infrastructure as Code (Terraform/CloudFormation)
- [ ] Autoscaling configured
- [ ] Backup and disaster recovery plan

## Resources

- **Kubernetes:** https://kubernetes.io/docs/
- **Docker:** https://docs.docker.com/
- **Prometheus:** https://prometheus.io/docs/
- **OpenTelemetry:** https://opentelemetry.io/docs/
- **Terraform:** https://www.terraform.io/docs/

@@ -0,0 +1,387 @@
# Backend Development Mindset

Problem-solving approaches, architectural thinking, and collaboration patterns for backend engineers (2025).

## Problem-Solving Mindset

### Systems Thinking Approach

**Holistic Engineering** - Understanding how components interact within the larger ecosystem

```
User Request
  → Load Balancer
    → API Gateway (auth, rate limiting)
      → Application (business logic)
        → Cache Layer (Redis)
        → Database (persistent storage)
        → Message Queue (async processing)
        → External Services
```

**Questions to Ask:**
- What happens if this component fails?
- How does this scale under load?
- What are the dependencies?
- Where are the bottlenecks?
- What's the blast radius of changes?

### Breaking Down Complex Problems

**Decomposition Strategy:**

1. **Understand requirements** - What problem are we solving?
2. **Identify constraints** - Performance, budget, timeline, tech stack
3. **Break into modules** - Separate concerns (auth, data, business logic)
4. **Define interfaces** - API contracts between modules
5. **Prioritize** - Critical path first
6. **Iterate** - Build, test, refine

**Example: Building a Payment System**

```
Complex: "Build payment processing"

Decomposed:
1. Payment gateway integration (Stripe/PayPal)
2. Order creation and validation
3. Payment intent creation
4. Webhook handling (success/failure)
5. Idempotency (prevent double charges)
6. Retry logic for transient failures
7. Audit logging
8. Refund processing
9. Reconciliation system
```
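
Item 5 above (idempotency) is worth making concrete. A minimal sketch of idempotency-key handling for a charge endpoint - the in-memory Map stands in for Redis or a database table, and the route shape and gateway call are assumptions:

```typescript
import express from 'express';

const app = express();
app.use(express.json());

// In production this would live in Redis or a DB table with a TTL;
// a Map keeps the sketch self-contained.
const processed = new Map<string, unknown>();

app.post('/payments', async (req, res) => {
  const key = req.header('Idempotency-Key');
  if (!key) return res.status(400).json({ error: 'Idempotency-Key header required' });

  // Replay: return the original result instead of charging twice
  if (processed.has(key)) return res.status(200).json(processed.get(key));

  const result = await chargeCard(req.body); // hypothetical gateway call
  processed.set(key, result);
  res.status(201).json(result);
});

// Placeholder for the real payment-gateway integration
async function chargeCard(payload: unknown) {
  return { status: 'succeeded', payload };
}
```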

## Trade-Off Analysis

### CAP Theorem (Choose 2 of 3)

**Consistency** - All nodes see the same data at the same time
**Availability** - Every request receives a response
**Partition Tolerance** - System works despite network failures

**Real-World Choices:**
- **CP (Consistency + Partition Tolerance):** Banking systems, financial transactions
- **AP (Availability + Partition Tolerance):** Social media feeds, product catalogs
- **CA (Consistency + Availability):** Single-node databases (not distributed)

### PACELC Extension

**If Partition:** Choose Availability or Consistency
**Else (no partition):** Choose Latency or Consistency

**Examples:**
- **PA/EL:** Cassandra (available during partition, low latency normally)
- **PC/EC:** HBase (consistent during partition, prefers consistency over latency normally)
- **PA/EC:** DynamoDB (configurable consistency vs latency)

### Performance vs Maintainability

| Optimize For | When to Choose |
|--------------|---------------|
| **Performance** | Hot paths, high-traffic endpoints, real-time systems |
| **Maintainability** | Internal tools, admin dashboards, CRUD operations |
| **Both** | Core business logic, payment processing, authentication |

**Example:**
```typescript
// Maintainable: Readable, easy to debug
const users = await db.users.findAll({
  where: { active: true },
  include: ['posts', 'comments'],
});

// Performant: Optimized query, reduced joins
const users = await db.query(`
  SELECT u.*,
    (SELECT COUNT(*) FROM posts WHERE user_id = u.id) as post_count,
    (SELECT COUNT(*) FROM comments WHERE user_id = u.id) as comment_count
  FROM users u
  WHERE u.active = true
`);
```

### Technical Debt Management

**20-40% productivity increase** from addressing technical debt properly

**Debt Quadrants:**
1. **Reckless + Deliberate:** "We don't have time for design"
2. **Reckless + Inadvertent:** "What's layering?"
3. **Prudent + Deliberate:** "Ship now, refactor later" (acceptable)
4. **Prudent + Inadvertent:** "Now we know better" (acceptable)

**Prioritization:**
- High interest, high impact → Fix immediately
- High interest, low impact → Schedule in sprint
- Low interest, high impact → Tech debt backlog
- Low interest, low impact → Leave as-is
## Architectural Thinking

### Domain-Driven Design (DDD)

**Bounded Contexts** - Separate models for different domains

```
E-commerce System:

[Sales Context]            [Inventory Context]        [Shipping Context]
- Order (id, items,        - Product (id, stock,      - Shipment (id,
    total, customer)           location, reserved)        address, status)
- Customer (id, email)     - Warehouse (id, name)     - Carrier (name, API)
- Payment (status)         - StockLevel (quantity)    - Tracking (number)

Each context has its own:
- Data model
- Business rules
- Database schema
- API contracts
```

**Ubiquitous Language** - Shared vocabulary between devs and domain experts

### Layered Architecture (Separation of Concerns)

```
┌─────────────────────────────┐
│   Presentation Layer        │  Controllers, Routes, DTOs
│   (API endpoints)           │
├─────────────────────────────┤
│   Business Logic Layer      │  Services, Use Cases, Domain Logic
│   (Core logic)              │
├─────────────────────────────┤
│   Data Access Layer         │  Repositories, ORMs, Database
│   (Persistence)             │
└─────────────────────────────┘
```

**Benefits:**
- Clear responsibilities
- Easier testing (mock layers)
- Flexibility to change implementations
- Reduced coupling
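
A compact sketch of the three layers in TypeScript - class names are illustrative, and `app`/`userService` are assumed to be wired up elsewhere:

```typescript
// Data Access Layer: persistence only
class UserRepository {
  constructor(private db: { query(sql: string, params: unknown[]): Promise<any[]> }) {}

  async findByEmail(email: string) {
    const rows = await this.db.query('SELECT * FROM users WHERE email = $1', [email]);
    return rows[0] ?? null;
  }
}

// Business Logic Layer: rules only, no HTTP or SQL details
class UserService {
  constructor(private repo: UserRepository) {}

  async register(email: string) {
    if (await this.repo.findByEmail(email)) throw new Error('Email already exists');
    // ... create user, hash password, emit events
  }
}

// Presentation Layer: translate HTTP <-> domain
app.post('/users', async (req, res) => {
  try {
    await userService.register(req.body.email);
    res.status(201).end();
  } catch (e) {
    res.status(400).json({ error: (e as Error).message });
  }
});
```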

### Designing for Failure (Resilience)

**Assume everything fails eventually**

**Patterns:**
1. **Circuit Breaker** - Stop calling a failing service
2. **Retry with Backoff** - Exponential delay between retries
3. **Timeout** - Don't wait forever
4. **Fallback** - Graceful degradation
5. **Bulkhead** - Isolate failures (resource pools)

```typescript
import CircuitBreaker from 'opossum';

const breaker = new CircuitBreaker(externalAPICall, {
  timeout: 3000,                // 3s timeout
  errorThresholdPercentage: 50, // Open after 50% failures
  resetTimeout: 30000,          // Try again after 30s
});

breaker.fallback(() => ({ data: 'cached-response' }));

const result = await breaker.fire(requestParams);
```
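
Pattern 2 (retry with backoff) as a small generic helper - the attempt count, base delay, and cap are illustrative defaults:

```typescript
async function retryWithBackoff<T>(
  fn: () => Promise<T>,
  attempts = 5,
  baseMs = 100,
): Promise<T> {
  for (let i = 0; ; i++) {
    try {
      return await fn();
    } catch (err) {
      if (i >= attempts - 1) throw err; // out of retries
      // Exponential delay with full jitter, capped at 10s
      const delay = Math.min(baseMs * 2 ** i, 10_000) * Math.random();
      await new Promise((r) => setTimeout(r, delay));
    }
  }
}

// Usage: retry a flaky call up to 5 times
const data = await retryWithBackoff(() =>
  fetch('https://api.example.com/orders').then((r) => r.json()),
);
```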

## Developer Mindset

### Writing Maintainable Code

**SOLID Principles:**

**S - Single Responsibility** - Class/function does one thing
```typescript
// Bad: User class handles auth + email + logging
class User {
  authenticate() {}
  sendEmail() {}
  logActivity() {}
}

// Good: Separate responsibilities
class User {
  authenticate() {}
}
class EmailService {
  sendEmail() {}
}
class Logger {
  logActivity() {}
}
```

**O - Open/Closed** - Open for extension, closed for modification
```typescript
// Good: Strategy pattern
interface PaymentStrategy {
  process(amount: number): Promise<PaymentResult>;
}

class StripePayment implements PaymentStrategy {
  async process(amount: number) { /* ... */ }
}

class PayPalPayment implements PaymentStrategy {
  async process(amount: number) { /* ... */ }
}
```

### Thinking About Edge Cases

**Common Edge Cases:**
- Empty arrays/collections
- Null/undefined values
- Boundary values (min/max integers)
- Concurrent requests (race conditions)
- Network failures
- Duplicate requests (idempotency)
- Invalid input (SQL injection, XSS)

```typescript
// Good: Handle edge cases explicitly
async function getUsers(limit?: number) {
  // Validate input
  if (limit !== undefined && (limit < 1 || limit > 1000)) {
    throw new Error('Limit must be between 1 and 1000');
  }

  // Handle undefined
  const safeLimit = limit ?? 50;

  // Prevent SQL injection with parameterized query
  const users = await db.query('SELECT * FROM users LIMIT $1', [safeLimit]);

  // Handle empty results
  return users.length > 0 ? users : [];
}
```

### Testing Mindset (TDD/BDD)

**70% of happy-path tests drafted by AI; humans focus on edge cases**

**Test-Driven Development (TDD):**
```
1. Write failing test
2. Write minimal code to pass
3. Refactor
4. Repeat
```

**Behavior-Driven Development (BDD):**
```gherkin
Feature: User Registration
  Scenario: User registers with valid email
    Given I am on the registration page
    When I enter "test@example.com" as email
    And I enter "SecurePass123!" as password
    Then I should see "Registration successful"
    And I should receive a welcome email
```

### Observability and Debugging Approach

**100% median ROI, $500k average return** from observability investments

**Three Questions:**
1. **Is it slow?** → Check metrics (response time, DB queries)
2. **Is it broken?** → Check logs (errors, stack traces)
3. **Where is it broken?** → Check traces (distributed systems)

```typescript
// Good: Structured logging with context
logger.error('Payment processing failed', {
  orderId: order.id,
  userId: user.id,
  amount: order.total,
  error: error.message,
  stack: error.stack,
  timestamp: Date.now(),
  ipAddress: req.ip,
});
```

## Collaboration & Communication

### API Contract Design (Treating APIs as Products)

**Principles:**
1. **Versioning** - `/api/v1/users`, `/api/v2/users`
2. **Consistency** - Same patterns across endpoints
3. **Documentation** - OpenAPI/Swagger
4. **Backward compatibility** - Don't break existing clients
5. **Clear error messages** - Help clients fix issues

```typescript
// Good: Consistent API design
GET    /api/v1/users       # List users
GET    /api/v1/users/:id   # Get user
POST   /api/v1/users       # Create user
PUT    /api/v1/users/:id   # Update user
DELETE /api/v1/users/:id   # Delete user

// Consistent error format
{
  "error": {
    "code": "VALIDATION_ERROR",
    "message": "Invalid email format",
    "field": "email",
    "timestamp": "2025-01-09T12:00:00Z"
  }
}
```

### Database Schema Design Discussions

**Key Considerations:**
- **Normalization vs Denormalization** - Trade-offs for performance
- **Indexing strategy** - Query patterns dictate indexes
- **Migration path** - How to evolve schema without downtime
- **Data types** - VARCHAR(255) vs TEXT, INT vs BIGINT
- **Constraints** - Foreign keys, unique constraints, check constraints

### Code Review Mindset (Prevention-First)

**What to Look For:**
- Security vulnerabilities (SQL injection, XSS)
- Performance issues (N+1 queries, missing indexes)
- Error handling (uncaught exceptions)
- Edge cases (null checks, boundary values)
- Readability (naming, comments for complex logic)
- Tests (coverage for new code)

**Constructive Feedback:**
```
# Good review comment
"This could be vulnerable to SQL injection. Consider using parameterized queries:
`db.query('SELECT * FROM users WHERE id = $1', [userId])`"

# Bad review comment
"This is wrong. Fix it."
```

## Mindset Checklist

- [ ] Think in systems (understand dependencies)
- [ ] Analyze trade-offs (CAP, performance vs maintainability)
- [ ] Design for failure (circuit breakers, retries)
- [ ] Apply SOLID principles
- [ ] Consider edge cases (null, empty, boundaries)
- [ ] Write tests first (TDD/BDD)
- [ ] Log with context (structured logging)
- [ ] Design APIs as products (versioning, docs)
- [ ] Plan database schema evolution
- [ ] Give constructive code reviews

## Resources

- **Domain-Driven Design:** https://martinfowler.com/bliki/DomainDrivenDesign.html
- **CAP Theorem:** https://en.wikipedia.org/wiki/CAP_theorem
- **SOLID Principles:** https://en.wikipedia.org/wiki/SOLID
- **Resilience Patterns:** https://docs.microsoft.com/en-us/azure/architecture/patterns/

@@ -0,0 +1,397 @@
# Backend Performance & Scalability

Performance optimization strategies, caching patterns, and scalability best practices (2025).

## Database Performance

### Query Optimization

#### Indexing Strategies

**Impact:** 30% disk I/O reduction, 10-100x query speedup

```sql
-- Create index on frequently queried columns
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_user_id ON orders(user_id);

-- Composite index for multi-column queries
CREATE INDEX idx_orders_user_date ON orders(user_id, created_at DESC);

-- Partial index for filtered queries
CREATE INDEX idx_active_users ON users(email) WHERE active = true;

-- Analyze query performance
EXPLAIN ANALYZE SELECT * FROM orders
WHERE user_id = 123 AND created_at > '2025-01-01';
```

**Index Types:**
- **B-tree** - Default, general-purpose (equality, range queries)
- **Hash** - Fast equality lookups, no range queries
- **GIN** - Full-text search, JSONB queries
- **GiST** - Geospatial queries, range types

**When NOT to Index:**
- Small tables (<1000 rows)
- Frequently updated columns
- Low-cardinality columns (e.g., boolean with 2 values)

### Connection Pooling

**Impact:** 5-10x performance improvement

```typescript
// PostgreSQL with pg-pool
import { Pool } from 'pg';

const pool = new Pool({
  host: process.env.DB_HOST,
  database: process.env.DB_NAME,
  user: process.env.DB_USER,
  password: process.env.DB_PASSWORD,
  max: 20,                       // Maximum connections
  min: 5,                        // Minimum connections
  idleTimeoutMillis: 30000,      // Close idle connections after 30s
  connectionTimeoutMillis: 2000, // Error if can't connect in 2s
});

// Use pool for queries
const result = await pool.query('SELECT * FROM users WHERE id = $1', [userId]);
```

**Recommended Pool Sizes:**
- **Web servers:** `connections = (core_count * 2) + effective_spindle_count`
- **Typical:** 20-30 connections per app instance
- **Monitor:** Connection saturation in production

### N+1 Query Problem

**Bad: N+1 queries**
```typescript
// Fetches 1 query for posts, then N queries for authors
const posts = await Post.findAll();
for (const post of posts) {
  post.author = await User.findById(post.authorId); // N queries!
}
```

**Good: Join or eager loading**
```typescript
// Single query with JOIN
const posts = await Post.findAll({
  include: [{ model: User, as: 'author' }],
});
```

## Caching Strategies

### Redis Caching

**Impact:** 90% DB load reduction, 10-100x faster response

#### Cache-Aside Pattern (Lazy Loading)

```typescript
async function getUser(userId: string) {
  // Try cache first
  const cached = await redis.get(`user:${userId}`);
  if (cached) return JSON.parse(cached);

  // Cache miss - fetch from DB
  const user = await db.users.findById(userId);

  // Store in cache (TTL: 1 hour)
  await redis.setex(`user:${userId}`, 3600, JSON.stringify(user));

  return user;
}
```

#### Write-Through Pattern

```typescript
async function updateUser(userId: string, data: UpdateUserDto) {
  // Update database
  const user = await db.users.update(userId, data);

  // Update cache immediately
  await redis.setex(`user:${userId}`, 3600, JSON.stringify(user));

  return user;
}
```

#### Cache Invalidation

```typescript
// Invalidate on update
async function deleteUser(userId: string) {
  await db.users.delete(userId);
  await redis.del(`user:${userId}`);
  await redis.del(`user:${userId}:posts`); // Invalidate related caches
}

// Pattern-based invalidation (KEYS is O(N) and blocks Redis; prefer SCAN in production)
const keys = await redis.keys('user:*');
if (keys.length > 0) await redis.del(...keys);
```

### Cache Layers

```
Client
  → CDN Cache (static assets, 50%+ latency reduction)
    → API Gateway Cache (public endpoints)
      → Application Cache (Redis)
        → Database Query Cache
          → Database
```

### Cache Best Practices

1. **Cache frequently accessed data** - User profiles, config, product catalogs
2. **Set appropriate TTL** - Balance freshness vs performance (see the jitter sketch below)
3. **Invalidate on write** - Keep cache consistent
4. **Use cache keys wisely** - `resource:id:attribute` pattern
5. **Monitor hit rates** - Target >80% hit rate
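
One detail behind item 2: when many keys share an identical TTL they all expire together and stampede the database. A small sketch that adds jitter, assuming an ioredis client:

```typescript
import Redis from 'ioredis';

const redis = new Redis();

// Spread expirations +/-10% so hot keys don't all expire in the same second
async function setWithJitter(key: string, value: string, ttlSeconds: number) {
  const jitter = Math.round(ttlSeconds * 0.1 * (Math.random() * 2 - 1));
  await redis.setex(key, ttlSeconds + jitter, value);
}

await setWithJitter('user:123', JSON.stringify({ id: 123 }), 3600);
```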

## Load Balancing

### Algorithms

**Round Robin** - Distribute evenly across servers
```nginx
upstream backend {
  server backend1.example.com;
  server backend2.example.com;
  server backend3.example.com;
}
```

**Least Connections** - Route to server with fewest connections
```nginx
upstream backend {
  least_conn;
  server backend1.example.com;
  server backend2.example.com;
}
```

**IP Hash** - Same client → same server (session affinity)
```nginx
upstream backend {
  ip_hash;
  server backend1.example.com;
  server backend2.example.com;
}
```

### Health Checks

```typescript
// Express health check endpoint
app.get('/health', async (req, res) => {
  const checks = {
    uptime: process.uptime(),
    timestamp: Date.now(),
    database: await checkDatabase(),
    redis: await checkRedis(),
    memory: process.memoryUsage(),
  };

  const isHealthy = checks.database && checks.redis;
  res.status(isHealthy ? 200 : 503).json(checks);
});
```

## Asynchronous Processing

### Message Queues for Long-Running Tasks

```typescript
// Producer - Add job to queue
import Queue from 'bull';

const emailQueue = new Queue('email', {
  redis: { host: 'localhost', port: 6379 },
});

await emailQueue.add('send-welcome', {
  userId: user.id,
  email: user.email,
});

// Consumer - Process jobs
emailQueue.process('send-welcome', async (job) => {
  await sendWelcomeEmail(job.data.email);
});
```

**Use Cases:**
- Email sending
- Image/video processing
- Report generation
- Data export
- Webhook delivery
## CDN (Content Delivery Network)

**Impact:** 50%+ latency reduction for global users

### Configuration

```typescript
// Cache-Control headers
res.setHeader('Cache-Control', 'public, max-age=31536000, immutable'); // Static assets
res.setHeader('Cache-Control', 'public, max-age=3600');                // API responses
res.setHeader('Cache-Control', 'private, no-cache');                   // User-specific data
```

**CDN Providers:**
- Cloudflare (generous free tier, global coverage)
- AWS CloudFront (AWS integration)
- Fastly (real-time purging)

## Horizontal vs Vertical Scaling

### Horizontal Scaling (Scale Out)

**Pros:**
- Better fault tolerance
- Unlimited scaling potential
- Cost-effective (commodity hardware)

**Cons:**
- Complex architecture
- Data consistency challenges
- Network overhead

**When to use:** High traffic, need redundancy, stateless applications

### Vertical Scaling (Scale Up)

**Pros:**
- Simple architecture
- No code changes needed
- Easier data consistency

**Cons:**
- Hardware limits
- Single point of failure
- Expensive at high end

**When to use:** Monolithic apps, rapid scaling needed, data consistency critical
## Database Scaling Patterns

### Read Replicas

```
Primary (Write) → Replica 1 (Read)
                → Replica 2 (Read)
                → Replica 3 (Read)
```

**Implementation:**
```typescript
// Write to primary
await primaryDb.users.create(userData);

// Read from replica
const users = await replicaDb.users.findAll();
```

**Use Cases:**
- Read-heavy workloads (90%+ reads)
- Analytics queries
- Reporting dashboards

### Database Sharding

**Horizontal Partitioning** - Split data across databases

```typescript
// Shard by user ID
const SHARD_COUNT = 4;

// Any stable string hash works here; this one is illustrative
function hashCode(s: string): number {
  let h = 0;
  for (const ch of s) h = (h * 31 + ch.charCodeAt(0)) >>> 0;
  return h;
}

function getShardId(userId: string): number {
  return hashCode(userId) % SHARD_COUNT;
}

const shardId = getShardId(userId);
const db = shards[shardId];
const user = await db.users.findById(userId);
```

**Sharding Strategies:**
- **Range-based:** Users 1-1M → Shard 1, 1M-2M → Shard 2
- **Hash-based:** Hash(userId) % shard_count
- **Geographic:** EU users → EU shard, US users → US shard
- **Entity-based:** Users → Shard 1, Orders → Shard 2
## Performance Monitoring

### Key Metrics

**Application:**
- Response time (p50, p95, p99)
- Throughput (requests/second)
- Error rate
- CPU/memory usage

**Database:**
- Query execution time
- Connection pool saturation
- Cache hit rate
- Slow query log

**Tools:**
- Prometheus + Grafana (metrics)
- New Relic / Datadog (APM)
- Sentry (error tracking)
- OpenTelemetry (distributed tracing)

## Performance Optimization Checklist

### Database
- [ ] Indexes on frequently queried columns
- [ ] Connection pooling configured
- [ ] N+1 queries eliminated
- [ ] Slow query log monitored
- [ ] Query execution plans analyzed

### Caching
- [ ] Redis cache for hot data
- [ ] Cache TTL configured appropriately
- [ ] Cache invalidation on writes
- [ ] CDN for static assets
- [ ] >80% cache hit rate achieved

### Application
- [ ] Async processing for long tasks
- [ ] Response compression enabled (gzip)
- [ ] Load balancing configured
- [ ] Health checks implemented
- [ ] Resource limits set (CPU, memory)

### Monitoring
- [ ] APM tool configured (New Relic/Datadog)
- [ ] Error tracking (Sentry)
- [ ] Performance dashboards (Grafana)
- [ ] Alerting on key metrics
- [ ] Distributed tracing for microservices

## Common Performance Pitfalls

1. **No caching** - Repeatedly querying same data
2. **Missing indexes** - Full table scans
3. **N+1 queries** - Fetching related data in loops
4. **Synchronous processing** - Blocking on long tasks
5. **No connection pooling** - Creating new connections per request
6. **Unbounded queries** - No LIMIT on large tables
7. **No CDN** - Serving static assets from origin

## Resources

- **PostgreSQL Performance:** https://www.postgresql.org/docs/current/performance-tips.html
- **Redis Best Practices:** https://redis.io/docs/management/optimization/
- **Web Performance:** https://web.dev/performance/
- **Database Indexing:** https://use-the-index-luke.com/

@@ -0,0 +1,290 @@
# Backend Security

Security best practices, OWASP Top 10 mitigation, and modern security standards (2025).

## OWASP Top 10 (2025 RC1)

### New Entries (2025)
- **Supply Chain Failures** - Vulnerable dependencies, compromised packages
- **Mishandling of Exceptional Conditions** - Improper error handling exposing system info

### Top Vulnerabilities & Mitigation

#### 1. Broken Access Control
**Risk:** Users access unauthorized resources (28% of vulnerabilities)

**Mitigation:**
- Implement RBAC (Role-Based Access Control)
- Deny by default, explicitly allow
- Log access control failures
- Enforce authorization on backend (never client-side)
- Use JWT with proper claims validation

```typescript
// Good: Server-side authorization check
@UseGuards(JwtAuthGuard, RolesGuard)
@Roles('admin')
async deleteUser(@Param('id') id: string) {
  // Verify user can access this resource
  return this.usersService.delete(id);
}
```

#### 2. Cryptographic Failures
**Risk:** Sensitive data exposure, weak encryption

**Mitigation:**
- Use Argon2id for password hashing (replaces bcrypt as of 2025)
- TLS 1.3 for data in transit
- Encrypt sensitive data at rest (AES-256)
- Use crypto.randomBytes() for tokens, not Math.random()
- Never store passwords in plain text

```python
# Good: Argon2id password hashing
from argon2 import PasswordHasher

ph = PasswordHasher()
hash = ph.hash("password123")   # Auto-salted, memory-hard
ph.verify(hash, "password123")  # Verify password
```
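
The crypto.randomBytes() point from the mitigation list, in Node - the token length is a common default:

```typescript
import { randomBytes } from 'crypto';

// 32 random bytes -> 64-char hex token; unpredictable, unlike Math.random()
const resetToken = randomBytes(32).toString('hex');
```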

#### 3. Injection Attacks
**Risk:** SQL injection, NoSQL injection, command injection (6x increase 2020-2024)

**Mitigation (98% vulnerability reduction):**
- Use parameterized queries ALWAYS
- Input validation with allow-lists
- Escape special characters
- Use ORMs properly (avoid raw queries)

```typescript
// Bad: Vulnerable to SQL injection
const query = `SELECT * FROM users WHERE email = '${email}'`;

// Good: Parameterized query
const query = 'SELECT * FROM users WHERE email = $1';
const result = await db.query(query, [email]);
```

#### 4. Insecure Design
**Risk:** Flawed architecture, missing security controls

**Mitigation:**
- Threat modeling during design phase
- Security requirements from start
- Principle of least privilege
- Defense in depth (multiple security layers)

#### 5. Security Misconfiguration
**Risk:** Default credentials, verbose errors, unnecessary features enabled

**Mitigation:**
- Remove default accounts
- Disable directory listing
- Use security headers (CSP, HSTS, X-Frame-Options)
- Minimize attack surface
- Regular security audits

```typescript
// Security headers middleware
app.use(helmet({
  contentSecurityPolicy: {
    directives: {
      defaultSrc: ["'self'"],
      scriptSrc: ["'self'", "'unsafe-inline'"],
    },
  },
  hsts: {
    maxAge: 31536000,
    includeSubDomains: true,
  },
}));
```

#### 6. Vulnerable Components
**Risk:** Outdated dependencies with known vulnerabilities

**Mitigation:**
- Regular dependency updates (npm audit, pip-audit)
- Use Dependabot/Renovate for automated updates
- Monitor CVE databases
- Software composition analysis (SCA) in CI/CD
- Lock file integrity checks

```bash
# Check for vulnerabilities
npm audit fix
pip-audit --fix
```

#### 7. Authentication Failures
**Risk:** Weak passwords, session hijacking, credential stuffing

**Mitigation:**
- MFA mandatory for admin accounts
- Rate limiting on login endpoints (10 attempts/minute)
- Strong password policies (12+ chars, complexity)
- Session timeout (15 mins idle, 8 hours absolute)
- FIDO2/WebAuthn for passwordless auth

#### 8. Software & Data Integrity Failures
**Risk:** CI/CD pipeline compromise, unsigned updates

**Mitigation:**
- Code signing for releases
- Verify integrity of packages (lock files)
- Secure CI/CD pipelines (immutable builds)
- Checksum verification

#### 9. Logging & Monitoring Failures
**Risk:** Breaches undetected, insufficient audit trail

**Mitigation:**
- Log authentication events (success/failure)
- Log access control failures
- Centralized logging (ELK Stack, Splunk)
- Alerting on suspicious patterns
- Log rotation and retention policies

#### 10. Server-Side Request Forgery (SSRF)
**Risk:** Server makes malicious requests to internal resources

**Mitigation:**
- Validate and sanitize URLs (see the sketch below)
- Allow-list for remote resources
- Network segmentation
- Disable unnecessary protocols (file://, gopher://)
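
A minimal allow-list check before the server fetches a user-supplied URL - the allowed hosts are placeholders, and a full defense would also pin DNS and cap redirects:

```typescript
const ALLOWED_HOSTS = new Set(['images.example.com', 'cdn.example.com']); // placeholders

function assertSafeUrl(raw: string): URL {
  const url = new URL(raw); // throws on malformed input
  if (url.protocol !== 'https:') throw new Error('Only https is allowed');
  if (!ALLOWED_HOSTS.has(url.hostname)) throw new Error('Host not on allow-list');
  return url;
}

// Server-side fetch of a user-supplied URL goes through the check
const target = assertSafeUrl(req.body.imageUrl);
const response = await fetch(target);
```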

## Input Validation (Prevents 70%+ Vulnerabilities)

### Validation Strategies

**1. Type Validation**
```typescript
// Use class-validator with NestJS
class CreateUserDto {
  @IsEmail()
  email: string;

  @IsString()
  @MinLength(12)
  @Matches(/^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)/)
  password: string;

  @IsInt()
  @Min(18)
  age: number;
}
```

**2. Sanitization**
```typescript
import DOMPurify from 'isomorphic-dompurify';

// Sanitize HTML input
const clean = DOMPurify.sanitize(userInput);
```

**3. Allow-lists (Preferred over Deny-lists)**
```typescript
// Good: Allow-list approach
const allowedFields = ['name', 'email', 'age'];
const sanitized = Object.keys(input)
  .filter(key => allowedFields.includes(key))
  .reduce((obj, key) => ({ ...obj, [key]: input[key] }), {});
```

## Rate Limiting

### Token Bucket Algorithm (Industry Standard)

Note that `express-rate-limit` below uses a fixed window rather than a true token bucket, but the effect - capping requests per client per interval - is the same:

```typescript
import rateLimit from 'express-rate-limit';

const limiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100,                 // 100 requests per window
  standardHeaders: true,
  legacyHeaders: false,
  message: 'Too many requests, please try again later',
});

app.use('/api/', limiter);
```

### API-Specific Limits

- **Authentication:** 10 attempts/15 min
- **Public APIs:** 100 requests/15 min
- **Authenticated APIs:** 1000 requests/15 min
- **Admin endpoints:** 50 requests/15 min (applied per route group in the sketch below)
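
Applying different limits per route group with the same library - thresholds mirror the list above, and `app` is assumed to be an Express instance:

```typescript
import rateLimit from 'express-rate-limit';

const fifteenMin = 15 * 60 * 1000;
const authLimiter = rateLimit({ windowMs: fifteenMin, max: 10 });
const publicLimiter = rateLimit({ windowMs: fifteenMin, max: 100 });
const adminLimiter = rateLimit({ windowMs: fifteenMin, max: 50 });

app.use('/api/auth', authLimiter);   // login, registration
app.use('/api/admin', adminLimiter); // admin endpoints
app.use('/api', publicLimiter);      // everything else
```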

## Security Headers

```typescript
// Essential security headers (2025)
{
  'Strict-Transport-Security': 'max-age=31536000; includeSubDomains',
  'Content-Security-Policy': "default-src 'self'",
  'X-Frame-Options': 'DENY',
  'X-Content-Type-Options': 'nosniff',
  'Referrer-Policy': 'strict-origin-when-cross-origin',
  'Permissions-Policy': 'geolocation=(), microphone=()',
}
```

## Secrets Management

### Best Practices

1. **Never commit secrets** - Use .env files (gitignored)
2. **Environment-specific** - Different secrets per environment
3. **Rotation policy** - Rotate secrets every 90 days
4. **Encryption at rest** - Encrypt secrets in secret managers
5. **Least privilege** - Minimal permissions per secret

### Tools

- **HashiCorp Vault** - Multi-cloud, dynamic secrets
- **AWS Secrets Manager** - Managed service, auto-rotation
- **Azure Key Vault** - Integrated with Azure services
- **Pulumi ESC** - Unified secrets orchestration (2025 trend)

```typescript
// Good: Secrets from environment
const dbPassword = process.env.DB_PASSWORD;
if (!dbPassword) throw new Error('DB_PASSWORD not set');
```

## API Security Checklist

- [ ] Use HTTPS/TLS 1.3 only
- [ ] Implement OAuth 2.1 + JWT for authentication
- [ ] Rate limiting on all endpoints
- [ ] Input validation on all inputs
- [ ] Parameterized queries (prevent SQL injection)
- [ ] Security headers configured
- [ ] CORS properly configured (not `*` in production)
- [ ] API versioning implemented
- [ ] Error messages don't leak system info
- [ ] Logging authentication events
- [ ] MFA for admin accounts
- [ ] Regular security audits (quarterly)

## Common Security Pitfalls

1. **Client-side validation only** - Always validate on server
2. **Using Math.random() for tokens** - Use crypto.randomBytes()
3. **Storing passwords with bcrypt** - Use Argon2id (2025 standard)
4. **Trusting user input** - Validate and sanitize everything
5. **Weak CORS configuration** - Don't use `*` in production
6. **Insufficient logging** - Log all authentication/authorization events
7. **No rate limiting** - Implement on all public endpoints

## Resources

- **OWASP Top 10 (2025):** https://owasp.org/www-project-top-ten/
- **OWASP Cheat Sheets:** https://cheatsheetseries.owasp.org/
- **CWE Top 25:** https://cwe.mitre.org/top25/
- **NIST Guidelines:** https://www.nist.gov/cybersecurity

@@ -0,0 +1,256 @@
# Backend Technologies

Core technologies, frameworks, databases, and message queues for modern backend development (2025).

## Programming Languages

### Node.js/TypeScript
**Market Position:** TypeScript dominance in Node.js backend (industry standard)

**Best For:**
- Full-stack JavaScript teams
- Real-time applications (WebSockets, Socket.io)
- Rapid prototyping with npm ecosystem (2M+ packages)
- Event-driven architectures

**Popular Frameworks:**
- **NestJS** - Enterprise-grade, TypeScript-first, modular architecture
- **Express** - Lightweight, flexible, most popular (23M weekly downloads)
- **Fastify** - High performance (20k req/sec vs Express 15k req/sec)
- **tRPC** - End-to-end typesafe APIs without GraphQL

**When to Choose:** Team already using JavaScript/TypeScript, real-time features needed, rapid development priority

### Python
**Market Position:** FastAPI adoption surge - 73% migrating from Flask

**Best For:**
- Data-heavy applications
- ML/AI integration (TensorFlow, PyTorch)
- Scientific computing
- Scripting and automation

**Popular Frameworks:**
- **FastAPI** - Modern, async, auto-generated OpenAPI docs, validation via Pydantic
- **Django** - Batteries-included, ORM, admin panel, authentication
- **Flask** - Lightweight, flexible, microservices-friendly

**When to Choose:** Data science integration, ML/AI features, rapid prototyping, team Python expertise

### Go
**Market Position:** Preferred for microservices at scale (Docker, Kubernetes written in Go)

**Best For:**
- High-concurrency systems (goroutines)
- Microservices architectures
- CLI tools and DevOps tooling
- System programming

**Popular Frameworks:**
- **Gin** - Fast HTTP router (40x faster than Martini)
- **Echo** - High performance, extensible
- **Fiber** - Express-like API, built on Fasthttp

**When to Choose:** Microservices, high concurrency needs, DevOps tooling, simple deployment (single binary)

### Rust
**Market Position:** 72% most admired language, 1.5x faster than Go

**Best For:**
- Performance-critical systems
- Memory-safe system programming
- High-reliability requirements
- WebAssembly backends

**Popular Frameworks:**
- **Axum** - Ergonomic, modular, tokio-based
- **Actix-web** - Fastest web framework (benchmark leader)
- **Rocket** - Type-safe, easy to use

**When to Choose:** Maximum performance needed, memory safety critical, low-level control required

## Databases

### Relational (SQL)

#### PostgreSQL
**Market Position:** Most popular SQL database for new projects

**Strengths:**
- ACID compliance, data integrity
- JSON/JSONB support (hybrid SQL + NoSQL)
- Full-text search, geospatial (PostGIS)
- Advanced indexing (B-tree, Hash, GiST, GIN)
- Window functions, CTEs, materialized views

**Use Cases:**
- E-commerce (transactions critical)
- Financial applications
- Complex reporting requirements
- Multi-tenant applications

**When to Choose:** Need ACID guarantees, complex queries/joins, data integrity critical

### NoSQL

#### MongoDB
**Market Position:** Leading document database

**Strengths:**
- Flexible/evolving schemas
- Horizontal scaling (sharding built-in)
- Aggregation pipeline (powerful data processing)
- GridFS for large files

**Use Cases:**
- Content management systems
- Real-time analytics
- IoT data collection
- Catalogs with varied attributes

**When to Choose:** Schema flexibility needed, rapid iteration, horizontal scaling required

### Caching & In-Memory

#### Redis
**Market Position:** Industry standard for caching and session storage

**Capabilities:**
- In-memory key-value store
- Pub/sub messaging
- Sorted sets (leaderboards)
- Geospatial indexes
- Streams (event sourcing)

**Performance:** 10-100x faster than disk-based databases

**Use Cases:**
- Session storage
- Rate limiting
- Real-time leaderboards
- Job queues (Bull, BullMQ)
- Caching layer (90% DB load reduction)

**When to Choose:** Need sub-millisecond latency, caching layer, session management
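
The sorted-set capability above in practice - a leaderboard sketch with ioredis, where the key and member names are illustrative:

```typescript
import Redis from 'ioredis';

const redis = new Redis();

// Record a score; ZADD keeps members ordered by score
await redis.zadd('leaderboard', 1520, 'player:42');

// Top 10 with scores, highest first
const top = await redis.zrevrange('leaderboard', 0, 9, 'WITHSCORES');

// Rank of a single player (0-based, highest first)
const rank = await redis.zrevrank('leaderboard', 'player:42');
```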

## ORMs & Database Tools

### Modern ORMs (2025)

**Drizzle ORM** (TypeScript)
- Winning the NestJS performance race
- 7.4kb, zero dependencies
- SQL-like syntax, full type safety
- Best for: Performance-critical TypeScript apps

**Prisma** (TypeScript)
- Auto-generated type-safe client
- Database migrations included
- Excellent DX with Prisma Studio
- Best for: Rapid development, type safety

**TypeORM** (TypeScript)
- Mature, feature-complete
- Supports Active Record + Data Mapper
- Best for: Complex enterprise apps

**SQLAlchemy** (Python)
- Industry-standard Python ORM
- Powerful query builder
- Best for: Python backends

## Message Queues & Event Streaming

### RabbitMQ
**Best For:** Task queues, request/reply patterns

**Strengths:**
- Flexible routing (direct, topic, fanout, headers)
- Message acknowledgment and durability
- Dead letter exchanges
- Wide protocol support (AMQP, MQTT, STOMP)

**Use Cases:**
- Background job processing
- Microservices communication
- Email/notification queues

**When to Choose:** Traditional message queue needs, complex routing, moderate throughput

### Apache Kafka
**Best For:** Event streaming, millions of messages/second

**Strengths:**
- Distributed, fault-tolerant
- High throughput (millions msg/sec)
- Message replay (retention-based)
- Stream processing (Kafka Streams)

**Use Cases:**
- Real-time analytics
- Event sourcing
- Log aggregation
- Netflix/Uber scale (billions of events/day)

**When to Choose:** Event streaming, high throughput, event replay needed, real-time analytics
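
A minimal produce/consume sketch with the kafkajs client - broker address, topic, and group ID are placeholders:

```typescript
import { Kafka } from 'kafkajs';

const kafka = new Kafka({ clientId: 'api-service', brokers: ['localhost:9092'] });

// Producer: write an event
const producer = kafka.producer();
await producer.connect();
await producer.send({
  topic: 'orders',
  messages: [{ key: 'order-1', value: JSON.stringify({ total: 99.5 }) }],
});

// Consumer: read events in a consumer group
const consumer = kafka.consumer({ groupId: 'billing' });
await consumer.connect();
await consumer.subscribe({ topic: 'orders', fromBeginning: true });
await consumer.run({
  eachMessage: async ({ message }) => {
    console.log(message.key?.toString(), message.value?.toString());
  },
});
```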

## Framework Comparisons

### Node.js Frameworks

| Framework | Performance | Learning Curve | Use Case |
|-----------|------------|----------------|----------|
| Express | Moderate | Easy | Simple APIs, learning |
| NestJS | Moderate | Steep | Enterprise apps |
| Fastify | High | Moderate | Performance-critical |
| tRPC | High | Moderate | Full-stack TypeScript |

### Python Frameworks

| Framework | Performance | Features | Use Case |
|-----------|------------|----------|----------|
| FastAPI | High | Modern, async | New projects, APIs |
| Django | Moderate | Batteries-included | Full-featured apps |
| Flask | Moderate | Minimal | Microservices, simple APIs |

## Technology Selection Flowchart

```
Start → Need real-time features?
  → Yes → Node.js + Socket.io
  → No  → Need ML/AI integration?
    → Yes → Python + FastAPI
    → No  → Need maximum performance?
      → Yes → Rust + Axum
      → No  → Need high concurrency?
        → Yes → Go + Gin
        → No  → Node.js + NestJS (safe default)

Database Selection:
  ACID needed? → Yes → PostgreSQL
               → No  → Flexible schema? → Yes → MongoDB
                                        → No  → PostgreSQL (default)

Caching needed? → Always use Redis

Message Queue:
  Millions msg/sec? → Yes → Kafka
                    → No  → RabbitMQ
```

## Common Pitfalls

1. **Choosing NoSQL for relational data** - Use PostgreSQL if data has clear relationships
2. **Not using connection pooling** - Implement pooling for 5-10x performance boost
3. **Ignoring indexes** - Add indexes to frequently queried columns (30% I/O reduction)
4. **Over-engineering with microservices** - Start monolith, split when needed
5. **Not caching** - Redis caching provides 90% DB load reduction

## Resources

- **NestJS:** https://nestjs.com
- **FastAPI:** https://fastapi.tiangolo.com
- **PostgreSQL:** https://www.postgresql.org/docs/
- **MongoDB:** https://www.mongodb.com/docs/
- **Redis:** https://redis.io/docs/
- **Kafka:** https://kafka.apache.org/documentation/

@@ -0,0 +1,429 @@
# Backend Testing Strategies
|
||||
|
||||
Comprehensive testing approaches, frameworks, and quality assurance practices (2025).
|
||||
|
||||
## Test Pyramid (70-20-10 Rule)
|
||||
|
||||
```
|
||||
/\
|
||||
/E2E\ 10% - End-to-End Tests
|
||||
/------\
|
||||
/Integr.\ 20% - Integration Tests
|
||||
/----------\
|
||||
/ Unit \ 70% - Unit Tests
|
||||
/--------------\
|
||||
```
|
||||
|
||||
**Rationale:**
|
||||
- Unit tests: Fast, cheap, isolate bugs quickly
|
||||
- Integration tests: Verify component interactions
|
||||
- E2E tests: Expensive, slow, but validate real user flows
|
||||
|
||||
## Unit Testing
|
||||
|
||||
### Frameworks by Language
|
||||
|
||||
**TypeScript/JavaScript:**
|
||||
- **Vitest** - 50% faster than Jest in CI/CD, ESM native
|
||||
- **Jest** - Mature, large ecosystem, snapshot testing
|
||||
|
||||
**Python:**
|
||||
- **Pytest** - Industry standard, fixtures, parametrization
|
||||
- **Unittest** - Built-in, standard library
|
||||
|
||||
**Go:**
|
||||
- **testing** - Built-in, table-driven tests
|
||||
- **testify** - Assertions and mocking
|
||||
|
||||
### Best Practices
|
||||
|
||||
```typescript
|
||||
// Good: Test single responsibility
|
||||
describe('UserService', () => {
|
||||
describe('createUser', () => {
|
||||
it('should create user with valid data', async () => {
|
||||
const userData = { email: 'test@example.com', name: 'Test' };
|
||||
const user = await userService.createUser(userData);
|
||||
|
||||
expect(user).toMatchObject(userData);
|
||||
expect(user.id).toBeDefined();
|
||||
});
|
||||
|
||||
it('should throw error with duplicate email', async () => {
|
||||
const userData = { email: 'existing@example.com', name: 'Test' };
|
||||
|
||||
await expect(userService.createUser(userData))
|
||||
.rejects.toThrow('Email already exists');
|
||||
});
|
||||
|
||||
it('should hash password before storing', async () => {
|
||||
const userData = { email: 'test@example.com', password: 'plain123' };
|
||||
const user = await userService.createUser(userData);
|
||||
|
||||
expect(user.password).not.toBe('plain123');
|
||||
expect(user.password).toMatch(/^\$argon2id\$/);
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### Mocking
|
||||
|
||||
```typescript
|
||||
// Mock external dependencies
|
||||
jest.mock('./emailService');
|
||||
|
||||
it('should send welcome email after user creation', async () => {
|
||||
const emailService = require('./emailService');
|
||||
emailService.sendWelcomeEmail = jest.fn();
|
||||
|
||||
await userService.createUser({ email: 'test@example.com' });
|
||||
|
||||
expect(emailService.sendWelcomeEmail).toHaveBeenCalledWith('test@example.com');
|
||||
});
|
||||
```
|
||||
|
||||
## Integration Testing
|
||||
|
||||
### API Integration Tests
|
||||
|
||||
```typescript
|
||||
import request from 'supertest';
|
||||
import { app } from '../app';
|
||||
|
||||
describe('POST /api/users', () => {
|
||||
beforeAll(async () => {
|
||||
await db.connect(); // Real database connection (test DB)
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await db.disconnect();
|
||||
});
|
||||
|
||||
beforeEach(async () => {
|
||||
await db.users.deleteMany({}); // Clean state
|
||||
});
|
||||
|
||||
it('should create user and return 201', async () => {
|
||||
const response = await request(app)
|
||||
.post('/api/users')
|
||||
.send({ email: 'test@example.com', name: 'Test User' })
|
||||
.expect(201);
|
||||
|
||||
expect(response.body).toMatchObject({
|
||||
email: 'test@example.com',
|
||||
name: 'Test User',
|
||||
});
|
||||
|
||||
// Verify database persistence
|
||||
const user = await db.users.findOne({ email: 'test@example.com' });
|
||||
expect(user).toBeDefined();
|
||||
});
|
||||
|
||||
it('should return 400 for invalid email', async () => {
|
||||
await request(app)
|
||||
.post('/api/users')
|
||||
.send({ email: 'invalid-email', name: 'Test' })
|
||||
.expect(400)
|
||||
.expect((res) => {
|
||||
expect(res.body.error).toBe('Invalid email format');
|
||||
});
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### Database Testing with TestContainers
|
||||
|
||||
```typescript
|
||||
import { GenericContainer } from 'testcontainers';
|
||||
|
||||
let container;
|
||||
let db;
|
||||
|
||||
beforeAll(async () => {
|
||||
// Spin up real PostgreSQL in Docker
|
||||
container = await new GenericContainer('postgres:15')
|
||||
.withEnvironment({ POSTGRES_PASSWORD: 'test' })
|
||||
.withExposedPorts(5432)
|
||||
.start();
|
||||
|
||||
const port = container.getMappedPort(5432);
|
||||
db = await createConnection({
|
||||
host: 'localhost',
|
||||
port,
|
||||
database: 'test',
|
||||
password: 'test',
|
||||
});
|
||||
}, 60000);
|
||||
|
||||
afterAll(async () => {
|
||||
await container.stop();
|
||||
});
|
||||
```
|
||||
|
||||
## Contract Testing (Microservices)
|
||||
|
||||
### Pact (Consumer-Driven Contracts)
|
||||
|
||||
```typescript
|
||||
// Consumer test
|
||||
import { Pact } from '@pact-foundation/pact';
|
||||
|
||||
const provider = new Pact({
|
||||
consumer: 'UserService',
|
||||
provider: 'AuthService',
|
||||
});
|
||||
|
||||
describe('Auth Service Contract', () => {
|
||||
beforeAll(() => provider.setup());
|
||||
afterEach(() => provider.verify());
|
||||
afterAll(() => provider.finalize());
|
||||
|
||||
it('should validate user token', async () => {
|
||||
await provider.addInteraction({
|
||||
state: 'user token exists',
|
||||
uponReceiving: 'a request to validate token',
|
||||
withRequest: {
|
||||
method: 'POST',
|
||||
path: '/auth/validate',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: { token: 'valid-token-123' },
|
||||
},
|
||||
willRespondWith: {
|
||||
status: 200,
|
||||
body: { valid: true, userId: '123' },
|
||||
},
|
||||
});
|
||||
|
||||
const response = await authClient.validateToken('valid-token-123');
|
||||
expect(response.valid).toBe(true);
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
## Load Testing
|
||||
|
||||
### Tools Comparison
|
||||
|
||||
**k6** (Modern, Developer-Friendly)
|
||||
```javascript
|
||||
import http from 'k6/http';
|
||||
import { check, sleep } from 'k6';
|
||||
|
||||
export const options = {
|
||||
stages: [
|
||||
{ duration: '2m', target: 100 }, // Ramp up to 100 users
|
||||
{ duration: '5m', target: 100 }, // Stay at 100 users
|
||||
{ duration: '2m', target: 0 }, // Ramp down to 0 users
|
||||
],
|
||||
thresholds: {
|
||||
http_req_duration: ['p(95)<500'], // 95% requests under 500ms
|
||||
},
|
||||
};
|
||||
|
||||
export default function () {
|
||||
const res = http.get('https://api.example.com/users');
|
||||
check(res, {
|
||||
'status is 200': (r) => r.status === 200,
|
||||
'response time < 500ms': (r) => r.timings.duration < 500,
|
||||
});
|
||||
sleep(1);
|
||||
}
|
||||
```
|
||||
|
||||
**Gatling** (JVM-based, Advanced Scenarios)
|
||||
**JMeter** (GUI-based, Traditional)
|
||||
|
||||
### Performance Thresholds
|
||||
|
||||
- **Response time:** p95 < 500ms, p99 < 1s
|
||||
- **Throughput:** 1000+ req/sec (target based on SLA)
|
||||
- **Error rate:** < 1%
|
||||
- **Concurrent users:** Test at 2x expected peak
|
||||
|
||||
## E2E Testing
|
||||
|
||||
### Playwright (Modern, Multi-Browser)
|
||||
|
||||
```typescript
|
||||
import { test, expect } from '@playwright/test';
|
||||
|
||||
test('user can register and login', async ({ page }) => {
|
||||
// Navigate to registration page
|
||||
await page.goto('https://app.example.com/register');
|
||||
|
||||
// Fill registration form
|
||||
await page.fill('input[name="email"]', 'test@example.com');
|
||||
await page.fill('input[name="password"]', 'SecurePass123!');
|
||||
await page.click('button[type="submit"]');
|
||||
|
||||
// Verify redirect to dashboard
|
||||
await expect(page).toHaveURL('/dashboard');
|
||||
await expect(page.locator('h1')).toContainText('Welcome');
|
||||
|
||||
// Verify API call was made
|
||||
const response = await page.waitForResponse('/api/users');
|
||||
expect(response.status()).toBe(201);
|
||||
});
|
||||
```
|
||||
|
||||
## Database Migration Testing
|
||||
|
||||
**Critical:** 83% migrations fail without proper testing
|
||||
|
||||
```typescript
|
||||
describe('Database Migrations', () => {
|
||||
it('should migrate from v1 to v2 without data loss', async () => {
|
||||
// Insert test data in v1 schema
|
||||
await db.query(`
|
||||
INSERT INTO users (id, email, name)
|
||||
VALUES (1, 'test@example.com', 'Test User')
|
||||
`);
|
||||
|
||||
// Run migration
|
||||
await runMigration('v2-add-created-at.sql');
|
||||
|
||||
// Verify v2 schema
|
||||
const result = await db.query('SELECT * FROM users WHERE id = 1');
|
||||
expect(result.rows[0]).toMatchObject({
|
||||
id: 1,
|
||||
email: 'test@example.com',
|
||||
name: 'Test User',
|
||||
created_at: expect.any(Date),
|
||||
});
|
||||
});
|
||||
|
||||
it('should rollback migration successfully', async () => {
|
||||
await runMigration('v2-add-created-at.sql');
|
||||
await rollbackMigration('v2-add-created-at.sql');
|
||||
|
||||
// Verify v1 schema restored
|
||||
const columns = await db.query(`
|
||||
SELECT column_name FROM information_schema.columns
|
||||
WHERE table_name = 'users'
|
||||
`);
|
||||
expect(columns.rows.map(r => r.column_name)).not.toContain('created_at');
|
||||
});
|
||||
});
|
||||
```

## Security Testing

### SAST (Static Application Security Testing)

```bash
# SonarQube for code quality + security
sonar-scanner \
  -Dsonar.projectKey=my-backend \
  -Dsonar.sources=src \
  -Dsonar.host.url=http://localhost:9000

# Semgrep for security patterns
semgrep --config auto src/
```

### DAST (Dynamic Application Security Testing)

```bash
# OWASP ZAP for runtime security scanning
docker run -t owasp/zap2docker-stable zap-baseline.py \
  -t https://api.example.com \
  -r zap-report.html
```

### Dependency Scanning (SCA)

```bash
# npm audit for Node.js
npm audit fix

# Snyk for multi-language
snyk test
snyk monitor  # Continuous monitoring
```
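
In CI it is often better to gate on severity than to auto-fix, for example:

```bash
# Fail the build only when high/critical advisories are present
npm audit --audit-level=high
```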

## Code Coverage

### Target Metrics (SonarQube Standards)

- **Overall coverage:** 80%+
- **Critical paths:** 100% (authentication, payment, data integrity)
- **New code:** 90%+

### Implementation

```bash
# Vitest with coverage
vitest run --coverage

# Jest with coverage
jest --coverage --coverageThreshold='{"global":{"branches":80,"functions":80,"lines":80}}'
```
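
The Jest command above enforces the thresholds inline; in Vitest the equivalent lives in the config file. A minimal sketch (field names follow Vitest's `coverage.thresholds` option; verify against your Vitest version):

```typescript
// vitest.config.ts: fail the run when coverage drops below target
import { defineConfig } from 'vitest/config';

export default defineConfig({
  test: {
    coverage: {
      provider: 'v8',
      thresholds: { lines: 80, branches: 80, functions: 80, statements: 80 },
    },
  },
});
```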

## CI/CD Testing Pipeline

```yaml
# GitHub Actions example
name: Test Pipeline

on: [push, pull_request]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      # Install toolchain and dependencies before running any tests
      - uses: actions/setup-node@v3
        with:
          node-version: 20

      - name: Install Dependencies
        run: npm ci

      - name: Unit Tests
        run: npm run test:unit

      - name: Integration Tests
        run: npm run test:integration

      - name: E2E Tests
        run: npm run test:e2e

      - name: Load Tests
        run: k6 run load-test.js  # assumes k6 is available on the runner

      - name: Security Scan
        run: npm audit && snyk test

      - name: Coverage Report
        run: npm run test:coverage

      - name: Upload to Codecov
        uses: codecov/codecov-action@v3
```

## Testing Best Practices

1. **Arrange-Act-Assert (AAA) Pattern**
2. **One assertion per test** (when practical)
3. **Descriptive test names** - `should throw error when email is invalid`
4. **Test edge cases** - Empty inputs, boundary values, null/undefined
5. **Clean test data** - Reset database state between tests
6. **Fast tests** - Unit tests < 10ms, Integration < 100ms
7. **Deterministic** - No flaky tests; avoid sleep(), use waitFor() (see the sketch after this list)
8. **Independent** - Tests don't depend on execution order
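
A minimal illustration of item 7 with Playwright's polling assertions (the page and selector are hypothetical):

```typescript
import { test, expect } from '@playwright/test';

test('job status updates without fixed sleeps', async ({ page }) => {
  await page.goto('https://app.example.com/jobs/42'); // hypothetical page

  // Flaky: waits a fixed time whether or not the UI is ready
  // await page.waitForTimeout(5000);

  // Deterministic: polls until the condition holds or the timeout elapses
  await expect(page.locator('[data-testid="job-status"]')).toHaveText('Done', {
    timeout: 10_000,
  });
});
```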

## Testing Checklist

- [ ] Unit tests cover 70% of codebase
- [ ] Integration tests for all API endpoints
- [ ] Contract tests for microservices
- [ ] Load tests configured (k6/Gatling)
- [ ] E2E tests for critical user flows
- [ ] Database migration tests
- [ ] Security scanning in CI/CD (SAST, DAST, SCA)
- [ ] Code coverage reports automated
- [ ] Tests run on every PR
- [ ] Flaky tests eliminated

## Resources

- **Vitest:** https://vitest.dev/
- **Playwright:** https://playwright.dev/
- **k6:** https://k6.io/docs/
- **Pact:** https://docs.pact.io/
- **TestContainers:** https://testcontainers.com/

207
.opencode/skills/better-auth/SKILL.md
Normal file
@@ -0,0 +1,207 @@

---
name: ck:better-auth
description: Add authentication with Better Auth (TypeScript). Use for email/password, OAuth providers (Google, GitHub), 2FA/MFA, passkeys/WebAuthn, sessions, RBAC, rate limiting.
license: MIT
argument-hint: "[auth-method or feature]"
metadata:
  author: claudekit
  version: "2.0.0"
---

# Better Auth Skill

Better Auth is a comprehensive, framework-agnostic authentication/authorization framework for TypeScript, with built-in email/password, social OAuth, and a powerful plugin ecosystem for advanced features.

## When to Use

- Implementing auth in TypeScript/JavaScript applications
- Adding email/password or social OAuth authentication
- Setting up 2FA, passkeys, magic links, or other advanced auth features
- Building multi-tenant apps with organization support
- Managing sessions and user lifecycle
- Working with any framework (Next.js, Nuxt, SvelteKit, Remix, Astro, Hono, Express, etc.)

## Quick Start

### Installation

```bash
npm install better-auth
# or pnpm/yarn/bun add better-auth
```

### Environment Setup

Create `.env`:

```env
BETTER_AUTH_SECRET=<generated-secret-32-chars-min>
BETTER_AUTH_URL=http://localhost:3000
```
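
Any sufficiently random value of at least 32 characters works as the secret; one common way to generate it:

```bash
openssl rand -base64 32
```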

### Basic Server Setup

Create `auth.ts` (root, `lib/`, `utils/`, or under `src/app/server/`):

```ts
import { betterAuth } from "better-auth";

export const auth = betterAuth({
  database: {
    // See references/database-integration.md
  },
  emailAndPassword: {
    enabled: true,
    autoSignIn: true
  },
  socialProviders: {
    github: {
      clientId: process.env.GITHUB_CLIENT_ID!,
      clientSecret: process.env.GITHUB_CLIENT_SECRET!,
    }
  }
});
```

### Database Schema

```bash
npx @better-auth/cli generate  # Generate schema/migrations
npx @better-auth/cli migrate   # Apply migrations (Kysely only)
```

### Mount API Handler

**Next.js App Router:**

```ts
// app/api/auth/[...all]/route.ts
import { auth } from "@/lib/auth";
import { toNextJsHandler } from "better-auth/next-js";

export const { POST, GET } = toNextJsHandler(auth);
```

**Other frameworks:** See references/email-password-auth.md#framework-setup

### Client Setup

Create `auth-client.ts`:

```ts
import { createAuthClient } from "better-auth/client";

export const authClient = createAuthClient({
  baseURL: process.env.NEXT_PUBLIC_BETTER_AUTH_URL || "http://localhost:3000"
});
```

### Basic Usage

```ts
// Sign up
await authClient.signUp.email({
  email: "user@example.com",
  password: "secure123",
  name: "John Doe"
});

// Sign in
await authClient.signIn.email({
  email: "user@example.com",
  password: "secure123"
});

// OAuth
await authClient.signIn.social({ provider: "github" });

// Session (choose the variant for your environment)
const { data: session } = authClient.useSession();          // React/Vue/Svelte hook
// const { data: session } = await authClient.getSession(); // Vanilla JS
```

## Feature Selection Matrix

| Feature | Plugin Required | Use Case | Reference |
|---------|----------------|----------|-----------|
| Email/Password | No (built-in) | Basic auth | [email-password-auth.md](./references/email-password-auth.md) |
| OAuth (GitHub, Google, etc.) | No (built-in) | Social login | [oauth-providers.md](./references/oauth-providers.md) |
| Email Verification | No (built-in) | Verify email addresses | [email-password-auth.md](./references/email-password-auth.md#email-verification) |
| Password Reset | No (built-in) | Forgot password flow | [email-password-auth.md](./references/email-password-auth.md#password-reset) |
| Two-Factor Auth (2FA/TOTP) | Yes (`twoFactor`) | Enhanced security | [advanced-features.md](./references/advanced-features.md#two-factor-authentication) |
| Passkeys/WebAuthn | Yes (`passkey`) | Passwordless auth | [advanced-features.md](./references/advanced-features.md#passkeys-webauthn) |
| Magic Link | Yes (`magicLink`) | Email-based login | [advanced-features.md](./references/advanced-features.md#magic-link) |
| Username Auth | Yes (`username`) | Username login | [email-password-auth.md](./references/email-password-auth.md#username-authentication) |
| Organizations/Multi-tenant | Yes (`organization`) | Team/org features | [advanced-features.md](./references/advanced-features.md#organizations) |
| Rate Limiting | No (built-in) | Prevent abuse | [advanced-features.md](./references/advanced-features.md#rate-limiting) |
| Session Management | No (built-in) | User sessions | [advanced-features.md](./references/advanced-features.md#session-management) |

## Auth Method Selection Guide

**Choose Email/Password when:**
- Building a standard web app with traditional auth
- Need full control over user credentials
- Targeting users who prefer email-based accounts

**Choose OAuth when:**
- Want quick signup with minimal friction
- Users already have social accounts
- Need access to social profile data

**Choose Passkeys when:**
- Want a passwordless experience
- Targeting modern browsers/devices
- Security is top priority

**Choose Magic Link when:**
- Want passwordless without WebAuthn complexity
- Targeting email-first users
- Need temporary access links

**Combine Multiple Methods when:**
- Want flexibility for different user preferences
- Building enterprise apps with varied auth requirements
- Need progressive enhancement (start simple, add more options; see the sketch below)
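
Combining methods is just a matter of enabling several options on one server instance. A sketch assembled from the snippets in this skill (the `sendEmail` helper is your own):

```ts
import { betterAuth } from "better-auth";
import { magicLink, passkey, twoFactor } from "better-auth/plugins";

export const auth = betterAuth({
  emailAndPassword: { enabled: true },
  socialProviders: {
    github: {
      clientId: process.env.GITHUB_CLIENT_ID!,
      clientSecret: process.env.GITHUB_CLIENT_SECRET!,
    }
  },
  plugins: [
    twoFactor(),
    passkey({ rpName: "YourApp", rpID: "yourdomain.com" }),
    magicLink({
      sendMagicLink: async ({ email, url }) => {
        await sendEmail({ to: email, subject: "Sign in", html: url }); // your own mailer
      }
    })
  ]
});
```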

## Core Architecture

Better Auth uses a client-server architecture:
1. **Server** (`better-auth`): Handles auth logic, database ops, API routes
2. **Client** (`better-auth/client`): Provides hooks/methods for the frontend
3. **Plugins**: Extend both server and client functionality

## Implementation Checklist

- [ ] Install `better-auth` package
- [ ] Set environment variables (SECRET, URL)
- [ ] Create auth server instance with database config
- [ ] Run schema migration (`npx @better-auth/cli generate`)
- [ ] Mount API handler in framework
- [ ] Create client instance
- [ ] Implement sign-up/sign-in UI
- [ ] Add session management to components
- [ ] Set up protected routes/middleware
- [ ] Add plugins as needed (regenerate schema after)
- [ ] Test complete auth flow
- [ ] Configure email sending (verification/reset)
- [ ] Enable rate limiting for production
- [ ] Set up error handling

## Reference Documentation

### Core Authentication
- [Email/Password Authentication](./references/email-password-auth.md) - Email/password setup, verification, password reset, username auth
- [OAuth Providers](./references/oauth-providers.md) - Social login setup, provider configuration, token management
- [Database Integration](./references/database-integration.md) - Database adapters, schema setup, migrations

### Advanced Features
- [Advanced Features](./references/advanced-features.md) - 2FA/MFA, passkeys, magic links, organizations, rate limiting, session management

## Scripts

- `scripts/better_auth_init.py` - Initialize Better Auth configuration with interactive setup

## Resources

- Docs: https://www.better-auth.com/docs
- GitHub: https://github.com/better-auth/better-auth
- Plugins: https://www.better-auth.com/docs/plugins
- Examples: https://www.better-auth.com/docs/examples

553
.opencode/skills/better-auth/references/advanced-features.md
Normal file
@@ -0,0 +1,553 @@

# Advanced Features

Better Auth plugins extend functionality beyond basic authentication.

## Two-Factor Authentication

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { twoFactor } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [
    twoFactor({
      issuer: "YourAppName", // TOTP issuer name
      otpOptions: {
        period: 30, // OTP validity period (seconds)
        digits: 6,  // OTP length
      }
    })
  ]
});
```

### Client Setup

```ts
import { createAuthClient } from "better-auth/client";
import { twoFactorClient } from "better-auth/client/plugins";

export const authClient = createAuthClient({
  plugins: [
    twoFactorClient({
      twoFactorPage: "/two-factor", // Redirect to 2FA verification page
      redirect: true // Auto-redirect if 2FA required
    })
  ]
});
```

### Enable 2FA for User

```ts
// Enable TOTP
const { data } = await authClient.twoFactor.enable({
  password: "userPassword" // Verify user identity
});

// data contains QR code URI for authenticator app
const qrCodeUri = data.totpURI;
const backupCodes = data.backupCodes; // Save these securely
```

### Verify TOTP Code

```ts
await authClient.twoFactor.verifyTOTP({
  code: "123456",
  trustDevice: true // Skip 2FA on this device for 30 days
});
```

### Disable 2FA

```ts
await authClient.twoFactor.disable({
  password: "userPassword"
});
```

### Backup Codes

```ts
// Generate new backup codes
const { data } = await authClient.twoFactor.generateBackupCodes({
  password: "userPassword"
});

// Use backup code instead of TOTP
await authClient.twoFactor.verifyBackupCode({
  code: "backup-code-123"
});
```

## Passkeys (WebAuthn)

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { passkey } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [
    passkey({
      rpName: "YourApp",     // Relying Party name
      rpID: "yourdomain.com" // Your domain
    })
  ]
});
```

### Client Setup

```ts
import { createAuthClient } from "better-auth/client";
import { passkeyClient } from "better-auth/client/plugins";

export const authClient = createAuthClient({
  plugins: [passkeyClient()]
});
```

### Register Passkey

```ts
// User must be authenticated first
await authClient.passkey.register({
  name: "My Laptop" // Optional: name for this passkey
});
```

### Sign In with Passkey

```ts
await authClient.passkey.signIn();
```

### List User Passkeys

```ts
const { data } = await authClient.passkey.list();
// data contains array of registered passkeys
```

### Delete Passkey

```ts
await authClient.passkey.delete({
  id: "passkey-id"
});
```

## Magic Link

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { magicLink } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [
    magicLink({
      sendMagicLink: async ({ email, url, token }) => {
        await sendEmail({
          to: email,
          subject: "Sign in to YourApp",
          html: `Click <a href="${url}">here</a> to sign in.`
        });
      },
      expiresIn: 300, // Link expires in 5 minutes (seconds)
    })
  ]
});
```

### Client Setup

```ts
import { createAuthClient } from "better-auth/client";
import { magicLinkClient } from "better-auth/client/plugins";

export const authClient = createAuthClient({
  plugins: [magicLinkClient()]
});
```

### Send Magic Link

```ts
await authClient.magicLink.sendMagicLink({
  email: "user@example.com",
  callbackURL: "/dashboard"
});
```

### Verify Magic Link

```ts
// Called automatically when user clicks link;
// token in URL query params handled by Better Auth
await authClient.magicLink.verify({
  token: "token-from-url"
});
```

## Organizations (Multi-Tenancy)

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { organization } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [
    organization({
      allowUserToCreateOrganization: true,
      organizationLimit: 5, // Max orgs per user
      creatorRole: "owner"  // Role for org creator
    })
  ]
});
```

### Client Setup

```ts
import { createAuthClient } from "better-auth/client";
import { organizationClient } from "better-auth/client/plugins";

export const authClient = createAuthClient({
  plugins: [organizationClient()]
});
```

### Create Organization

```ts
await authClient.organization.create({
  name: "Acme Corp",
  slug: "acme", // Unique slug
  metadata: {
    industry: "Technology"
  }
});
```

### Invite Members

```ts
await authClient.organization.inviteMember({
  organizationId: "org-id",
  email: "user@example.com",
  role: "member", // owner, admin, member
  message: "Join our team!" // Optional
});
```

### Accept Invitation

```ts
await authClient.organization.acceptInvitation({
  invitationId: "invitation-id"
});
```

### List Organizations

```ts
const { data } = await authClient.organization.list();
// Returns user's organizations
```

### Update Member Role

```ts
await authClient.organization.updateMemberRole({
  organizationId: "org-id",
  userId: "user-id",
  role: "admin"
});
```

### Remove Member

```ts
await authClient.organization.removeMember({
  organizationId: "org-id",
  userId: "user-id"
});
```

### Delete Organization

```ts
await authClient.organization.delete({
  organizationId: "org-id"
});
```

## Session Management

### Configure Session Expiration

```ts
export const auth = betterAuth({
  session: {
    expiresIn: 60 * 60 * 24 * 7, // 7 days (seconds)
    updateAge: 60 * 60 * 24,     // Update session every 24 hours
    cookieCache: {
      enabled: true,
      maxAge: 5 * 60 // Cache for 5 minutes
    }
  }
});
```

### Server-Side Session

```ts
// Next.js
import { auth } from "@/lib/auth";
import { headers } from "next/headers";

const session = await auth.api.getSession({
  headers: await headers()
});

if (!session) {
  // Not authenticated
}
```

### Client-Side Session

```tsx
// React
import { authClient } from "@/lib/auth-client";

function UserProfile() {
  const { data: session, isPending, error } = authClient.useSession();

  if (isPending) return <div>Loading...</div>;
  if (error) return <div>Error</div>;
  if (!session) return <div>Not logged in</div>;

  return <div>Hello, {session.user.name}!</div>;
}
```

### List Active Sessions

```ts
const { data: sessions } = await authClient.listSessions();
// Returns all active sessions for current user
```

### Revoke Session

```ts
await authClient.revokeSession({
  sessionId: "session-id"
});
```

### Revoke All Sessions

```ts
await authClient.revokeAllSessions();
```

## Rate Limiting

### Server Configuration

```ts
export const auth = betterAuth({
  rateLimit: {
    enabled: true,
    window: 60,        // Time window in seconds
    max: 10,           // Max requests per window
    storage: "memory", // "memory" or "database"
    customRules: {
      "/api/auth/sign-in": {
        window: 60,
        max: 5 // Stricter limit for sign-in
      },
      "/api/auth/sign-up": {
        window: 3600,
        max: 3 // 3 signups per hour
      }
    }
  }
});
```

### Custom Rate Limiter

```ts
import { betterAuth } from "better-auth";

export const auth = betterAuth({
  rateLimit: {
    enabled: true,
    customLimiter: async ({ request, limit }) => {
      // Custom rate limiting logic
      const ip = request.headers.get("x-forwarded-for");
      const key = `ratelimit:${ip}`;

      // Use Redis, etc.
      const count = await redis.incr(key);
      if (count === 1) {
        await redis.expire(key, limit.window);
      }

      if (count > limit.max) {
        throw new Error("Rate limit exceeded");
      }
    }
  }
});
```

## Anonymous Sessions

Track users before they sign up.

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { anonymous } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [anonymous()]
});
```

### Client Usage

```ts
// Create anonymous session
const { data } = await authClient.signIn.anonymous();

// Convert to full account
await authClient.signUp.email({
  email: "user@example.com",
  password: "password123",
  linkAnonymousSession: true // Link anonymous data
});
```

## Email OTP

One-time password via email (passwordless).

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { emailOTP } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [
    emailOTP({
      sendVerificationOTP: async ({ email, otp }) => {
        await sendEmail({
          to: email,
          subject: "Your verification code",
          text: `Your code is: ${otp}`
        });
      },
      expiresIn: 300, // 5 minutes
      length: 6       // OTP length
    })
  ]
});
```

### Client Usage

```ts
// Send OTP to email
await authClient.emailOTP.sendOTP({
  email: "user@example.com"
});

// Verify OTP
await authClient.emailOTP.verifyOTP({
  email: "user@example.com",
  otp: "123456"
});
```

## Phone Number Authentication

Requires the phone number plugin.

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { phoneNumber } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [
    phoneNumber({
      sendOTP: async ({ phoneNumber, otp }) => {
        // Use Twilio, AWS SNS, etc.
        await sendSMS(phoneNumber, `Your code: ${otp}`);
      }
    })
  ]
});
```

### Client Usage

```ts
// Sign up with phone
await authClient.signUp.phoneNumber({
  phoneNumber: "+1234567890",
  password: "password123"
});

// Send OTP
await authClient.phoneNumber.sendOTP({
  phoneNumber: "+1234567890"
});

// Verify OTP
await authClient.phoneNumber.verifyOTP({
  phoneNumber: "+1234567890",
  otp: "123456"
});
```

## Best Practices

1. **2FA**: Offer 2FA as optional; make it mandatory for admin users (see the sketch after this list)
2. **Passkeys**: Implement as progressive enhancement (fallback to password)
3. **Magic Links**: Set short expiration (5-15 minutes)
4. **Organizations**: Implement RBAC for org permissions
5. **Sessions**: Use short expiration for sensitive apps
6. **Rate Limiting**: Enable in production, adjust limits based on usage
7. **Anonymous Sessions**: Clean up old anonymous sessions periodically
8. **Backup Codes**: Force users to save backup codes before enabling 2FA
9. **Multi-Device**: Allow users to manage trusted devices
10. **Audit Logs**: Track sensitive operations (role changes, 2FA changes)
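
A server-side sketch of item 1, assuming a custom `role` field on the user (see database-integration.md#custom-fields) and the `twoFactorEnabled` flag the `twoFactor` plugin adds to the user model:

```ts
import { auth } from "./auth";

export async function requireAdmin2FA(request: Request): Promise<Response | null> {
  const session = await auth.api.getSession({ headers: request.headers });

  // Admins without 2FA enrolled are sent to enrollment before admin routes load
  if (session?.user.role === "admin" && !session.user.twoFactorEnabled) {
    return Response.redirect(new URL("/settings/two-factor", request.url), 303);
  }
  return null; // No action needed
}
```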

## Regenerate Schema After Plugins

After adding any plugin:

```bash
npx @better-auth/cli generate
npx @better-auth/cli migrate # if using Kysely
```

Or manually apply migrations for your ORM (Drizzle, Prisma).

577
.opencode/skills/better-auth/references/database-integration.md
Normal file
@@ -0,0 +1,577 @@

# Database Integration

Better Auth supports multiple databases and ORMs for flexible data persistence.

## Supported Databases

- SQLite
- PostgreSQL
- MySQL/MariaDB
- MongoDB
- Any database with adapter support

## Direct Database Connection

### SQLite

```ts
import { betterAuth } from "better-auth";
import Database from "better-sqlite3";

export const auth = betterAuth({
  database: new Database("./sqlite.db"),
  // or, for testing:
  // database: new Database(":memory:") // In-memory
});
```

### PostgreSQL

```ts
import { betterAuth } from "better-auth";
import { Pool } from "pg";

const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
  // or explicit config:
  // host: "localhost",
  // port: 5432,
  // user: "postgres",
  // password: "password",
  // database: "myapp"
});

export const auth = betterAuth({
  database: pool
});
```

### MySQL

```ts
import { betterAuth } from "better-auth";
import { createPool } from "mysql2/promise";

const pool = createPool({
  host: "localhost",
  user: "root",
  password: "password",
  database: "myapp",
  waitForConnections: true,
  connectionLimit: 10
});

export const auth = betterAuth({
  database: pool
});
```

## ORM Adapters

### Drizzle ORM

**Install:**
```bash
npm install drizzle-orm better-auth
```

**Setup:**
```ts
import { betterAuth } from "better-auth";
import { drizzleAdapter } from "better-auth/adapters/drizzle";
import { drizzle } from "drizzle-orm/node-postgres";
import { Pool } from "pg";

const pool = new Pool({
  connectionString: process.env.DATABASE_URL
});

const db = drizzle(pool);

export const auth = betterAuth({
  database: drizzleAdapter(db, {
    provider: "pg", // "pg" | "mysql" | "sqlite"
    schema: {
      // Optional: custom table names
      user: "users",
      session: "sessions",
      account: "accounts",
      verification: "verifications"
    }
  })
});
```

**Generate Schema:**
```bash
npx @better-auth/cli generate --adapter drizzle
```

### Prisma

**Install:**
```bash
npm install @prisma/client better-auth
```

**Setup:**
```ts
import { betterAuth } from "better-auth";
import { prismaAdapter } from "better-auth/adapters/prisma";
import { PrismaClient } from "@prisma/client";

const prisma = new PrismaClient();

export const auth = betterAuth({
  database: prismaAdapter(prisma, {
    provider: "postgresql", // "postgresql" | "mysql" | "sqlite"
  })
});
```

**Generate Schema:**
```bash
npx @better-auth/cli generate --adapter prisma
```

**Apply to Prisma:**
```bash
# Add generated schema to schema.prisma
npx prisma migrate dev --name init
npx prisma generate
```

### Kysely

**Install:**
```bash
npm install kysely better-auth
```

**Setup:**
```ts
import { betterAuth } from "better-auth";
import { kyselyAdapter } from "better-auth/adapters/kysely";
import { Kysely, PostgresDialect } from "kysely";
import { Pool } from "pg";

const db = new Kysely({
  dialect: new PostgresDialect({
    pool: new Pool({
      connectionString: process.env.DATABASE_URL
    })
  })
});

export const auth = betterAuth({
  database: kyselyAdapter(db, {
    provider: "pg"
  })
});
```

**Auto-migrate with Kysely:**
```bash
npx @better-auth/cli migrate --adapter kysely
```

### MongoDB

**Install:**
```bash
npm install mongodb better-auth
```

**Setup:**
```ts
import { betterAuth } from "better-auth";
import { mongodbAdapter } from "better-auth/adapters/mongodb";
import { MongoClient } from "mongodb";

const client = new MongoClient(process.env.MONGODB_URI!);
await client.connect();

export const auth = betterAuth({
  database: mongodbAdapter(client, {
    databaseName: "myapp"
  })
});
```

**Generate Collections:**
```bash
npx @better-auth/cli generate --adapter mongodb
```

## Core Database Schema

Better Auth requires these core tables/collections:

### User Table

```sql
CREATE TABLE user (
  id TEXT PRIMARY KEY,
  email TEXT UNIQUE NOT NULL,
  emailVerified BOOLEAN DEFAULT FALSE,
  name TEXT,
  image TEXT,
  createdAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updatedAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
```

### Session Table

```sql
CREATE TABLE session (
  id TEXT PRIMARY KEY,
  userId TEXT NOT NULL,
  expiresAt TIMESTAMP NOT NULL,
  ipAddress TEXT,
  userAgent TEXT,
  createdAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updatedAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  FOREIGN KEY (userId) REFERENCES user(id) ON DELETE CASCADE
);
```

### Account Table

```sql
CREATE TABLE account (
  id TEXT PRIMARY KEY,
  userId TEXT NOT NULL,
  accountId TEXT NOT NULL,
  providerId TEXT NOT NULL,
  accessToken TEXT,
  refreshToken TEXT,
  expiresAt TIMESTAMP,
  scope TEXT,
  createdAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updatedAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  FOREIGN KEY (userId) REFERENCES user(id) ON DELETE CASCADE,
  UNIQUE(providerId, accountId)
);
```

### Verification Table

```sql
CREATE TABLE verification (
  id TEXT PRIMARY KEY,
  identifier TEXT NOT NULL,
  value TEXT NOT NULL,
  expiresAt TIMESTAMP NOT NULL,
  createdAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  updatedAt TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
```

## Schema Generation

### Using CLI

```bash
# Generate schema files
npx @better-auth/cli generate

# Specify adapter
npx @better-auth/cli generate --adapter drizzle
npx @better-auth/cli generate --adapter prisma

# Specify output
npx @better-auth/cli generate --output ./db/schema.ts
```

### Auto-migrate (Kysely only)

```bash
npx @better-auth/cli migrate
```

For other ORMs, apply generated schema manually.

## Custom Fields

Add custom fields to the user table:

```ts
export const auth = betterAuth({
  user: {
    additionalFields: {
      role: {
        type: "string",
        required: false,
        defaultValue: "user"
      },
      phoneNumber: {
        type: "string",
        required: false
      },
      subscriptionTier: {
        type: "string",
        required: false
      }
    }
  }
});
```

After adding fields:

```bash
npx @better-auth/cli generate
```

Update a user with custom fields:

```ts
await authClient.updateUser({
  role: "admin",
  phoneNumber: "+1234567890"
});
```

## Plugin Schema Extensions

Plugins add their own tables/fields. Regenerate the schema after adding plugins:

```bash
npx @better-auth/cli generate
```

### Two-Factor Plugin Tables

- `twoFactor`: Stores TOTP secrets, backup codes

### Passkey Plugin Tables

- `passkey`: Stores WebAuthn credentials

### Organization Plugin Tables

- `organization`: Organization data
- `member`: Organization members
- `invitation`: Pending invitations

## Migration Strategies

### Development

```bash
# Generate schema
npx @better-auth/cli generate

# Apply migrations (Kysely)
npx @better-auth/cli migrate

# Or manual (Prisma)
npx prisma migrate dev

# Or manual (Drizzle)
npx drizzle-kit push
```

### Production

```bash
# Review generated migration
npx @better-auth/cli generate

# Test in staging
# Apply to production with your ORM's migration tool

# Prisma
npx prisma migrate deploy

# Drizzle
npx drizzle-kit push

# Kysely
npx @better-auth/cli migrate
```

## Connection Pooling

### PostgreSQL

```ts
import { Pool } from "pg";

const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
  max: 20, // Max connections
  idleTimeoutMillis: 30000,
  connectionTimeoutMillis: 2000,
});
```

### MySQL

```ts
import { createPool } from "mysql2/promise";

const pool = createPool({
  uri: process.env.DATABASE_URL, // mysql2 uses `uri`, not `connectionString`
  waitForConnections: true,
  connectionLimit: 10,
  queueLimit: 0
});
```

## Database URLs

### PostgreSQL

```env
DATABASE_URL=postgresql://user:password@localhost:5432/dbname
# Or with connection params
DATABASE_URL=postgresql://user:password@localhost:5432/dbname?schema=public&connection_limit=10
```

### MySQL

```env
DATABASE_URL=mysql://user:password@localhost:3306/dbname
```

### SQLite

```env
DATABASE_URL=file:./dev.db
# Or in-memory
DATABASE_URL=:memory:
```

### MongoDB

```env
MONGODB_URI=mongodb://localhost:27017/dbname
# Or Atlas
MONGODB_URI=mongodb+srv://user:password@cluster.mongodb.net/dbname
```

## Performance Optimization

### Indexes

The Better Auth CLI auto-generates essential indexes:
- `user.email` (unique)
- `session.userId`
- `account.userId`
- `account.providerId, accountId` (unique)

Add custom indexes for performance:

```sql
CREATE INDEX idx_session_expires ON session(expiresAt);
CREATE INDEX idx_user_created ON user(createdAt);
```

### Query Optimization

Use connection pooling, enable query caching where applicable, and monitor slow queries. Better Auth's `advanced` block also exposes hardened cookie defaults:

```ts
export const auth = betterAuth({
  advanced: {
    defaultCookieAttributes: {
      sameSite: "lax",
      secure: true,
      httpOnly: true
    }
  }
});
```

## Backup Strategies

### PostgreSQL

```bash
# Backup
pg_dump dbname > backup.sql

# Restore
psql dbname < backup.sql
```

### MySQL

```bash
# Backup
mysqldump -u root -p dbname > backup.sql

# Restore
mysql -u root -p dbname < backup.sql
```

### SQLite

```bash
# Copy file
cp dev.db dev.db.backup

# Or use the backup command
sqlite3 dev.db ".backup backup.db"
```

### MongoDB

```bash
# Backup
mongodump --db=dbname --out=./backup

# Restore
mongorestore --db=dbname ./backup/dbname
```

## Best Practices

1. **Environment Variables**: Store credentials in env vars, never commit them
2. **Connection Pooling**: Use pools for PostgreSQL/MySQL in production
3. **Migrations**: Use ORM migration tools, not raw SQL, in production
4. **Indexes**: Add indexes for frequently queried fields
5. **Backups**: Automate daily backups in production
6. **SSL**: Use SSL/TLS for database connections in production
7. **Schema Sync**: Keep schema in sync across environments
8. **Testing**: Use a separate database for tests (in-memory SQLite is ideal)
9. **Monitoring**: Monitor query performance and connection pool usage
10. **Cleanup**: Periodically clean expired sessions/verifications (see the sketch after this list)
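
For item 10, a scheduled query against the core tables is often enough (column names as in the schema above):

```sql
-- Run on a schedule (cron, pg_cron, etc.)
DELETE FROM session WHERE expiresAt < CURRENT_TIMESTAMP;
DELETE FROM verification WHERE expiresAt < CURRENT_TIMESTAMP;
```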

## Troubleshooting

### Connection Errors

```ts
// Add a connection timeout
const pool = new Pool({
  connectionString: process.env.DATABASE_URL,
  connectionTimeoutMillis: 5000
});
```

### Schema Mismatch

```bash
# Regenerate schema
npx @better-auth/cli generate

# Apply migrations
# For Prisma: npx prisma migrate dev
# For Drizzle: npx drizzle-kit push
```

### Migration Failures

- Check database credentials
- Verify the database server is running
- Check for schema conflicts
- Review migration SQL manually

### Performance Issues

- Add indexes on foreign keys
- Enable connection pooling
- Monitor slow queries
- Consider read replicas for heavy read workloads

416
.opencode/skills/better-auth/references/email-password-auth.md
Normal file
@@ -0,0 +1,416 @@

# Email/Password Authentication

Email/password is a built-in auth method in Better Auth. No plugins are required for basic functionality.

## Server Configuration

### Basic Setup

```ts
import { betterAuth } from "better-auth";

export const auth = betterAuth({
  emailAndPassword: {
    enabled: true,
    autoSignIn: true, // Auto sign-in after signup (default: true)
    requireEmailVerification: false, // Require email verification before login
    sendResetPasswordToken: async ({ user, url }) => {
      // Send password reset email
      await sendEmail(user.email, url);
    }
  }
});
```

### Custom Password Requirements

```ts
export const auth = betterAuth({
  emailAndPassword: {
    enabled: true,
    password: {
      minLength: 8,
      requireUppercase: true,
      requireLowercase: true,
      requireNumbers: true,
      requireSpecialChars: true
    }
  }
});
```

## Client Usage

### Sign Up

```ts
import { authClient } from "@/lib/auth-client";

const { data, error } = await authClient.signUp.email({
  email: "user@example.com",
  password: "securePassword123",
  name: "John Doe",
  image: "https://example.com/avatar.jpg", // optional
  callbackURL: "/dashboard" // optional
}, {
  onSuccess: (ctx) => {
    // ctx.data contains user and session
    console.log("User created:", ctx.data.user);
  },
  onError: (ctx) => {
    alert(ctx.error.message);
  }
});
```

### Sign In

```ts
const { data, error } = await authClient.signIn.email({
  email: "user@example.com",
  password: "securePassword123",
  callbackURL: "/dashboard",
  rememberMe: true // default: true
}, {
  onSuccess: () => {
    // redirect or update UI
  },
  onError: (ctx) => {
    console.error(ctx.error.message);
  }
});
```

### Sign Out

```ts
await authClient.signOut({
  fetchOptions: {
    onSuccess: () => {
      router.push("/login");
    }
  }
});
```

## Email Verification

### Server Setup

```ts
export const auth = betterAuth({
  emailVerification: {
    sendVerificationEmail: async ({ user, url, token }) => {
      // Send verification email
      await sendEmail({
        to: user.email,
        subject: "Verify your email",
        html: `Click <a href="${url}">here</a> to verify your email.`
      });
    },
    sendOnSignUp: true, // Send verification email on signup
    autoSignInAfterVerification: true // Auto sign-in after verification
  },
  emailAndPassword: {
    enabled: true,
    requireEmailVerification: true // Require verification before login
  }
});
```

### Client Usage

```ts
// Send verification email
await authClient.sendVerificationEmail({
  email: "user@example.com",
  callbackURL: "/verify-success"
});

// Verify email with token
await authClient.verifyEmail({
  token: "verification-token-from-email"
});
```

## Password Reset Flow

### Server Setup

```ts
export const auth = betterAuth({
  emailAndPassword: {
    enabled: true,
    sendResetPasswordToken: async ({ user, url, token }) => {
      await sendEmail({
        to: user.email,
        subject: "Reset your password",
        html: `Click <a href="${url}">here</a> to reset your password.`
      });
    }
  }
});
```

### Client Flow

```ts
// Step 1: Request password reset
await authClient.forgetPassword({
  email: "user@example.com",
  redirectTo: "/reset-password"
});

// Step 2: Reset password with token
await authClient.resetPassword({
  token: "reset-token-from-email",
  password: "newSecurePassword123"
});
```

### Change Password (Authenticated)

```ts
await authClient.changePassword({
  currentPassword: "oldPassword123",
  newPassword: "newPassword456",
  revokeOtherSessions: true // Optional: log out other sessions
});
```

## Username Authentication

Requires the `username` plugin for username-based auth.

### Server Setup

```ts
import { betterAuth } from "better-auth";
import { username } from "better-auth/plugins";

export const auth = betterAuth({
  plugins: [
    username({
      // Allow sign in with username or email
      allowUsernameOrEmail: true
    })
  ]
});
```

### Client Setup

```ts
import { createAuthClient } from "better-auth/client";
import { usernameClient } from "better-auth/client/plugins";

export const authClient = createAuthClient({
  plugins: [usernameClient()]
});
```

### Client Usage

```ts
// Sign up with username
await authClient.signUp.username({
  username: "johndoe",
  password: "securePassword123",
  email: "john@example.com", // optional
  name: "John Doe"
});

// Sign in with username
await authClient.signIn.username({
  username: "johndoe",
  password: "securePassword123"
});

// Sign in with username or email (if allowUsernameOrEmail: true)
await authClient.signIn.username({
  username: "johndoe", // or "john@example.com"
  password: "securePassword123"
});
```

## Framework Setup

### Next.js (App Router)

```ts
// app/api/auth/[...all]/route.ts
import { auth } from "@/lib/auth";
import { toNextJsHandler } from "better-auth/next-js";

export const { POST, GET } = toNextJsHandler(auth);
```

### Next.js (Pages Router)

```ts
// pages/api/auth/[...all].ts
import { auth } from "@/lib/auth";
import { toNextJsHandler } from "better-auth/next-js";

export default toNextJsHandler(auth);
```

### Nuxt

```ts
// server/api/auth/[...all].ts
import { auth } from "~/utils/auth";
import { toWebRequest } from "better-auth/utils/web";

export default defineEventHandler((event) => {
  return auth.handler(toWebRequest(event));
});
```

### SvelteKit

```ts
// hooks.server.ts
import { auth } from "$lib/auth";
import { svelteKitHandler } from "better-auth/svelte-kit";

export async function handle({ event, resolve }) {
  return svelteKitHandler({ event, resolve, auth });
}
```

### Astro

```ts
// pages/api/auth/[...all].ts
import { auth } from "@/lib/auth";

export async function ALL({ request }: { request: Request }) {
  return auth.handler(request);
}
```

### Hono

```ts
import { Hono } from "hono";
import { auth } from "./auth";

const app = new Hono();

app.on(["POST", "GET"], "/api/auth/*", (c) => {
  return auth.handler(c.req.raw);
});
```

### Express

```ts
import express from "express";
import { toNodeHandler } from "better-auth/node";
import { auth } from "./auth";

const app = express();

app.all("/api/auth/*", toNodeHandler(auth));
```

## Protected Routes

### Next.js Middleware

```ts
// middleware.ts
import { auth } from "@/lib/auth";
import { NextRequest, NextResponse } from "next/server";

export async function middleware(request: NextRequest) {
  const session = await auth.api.getSession({
    headers: request.headers
  });

  if (!session) {
    return NextResponse.redirect(new URL("/login", request.url));
  }

  return NextResponse.next();
}

export const config = {
  matcher: ["/dashboard/:path*", "/profile/:path*"]
};
```

### SvelteKit Hooks

```ts
// hooks.server.ts
import { auth } from "$lib/auth";
import { redirect } from "@sveltejs/kit";

export async function handle({ event, resolve }) {
  const session = await auth.api.getSession({
    headers: event.request.headers
  });

  if (event.url.pathname.startsWith("/dashboard") && !session) {
    throw redirect(303, "/login");
  }

  return resolve(event);
}
```

### Nuxt Middleware

```ts
// middleware/auth.ts
export default defineNuxtRouteMiddleware(async (to) => {
  const { data: session } = await useAuthSession();

  if (!session.value && to.path.startsWith("/dashboard")) {
    return navigateTo("/login");
  }
});
```
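
An equivalent guard for Express. This sketch assumes `fromNodeHeaders` is exported from `better-auth/node` (as in current Better Auth releases) to convert Node's header object into the Web `Headers` the API expects:

```ts
import express from "express";
import { toNodeHandler, fromNodeHeaders } from "better-auth/node";
import { auth } from "./auth";

const app = express();
app.all("/api/auth/*", toNodeHandler(auth));

// Guard everything under /dashboard
app.use("/dashboard", async (req, res, next) => {
  const session = await auth.api.getSession({
    headers: fromNodeHeaders(req.headers)
  });

  if (!session) return res.redirect("/login");
  next();
});
```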

## User Profile Management

### Get Current User

```ts
const { data: session } = await authClient.getSession();
console.log(session.user);
```

### Update User Profile

```ts
await authClient.updateUser({
  name: "New Name",
  image: "https://example.com/new-avatar.jpg",
  // Custom fields if defined in schema
});
```

### Delete User Account

```ts
await authClient.deleteUser({
  password: "currentPassword", // Required for security
  callbackURL: "/" // Redirect after deletion
});
```

## Best Practices

1. **Password Security**: Enforce strong password requirements
2. **Email Verification**: Enable for production to prevent spam
3. **Rate Limiting**: Prevent brute force attacks (see advanced-features.md)
4. **HTTPS**: Always use HTTPS in production
5. **Error Messages**: Don't reveal whether an email exists during login (see the sketch after this list)
6. **Session Security**: Use secure, httpOnly cookies
7. **CSRF Protection**: Better Auth handles this automatically
8. **Password Reset**: Set short expiration for reset tokens
9. **Account Lockout**: Consider implementing after N failed attempts
10. **Audit Logs**: Track auth events for security monitoring
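
A client-side sketch of item 5, keeping the failure message identical regardless of the server's reason (the `setError` helper is hypothetical; `email` and `password` come from your form state):

```ts
await authClient.signIn.email(
  { email, password },
  {
    onError: () => {
      // Same message whether the email is unknown or the password is wrong
      setError("Invalid email or password.");
    }
  }
);
```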

430
.opencode/skills/better-auth/references/oauth-providers.md
Normal file
@@ -0,0 +1,430 @@

# OAuth Providers

Better Auth provides built-in OAuth 2.0 support for social authentication. No plugins required.

## Supported Providers

GitHub, Google, Apple, Discord, Facebook, Microsoft, Twitter/X, Spotify, Twitch, LinkedIn, Dropbox, GitLab, and more.

## Basic OAuth Setup

### Server Configuration

```ts
import { betterAuth } from "better-auth";

export const auth = betterAuth({
  socialProviders: {
    github: {
      clientId: process.env.GITHUB_CLIENT_ID!,
      clientSecret: process.env.GITHUB_CLIENT_SECRET!,
      // Optional: custom scopes
      scope: ["user:email", "read:user"]
    },
    google: {
      clientId: process.env.GOOGLE_CLIENT_ID!,
      clientSecret: process.env.GOOGLE_CLIENT_SECRET!,
      scope: ["openid", "email", "profile"]
    },
    discord: {
      clientId: process.env.DISCORD_CLIENT_ID!,
      clientSecret: process.env.DISCORD_CLIENT_SECRET!,
    }
  }
});
```

### Client Usage

```ts
import { authClient } from "@/lib/auth-client";

// Basic sign in
await authClient.signIn.social({
  provider: "github",
  callbackURL: "/dashboard"
});

// With callbacks
await authClient.signIn.social({
  provider: "google",
  callbackURL: "/dashboard",
  errorCallbackURL: "/error",
  newUserCallbackURL: "/welcome", // For first-time users
});
```

## Provider Configuration

### GitHub OAuth

1. Create an OAuth App at https://github.com/settings/developers
2. Set Authorization callback URL: `http://localhost:3000/api/auth/callback/github`
3. Add credentials to `.env`:

```env
GITHUB_CLIENT_ID=your_client_id
GITHUB_CLIENT_SECRET=your_client_secret
```

### Google OAuth

1. Create a project at https://console.cloud.google.com
2. Configure the OAuth consent screen
3. Create OAuth 2.0 credentials
4. Add authorized redirect URI: `http://localhost:3000/api/auth/callback/google`
5. Add credentials to `.env`:

```env
GOOGLE_CLIENT_ID=your_client_id.apps.googleusercontent.com
GOOGLE_CLIENT_SECRET=your_client_secret
```

### Discord OAuth

1. Create an application at https://discord.com/developers/applications
2. Add OAuth2 redirect: `http://localhost:3000/api/auth/callback/discord`
3. Add credentials:

```env
DISCORD_CLIENT_ID=your_client_id
DISCORD_CLIENT_SECRET=your_client_secret
```

### Apple Sign In

```ts
export const auth = betterAuth({
  socialProviders: {
    apple: {
      clientId: process.env.APPLE_CLIENT_ID!,
      clientSecret: process.env.APPLE_CLIENT_SECRET!,
      teamId: process.env.APPLE_TEAM_ID!,
      keyId: process.env.APPLE_KEY_ID!,
      privateKey: process.env.APPLE_PRIVATE_KEY!
    }
  }
});
```

### Microsoft/Azure AD

```ts
export const auth = betterAuth({
  socialProviders: {
    microsoft: {
      clientId: process.env.MICROSOFT_CLIENT_ID!,
      clientSecret: process.env.MICROSOFT_CLIENT_SECRET!,
      tenantId: process.env.MICROSOFT_TENANT_ID, // Optional: for a specific tenant
    }
  }
});
```

### Twitter/X OAuth

```ts
export const auth = betterAuth({
  socialProviders: {
    twitter: {
      clientId: process.env.TWITTER_CLIENT_ID!,
      clientSecret: process.env.TWITTER_CLIENT_SECRET!,
    }
  }
});
```

## Custom OAuth Provider

Add a custom OAuth 2.0 provider:

```ts
import { betterAuth } from "better-auth";

export const auth = betterAuth({
  socialProviders: {
    customProvider: {
      clientId: process.env.CUSTOM_CLIENT_ID!,
      clientSecret: process.env.CUSTOM_CLIENT_SECRET!,
      authorizationUrl: "https://provider.com/oauth/authorize",
      tokenUrl: "https://provider.com/oauth/token",
      userInfoUrl: "https://provider.com/oauth/userinfo",
      scope: ["email", "profile"],
      // Map provider user data to a Better Auth user
      mapProfile: (profile) => ({
        id: profile.id,
        email: profile.email,
        name: profile.name,
        image: profile.avatar_url
      })
    }
  }
});
```

## Account Linking

Link multiple OAuth providers to the same user account.

### Server Setup

```ts
export const auth = betterAuth({
  account: {
    accountLinking: {
      enabled: true,
      trustedProviders: ["google", "github"] // Auto-link these providers
    }
  }
});
```

### Client Usage

```ts
// Link a new provider to the existing account
await authClient.linkSocial({
  provider: "google",
  callbackURL: "/profile"
});

// List linked accounts
const { data: session } = await authClient.getSession();
const accounts = session.user.accounts;

// Unlink an account
await authClient.unlinkAccount({
  accountId: "account-id"
});
```
## Token Management
|
||||
|
||||
### Access OAuth Tokens
|
||||
|
||||
```ts
|
||||
// Server-side
|
||||
const session = await auth.api.getSession({
|
||||
headers: request.headers
|
||||
});
|
||||
|
||||
const accounts = await auth.api.listAccounts({
|
||||
userId: session.user.id
|
||||
});
|
||||
|
||||
// Get specific provider token
|
||||
const githubAccount = accounts.find(a => a.providerId === "github");
|
||||
const accessToken = githubAccount.accessToken;
|
||||
const refreshToken = githubAccount.refreshToken;
|
||||
```

### Refresh Tokens

```ts
// Manually refresh OAuth token
const newToken = await auth.api.refreshToken({
  accountId: "account-id"
});
```
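
In practice you usually refresh only when the stored token is about to expire. A minimal sketch, assuming the account record exposes an `accessTokenExpiresAt` timestamp (the field name is an assumption; check your account schema):

```ts
// Refresh only when the access token expires within the next 5 minutes
const FIVE_MINUTES_MS = 5 * 60 * 1000;

const account = accounts.find(a => a.providerId === "github");

if (
  account?.accessTokenExpiresAt && // assumed column name
  new Date(account.accessTokenExpiresAt).getTime() - Date.now() < FIVE_MINUTES_MS
) {
  // Same server API as above
  await auth.api.refreshToken({ accountId: account.id });
}
```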

### Use Provider API

```ts
// Example: Use GitHub token to fetch repos
const githubAccount = accounts.find(a => a.providerId === "github");

const response = await fetch("https://api.github.com/user/repos", {
  headers: {
    Authorization: `Bearer ${githubAccount.accessToken}`
  }
});

const repos = await response.json();
```

## Advanced OAuth Configuration

### Custom Scopes

```ts
export const auth = betterAuth({
  socialProviders: {
    github: {
      clientId: process.env.GITHUB_CLIENT_ID!,
      clientSecret: process.env.GITHUB_CLIENT_SECRET!,
      scope: [
        "user:email",
        "read:user",
        "repo", // Access repositories
        "gist"  // Access gists
      ]
    }
  }
});
```

### State Parameter

Better Auth automatically handles the OAuth state parameter for CSRF protection.

```ts
// Custom state validation
export const auth = betterAuth({
  advanced: {
    generateState: async () => {
      // Custom state generation
      return crypto.randomUUID();
    },
    validateState: async (state: string) => {
      // Custom state validation
      return true;
    }
  }
});
```

### PKCE Support

Better Auth automatically uses PKCE (Proof Key for Code Exchange) for supported providers.

```ts
export const auth = betterAuth({
  socialProviders: {
    customProvider: {
      pkce: true, // Enable PKCE
      // ... other config
    }
  }
});
```

## Error Handling

### Client-Side

```ts
await authClient.signIn.social({
  provider: "github",
  errorCallbackURL: "/auth/error"
}, {
  onError: (ctx) => {
    console.error("OAuth error:", ctx.error);
    // Handle specific errors
    if (ctx.error.code === "OAUTH_ACCOUNT_ALREADY_LINKED") {
      alert("This account is already linked to another user");
    }
  }
});
```

### Server-Side

```ts
export const auth = betterAuth({
  callbacks: {
    async onOAuthError({ error, provider }) {
      console.error(`OAuth error with ${provider}:`, error);
      // Log to monitoring service
      await logError(error);
    }
  }
});
```

## Callback URLs

### Development

```
http://localhost:3000/api/auth/callback/{provider}
```

### Production

```
https://yourdomain.com/api/auth/callback/{provider}
```

**Important:** Add all callback URLs to your OAuth provider settings.
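
The `{provider}` segment is filled in automatically; only the origin changes between environments. A minimal sketch, assuming the `baseURL` option and the `BETTER_AUTH_URL` variable used elsewhere in this skill:

```ts
export const auth = betterAuth({
  // http://localhost:3000 in dev, https://yourdomain.com in production
  baseURL: process.env.BETTER_AUTH_URL,
  // ... providers
});
```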

## UI Components

### Sign In Button (React)

```tsx
import { authClient } from "@/lib/auth-client";

export function SocialSignIn() {
  const handleOAuth = async (provider: string) => {
    await authClient.signIn.social({
      provider,
      callbackURL: "/dashboard"
    });
  };

  return (
    <div className="space-y-2">
      <button onClick={() => handleOAuth("github")}>
        Sign in with GitHub
      </button>
      <button onClick={() => handleOAuth("google")}>
        Sign in with Google
      </button>
      <button onClick={() => handleOAuth("discord")}>
        Sign in with Discord
      </button>
    </div>
  );
}
```

## Best Practices

1. **Callback URLs**: Add all environments (dev, staging, prod) to the OAuth app
2. **Scopes**: Request the minimum scopes needed
3. **Token Storage**: Better Auth stores tokens securely in the database
4. **Token Refresh**: Implement automatic token refresh for long-lived sessions
5. **Account Linking**: Enable it for better UX when a user signs in with different providers
6. **Error Handling**: Provide clear error messages for OAuth failures
7. **Provider Icons**: Use official brand assets for OAuth buttons
8. **Mobile Deep Links**: Configure deep links for mobile OAuth flows
9. **Email Matching**: Consider auto-linking accounts with the same email
10. **Privacy**: Inform users what data you access from OAuth providers

## Common Issues

### Redirect URI Mismatch

Ensure the callback URL in the OAuth app matches exactly:

```
http://localhost:3000/api/auth/callback/github
```

### Missing Scopes

Add the required scopes for email access:

```ts
scope: ["user:email"] // GitHub
scope: ["email"]      // Google
```

### HTTPS Required

Some providers (Apple, Microsoft) require HTTPS callbacks. Use ngrok for local development:

```bash
ngrok http 3000
```

### CORS Errors

Configure CORS if the frontend and backend are on different domains:

```ts
export const auth = betterAuth({
  advanced: {
    corsOptions: {
      origin: ["https://yourdomain.com"],
      credentials: true
    }
  }
});
```
BIN
.opencode/skills/better-auth/scripts/.coverage
Normal file
Binary file not shown.
521
.opencode/skills/better-auth/scripts/better_auth_init.py
Executable file
@@ -0,0 +1,521 @@
#!/usr/bin/env python3
"""
Better Auth Initialization Script

Interactive script to initialize Better Auth configuration.
Supports multiple databases, ORMs, and authentication methods.

.env loading order: process.env > skill/.env > skills/.env > .opencode/.env
"""

import os
import sys
import json
import secrets
from pathlib import Path
from typing import Optional, Dict, Any, List
from dataclasses import dataclass


@dataclass
class EnvConfig:
    """Environment configuration holder."""
    secret: str
    url: str
    database_url: Optional[str] = None
    github_client_id: Optional[str] = None
    github_client_secret: Optional[str] = None
    google_client_id: Optional[str] = None
    google_client_secret: Optional[str] = None


class BetterAuthInit:
    """Better Auth configuration initializer."""

    def __init__(self, project_root: Optional[Path] = None):
        """
        Initialize the Better Auth configuration tool.

        Args:
            project_root: Project root directory. Auto-detected if not provided.
        """
        self.project_root = project_root or self._find_project_root()
        self.env_config: Optional[EnvConfig] = None

    @staticmethod
    def _find_project_root() -> Path:
        """
        Find project root by looking for package.json.

        Returns:
            Path to project root.

        Raises:
            RuntimeError: If project root cannot be found.
        """
        current = Path.cwd()
        while current != current.parent:
            if (current / "package.json").exists():
                return current
            current = current.parent

        raise RuntimeError("Could not find project root (no package.json found)")

    def _load_env_files(self) -> Dict[str, str]:
        """
        Load environment variables from .env files in order.

        Loading order: process.env > skill/.env > skills/.env > .opencode/.env

        Returns:
            Dictionary of environment variables.
        """
        env_vars = {}

        # Define search paths in reverse priority order
        skill_dir = Path(__file__).parent.parent
        env_paths = [
            self.project_root / ".claude" / ".env",
            self.project_root / ".claude" / "skills" / ".env",
            skill_dir / ".env",
        ]

        # Load from files (lowest priority first)
        for env_path in env_paths:
            if env_path.exists():
                env_vars.update(self._parse_env_file(env_path))

        # Override with process environment (highest priority)
        env_vars.update(os.environ)

        return env_vars

    @staticmethod
    def _parse_env_file(path: Path) -> Dict[str, str]:
        """
        Parse .env file into dictionary.

        Args:
            path: Path to .env file.

        Returns:
            Dictionary of key-value pairs.
        """
        env_vars = {}
        try:
            with open(path, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line and not line.startswith("#") and "=" in line:
                        key, value = line.split("=", 1)
                        # Remove quotes if present
                        value = value.strip().strip('"').strip("'")
                        env_vars[key.strip()] = value
        except Exception as e:
            print(f"Warning: Could not parse {path}: {e}")

        return env_vars

    @staticmethod
    def generate_secret(length: int = 32) -> str:
        """
        Generate cryptographically secure random secret.

        Args:
            length: Length of secret in bytes.

        Returns:
            Hex-encoded secret string.
        """
        return secrets.token_hex(length)

    def prompt_database(self) -> Dict[str, Any]:
        """
        Prompt user for database configuration.

        Returns:
            Database configuration dictionary.
        """
        print("\nDatabase Configuration")
        print("=" * 50)
        print("1. Direct Connection (PostgreSQL/MySQL/SQLite)")
        print("2. Drizzle ORM")
        print("3. Prisma")
        print("4. Kysely")
        print("5. MongoDB")

        choice = input("\nSelect database option (1-5): ").strip()

        db_configs = {
            "1": self._prompt_direct_db,
            "2": self._prompt_drizzle,
            "3": self._prompt_prisma,
            "4": self._prompt_kysely,
            "5": self._prompt_mongodb,
        }

        handler = db_configs.get(choice)
        if not handler:
            print("Invalid choice. Defaulting to direct PostgreSQL.")
            return self._prompt_direct_db()

        return handler()

    def _prompt_direct_db(self) -> Dict[str, Any]:
        """Prompt for direct database connection."""
        print("\nDatabase Type:")
        print("1. PostgreSQL")
        print("2. MySQL")
        print("3. SQLite")

        db_type = input("Select (1-3): ").strip()

        if db_type == "3":
            db_path = input("SQLite file path [./dev.db]: ").strip() or "./dev.db"
            return {
                "type": "sqlite",
                "import": "import Database from 'better-sqlite3';",
                "config": f'database: new Database("{db_path}")'
            }
        elif db_type == "2":
            db_url = input("MySQL connection string: ").strip()
            return {
                "type": "mysql",
                "import": "import { createPool } from 'mysql2/promise';",
                "config": "database: createPool({ connectionString: process.env.DATABASE_URL })",
                "env_var": ("DATABASE_URL", db_url)
            }
        else:
            db_url = input("PostgreSQL connection string: ").strip()
            return {
                "type": "postgresql",
                "import": "import { Pool } from 'pg';",
                "config": "database: new Pool({ connectionString: process.env.DATABASE_URL })",
                "env_var": ("DATABASE_URL", db_url)
            }

    def _prompt_drizzle(self) -> Dict[str, Any]:
        """Prompt for Drizzle ORM configuration."""
        print("\nDrizzle Provider:")
        print("1. PostgreSQL")
        print("2. MySQL")
        print("3. SQLite")

        provider = input("Select (1-3): ").strip()
        provider_map = {"1": "pg", "2": "mysql", "3": "sqlite"}
        provider_name = provider_map.get(provider, "pg")

        return {
            "type": "drizzle",
            "provider": provider_name,
            "import": "import { drizzleAdapter } from 'better-auth/adapters/drizzle';\nimport { db } from '@/db';",
            "config": f"database: drizzleAdapter(db, {{ provider: '{provider_name}' }})"
        }

    def _prompt_prisma(self) -> Dict[str, Any]:
        """Prompt for Prisma configuration."""
        print("\nPrisma Provider:")
        print("1. PostgreSQL")
        print("2. MySQL")
        print("3. SQLite")

        provider = input("Select (1-3): ").strip()
        provider_map = {"1": "postgresql", "2": "mysql", "3": "sqlite"}
        provider_name = provider_map.get(provider, "postgresql")

        return {
            "type": "prisma",
            "provider": provider_name,
            "import": "import { prismaAdapter } from 'better-auth/adapters/prisma';\nimport { PrismaClient } from '@prisma/client';\n\nconst prisma = new PrismaClient();",
            "config": f"database: prismaAdapter(prisma, {{ provider: '{provider_name}' }})"
        }

    def _prompt_kysely(self) -> Dict[str, Any]:
        """Prompt for Kysely configuration."""
        return {
            "type": "kysely",
            "import": "import { kyselyAdapter } from 'better-auth/adapters/kysely';\nimport { db } from '@/db';",
            "config": "database: kyselyAdapter(db, { provider: 'pg' })"
        }

    def _prompt_mongodb(self) -> Dict[str, Any]:
        """Prompt for MongoDB configuration."""
        mongo_uri = input("MongoDB connection string: ").strip()
        db_name = input("Database name: ").strip()

        return {
            "type": "mongodb",
            "import": "import { mongodbAdapter } from 'better-auth/adapters/mongodb';\nimport { client } from '@/db';",
            "config": f"database: mongodbAdapter(client, {{ databaseName: '{db_name}' }})",
            "env_var": ("MONGODB_URI", mongo_uri)
        }

    def prompt_auth_methods(self) -> List[str]:
        """
        Prompt user for authentication methods.

        Returns:
            List of selected auth method codes.
        """
        print("\nAuthentication Methods")
        print("=" * 50)
        print("Select authentication methods (space-separated, e.g., '1 2 3'):")
        print("1. Email/Password")
        print("2. GitHub OAuth")
        print("3. Google OAuth")
        print("4. Discord OAuth")
        print("5. Two-Factor Authentication (2FA)")
        print("6. Passkeys (WebAuthn)")
        print("7. Magic Link")
        print("8. Username")

        choices = input("\nYour selection: ").strip().split()
        # Membership in a set of single characters avoids accepting
        # multi-digit substrings such as "12"
        valid_choices = set("12345678")
        return [c for c in choices if c in valid_choices]

    def generate_auth_config(
        self,
        db_config: Dict[str, Any],
        auth_methods: List[str],
    ) -> str:
        """
        Generate auth.ts configuration file content.

        Args:
            db_config: Database configuration.
            auth_methods: Selected authentication methods.

        Returns:
            Generated TypeScript configuration code.
        """
        imports = ["import { betterAuth } from 'better-auth';"]
        plugins = []
        plugin_imports = []
        config_parts = []

        # Database import
        if db_config.get("import"):
            imports.append(db_config["import"])

        # Email/Password
        if "1" in auth_methods:
            config_parts.append("""  emailAndPassword: {
    enabled: true,
    autoSignIn: true
  }""")

        # OAuth providers
        social_providers = []
        if "2" in auth_methods:
            social_providers.append("""    github: {
      clientId: process.env.GITHUB_CLIENT_ID!,
      clientSecret: process.env.GITHUB_CLIENT_SECRET!,
    }""")

        if "3" in auth_methods:
            social_providers.append("""    google: {
      clientId: process.env.GOOGLE_CLIENT_ID!,
      clientSecret: process.env.GOOGLE_CLIENT_SECRET!,
    }""")

        if "4" in auth_methods:
            social_providers.append("""    discord: {
      clientId: process.env.DISCORD_CLIENT_ID!,
      clientSecret: process.env.DISCORD_CLIENT_SECRET!,
    }""")

        if social_providers:
            # Join outside the f-string: backslashes inside f-string
            # expressions are a SyntaxError before Python 3.12
            joined_providers = ",\n".join(social_providers)
            config_parts.append(f"  socialProviders: {{\n{joined_providers}\n  }}")

        # Plugins
        if "5" in auth_methods:
            plugin_imports.append("import { twoFactor } from 'better-auth/plugins';")
            plugins.append("twoFactor()")

        if "6" in auth_methods:
            plugin_imports.append("import { passkey } from 'better-auth/plugins';")
            plugins.append("passkey()")

        if "7" in auth_methods:
            plugin_imports.append("import { magicLink } from 'better-auth/plugins';")
            plugins.append("""magicLink({
      sendMagicLink: async ({ email, url }) => {
        // TODO: Implement email sending
        console.log(`Magic link for ${email}: ${url}`);
      }
    })""")

        if "8" in auth_methods:
            plugin_imports.append("import { username } from 'better-auth/plugins';")
            plugins.append("username()")

        if plugins:
            plugins_str = ",\n    ".join(plugins)
            config_parts.append(f"  plugins: [\n    {plugins_str}\n  ]")

        # Combine all imports
        all_imports = imports + plugin_imports

        # Build config body; joining once here avoids a dangling comma
        # when only plugins are selected
        config_body = ",\n".join(config_parts)

        # Final output (chr(10) keeps backslashes out of the f-string
        # expression for Python < 3.12 compatibility)
        return f"""{chr(10).join(all_imports)}

export const auth = betterAuth({{
  {db_config["config"]},
{config_body}
}});
"""

    def generate_env_file(
        self,
        db_config: Dict[str, Any],
        auth_methods: List[str]
    ) -> str:
        """
        Generate .env file content.

        Args:
            db_config: Database configuration.
            auth_methods: Selected authentication methods.

        Returns:
            Generated .env file content.
        """
        env_vars = [
            f"BETTER_AUTH_SECRET={self.generate_secret()}",
            "BETTER_AUTH_URL=http://localhost:3000",
        ]

        # Database URL
        if db_config.get("env_var"):
            key, value = db_config["env_var"]
            env_vars.append(f"{key}={value}")

        # OAuth credentials
        if "2" in auth_methods:
            env_vars.extend([
                "GITHUB_CLIENT_ID=your_github_client_id",
                "GITHUB_CLIENT_SECRET=your_github_client_secret",
            ])

        if "3" in auth_methods:
            env_vars.extend([
                "GOOGLE_CLIENT_ID=your_google_client_id",
                "GOOGLE_CLIENT_SECRET=your_google_client_secret",
            ])

        if "4" in auth_methods:
            env_vars.extend([
                "DISCORD_CLIENT_ID=your_discord_client_id",
                "DISCORD_CLIENT_SECRET=your_discord_client_secret",
            ])

        return "\n".join(env_vars) + "\n"

    def run(self) -> None:
        """Run interactive initialization."""
        print("=" * 50)
        print("Better Auth Configuration Generator")
        print("=" * 50)

        # Load existing env
        env_vars = self._load_env_files()

        # Prompt for configuration
        db_config = self.prompt_database()
        auth_methods = self.prompt_auth_methods()

        # Generate files
        auth_config = self.generate_auth_config(db_config, auth_methods)
        env_content = self.generate_env_file(db_config, auth_methods)

        # Display output
        print("\n" + "=" * 50)
        print("Generated Configuration")
        print("=" * 50)

        print("\n--- auth.ts ---")
        print(auth_config)

        print("\n--- .env ---")
        print(env_content)

        # Offer to save
        save = input("\nSave configuration files? (y/N): ").strip().lower()
        if save == "y":
            self._save_files(auth_config, env_content)
        else:
            print("Configuration not saved.")

    def _save_files(self, auth_config: str, env_content: str) -> None:
        """
        Save generated configuration files.

        Args:
            auth_config: auth.ts content.
            env_content: .env content.
        """
        # Save auth.ts
        auth_locations = [
            self.project_root / "lib" / "auth.ts",
            self.project_root / "src" / "lib" / "auth.ts",
            self.project_root / "utils" / "auth.ts",
            self.project_root / "auth.ts",
        ]

        print("\nWhere to save auth.ts?")
        for i, loc in enumerate(auth_locations, 1):
            print(f"{i}. {loc}")
        print("5. Custom path")

        choice = input("Select (1-5): ").strip()
        if choice == "5":
            custom_path = input("Enter path: ").strip()
            auth_path = Path(custom_path)
        else:
            idx = int(choice) - 1 if choice.isdigit() else 0
            auth_path = auth_locations[idx]

        auth_path.parent.mkdir(parents=True, exist_ok=True)
        auth_path.write_text(auth_config, encoding="utf-8")
        print(f"Saved: {auth_path}")

        # Save .env
        env_path = self.project_root / ".env"
        if env_path.exists():
            backup = self.project_root / ".env.backup"
            env_path.rename(backup)
            print(f"Backed up existing .env to {backup}")

        env_path.write_text(env_content, encoding="utf-8")
        print(f"Saved: {env_path}")

        print("\nNext steps:")
        print("1. Run: npx @better-auth/cli generate")
        print("2. Apply database migrations")
        print("3. Mount API handler in your framework")
        print("4. Create client instance")


def main() -> int:
    """
    Main entry point.

    Returns:
        Exit code (0 for success, 1 for error).
    """
    try:
        initializer = BetterAuthInit()
        initializer.run()
        return 0
    except KeyboardInterrupt:
        print("\n\nOperation cancelled.")
        return 1
    except Exception as e:
        print(f"\nError: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())
15
.opencode/skills/better-auth/scripts/requirements.txt
Normal file
@@ -0,0 +1,15 @@
# Better Auth Skill Dependencies
# Python 3.10+ required

# No Python package dependencies - uses only standard library

# Testing dependencies (dev)
pytest>=8.0.0
pytest-cov>=4.1.0
pytest-mock>=3.12.0

# Note: This script generates Better Auth configuration
# The actual Better Auth library is installed via npm/pnpm/yarn:
#   npm install better-auth
#   pnpm add better-auth
#   yarn add better-auth
BIN
.opencode/skills/better-auth/scripts/tests/.coverage
Normal file
Binary file not shown.
@@ -0,0 +1,421 @@
"""
Tests for better_auth_init.py

Covers main functionality with mocked I/O and file operations.
Target: >80% coverage
"""

import sys
import pytest
from pathlib import Path
from unittest.mock import Mock, patch, mock_open, MagicMock
from io import StringIO

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

from better_auth_init import BetterAuthInit, EnvConfig, main


@pytest.fixture
def mock_project_root(tmp_path):
    """Create mock project root with package.json."""
    (tmp_path / "package.json").write_text("{}")
    return tmp_path


@pytest.fixture
def auth_init(mock_project_root):
    """Create BetterAuthInit instance with mock project root."""
    return BetterAuthInit(project_root=mock_project_root)


class TestBetterAuthInit:
    """Test BetterAuthInit class."""

    def test_init_with_project_root(self, mock_project_root):
        """Test initialization with explicit project root."""
        init = BetterAuthInit(project_root=mock_project_root)
        assert init.project_root == mock_project_root
        assert init.env_config is None

    def test_find_project_root_success(self, mock_project_root, monkeypatch):
        """Test finding project root successfully."""
        monkeypatch.chdir(mock_project_root)
        init = BetterAuthInit()
        assert init.project_root == mock_project_root

    def test_find_project_root_failure(self, tmp_path, monkeypatch):
        """Test failure to find project root."""
        # Create path without package.json
        no_package_dir = tmp_path / "no-package"
        no_package_dir.mkdir()
        monkeypatch.chdir(no_package_dir)

        # Mock parent to stop infinite loop
        with patch.object(Path, "parent", new_callable=lambda: property(lambda self: self)):
            with pytest.raises(RuntimeError, match="Could not find project root"):
                BetterAuthInit()

    def test_generate_secret(self):
        """Test secret generation."""
        secret = BetterAuthInit.generate_secret()
        assert len(secret) == 64  # 32 bytes = 64 hex chars
        assert all(c in "0123456789abcdef" for c in secret)

        # Test custom length
        secret = BetterAuthInit.generate_secret(length=16)
        assert len(secret) == 32  # 16 bytes = 32 hex chars

    def test_parse_env_file(self, tmp_path):
        """Test parsing .env file."""
        env_content = """
# Comment
KEY1=value1
KEY2="value2"
KEY3='value3'
INVALID LINE
KEY4=value=with=equals
"""
        env_file = tmp_path / ".env"
        env_file.write_text(env_content)

        result = BetterAuthInit._parse_env_file(env_file)

        assert result["KEY1"] == "value1"
        assert result["KEY2"] == "value2"
        assert result["KEY3"] == "value3"
        assert result["KEY4"] == "value=with=equals"
        assert "INVALID" not in result

    def test_parse_env_file_missing(self, tmp_path):
        """Test parsing missing .env file."""
        result = BetterAuthInit._parse_env_file(tmp_path / "nonexistent.env")
        assert result == {}

    def test_load_env_files(self, auth_init, mock_project_root):
        """Test loading environment variables from multiple files."""
        # Create .env files
        claude_env = mock_project_root / ".claude" / ".env"
        claude_env.parent.mkdir(parents=True, exist_ok=True)
        claude_env.write_text("BASE_VAR=base\nOVERRIDE=claude")

        skills_env = mock_project_root / ".claude" / "skills" / ".env"
        skills_env.parent.mkdir(parents=True, exist_ok=True)
        skills_env.write_text("OVERRIDE=skills\nSKILLS_VAR=skills")

        # Mock process env (highest priority)
        with patch.dict("os.environ", {"OVERRIDE": "process", "PROCESS_VAR": "process"}):
            result = auth_init._load_env_files()

        assert result["BASE_VAR"] == "base"
        assert result["SKILLS_VAR"] == "skills"
        assert result["OVERRIDE"] == "process"  # Process env wins
        assert result["PROCESS_VAR"] == "process"

    def test_prompt_direct_db_sqlite(self, auth_init):
        """Test prompting for SQLite database."""
        with patch("builtins.input", side_effect=["3", "./test.db"]):
            config = auth_init._prompt_direct_db()

        assert config["type"] == "sqlite"
        assert "better-sqlite3" in config["import"]
        assert "./test.db" in config["config"]

    def test_prompt_direct_db_postgresql(self, auth_init):
        """Test prompting for PostgreSQL database."""
        with patch("builtins.input", side_effect=["1", "postgresql://localhost/test"]):
            config = auth_init._prompt_direct_db()

        assert config["type"] == "postgresql"
        assert "pg" in config["import"]
        assert config["env_var"] == ("DATABASE_URL", "postgresql://localhost/test")

    def test_prompt_direct_db_mysql(self, auth_init):
        """Test prompting for MySQL database."""
        with patch("builtins.input", side_effect=["2", "mysql://localhost/test"]):
            config = auth_init._prompt_direct_db()

        assert config["type"] == "mysql"
        assert "mysql2" in config["import"]
        assert config["env_var"][0] == "DATABASE_URL"

    def test_prompt_drizzle(self, auth_init):
        """Test prompting for Drizzle ORM."""
        with patch("builtins.input", return_value="1"):
            config = auth_init._prompt_drizzle()

        assert config["type"] == "drizzle"
        assert config["provider"] == "pg"
        assert "drizzleAdapter" in config["import"]
        assert "drizzleAdapter" in config["config"]

    def test_prompt_prisma(self, auth_init):
        """Test prompting for Prisma."""
        with patch("builtins.input", return_value="2"):
            config = auth_init._prompt_prisma()

        assert config["type"] == "prisma"
        assert config["provider"] == "mysql"
        assert "prismaAdapter" in config["import"]
        assert "PrismaClient" in config["import"]

    def test_prompt_kysely(self, auth_init):
        """Test prompting for Kysely."""
        config = auth_init._prompt_kysely()

        assert config["type"] == "kysely"
        assert "kyselyAdapter" in config["import"]

    def test_prompt_mongodb(self, auth_init):
        """Test prompting for MongoDB."""
        with patch("builtins.input", side_effect=["mongodb://localhost/test", "mydb"]):
            config = auth_init._prompt_mongodb()

        assert config["type"] == "mongodb"
        assert "mongodbAdapter" in config["import"]
        assert "mydb" in config["config"]
        assert config["env_var"] == ("MONGODB_URI", "mongodb://localhost/test")

    def test_prompt_database(self, auth_init):
        """Test database prompting with different choices."""
        # Test valid choice
        with patch("builtins.input", side_effect=["3", "1"]):
            config = auth_init.prompt_database()
            assert config["type"] == "prisma"

        # Test invalid choice (defaults to direct DB)
        with patch("builtins.input", side_effect=["99", "1", "postgresql://localhost/test"]):
            with patch("builtins.print"):
                config = auth_init.prompt_database()
                assert config["type"] == "postgresql"

    def test_prompt_auth_methods(self, auth_init):
        """Test prompting for authentication methods."""
        with patch("builtins.input", return_value="1 2 3 5 8"):
            with patch("builtins.print"):
                methods = auth_init.prompt_auth_methods()

        assert methods == ["1", "2", "3", "5", "8"]

    def test_prompt_auth_methods_invalid(self, auth_init):
        """Test filtering invalid auth method choices."""
        with patch("builtins.input", return_value="1 99 abc 3"):
            with patch("builtins.print"):
                methods = auth_init.prompt_auth_methods()

        assert methods == ["1", "3"]

    def test_generate_auth_config_basic(self, auth_init):
        """Test generating basic auth config."""
        db_config = {
            "import": "import Database from 'better-sqlite3';",
            "config": "database: new Database('./dev.db')"
        }
        auth_methods = ["1"]  # Email/password only

        config = auth_init.generate_auth_config(db_config, auth_methods)

        assert "import { betterAuth }" in config
        assert "emailAndPassword" in config
        assert "enabled: true" in config
        assert "better-sqlite3" in config

    def test_generate_auth_config_with_oauth(self, auth_init):
        """Test generating config with OAuth providers."""
        db_config = {
            "import": "import { Pool } from 'pg';",
            "config": "database: new Pool()"
        }
        auth_methods = ["1", "2", "3", "4"]  # Email + GitHub + Google + Discord

        config = auth_init.generate_auth_config(db_config, auth_methods)

        assert "socialProviders" in config
        assert "github:" in config
        assert "google:" in config
        assert "discord:" in config
        assert "GITHUB_CLIENT_ID" in config
        assert "GOOGLE_CLIENT_ID" in config
        assert "DISCORD_CLIENT_ID" in config

    def test_generate_auth_config_with_plugins(self, auth_init):
        """Test generating config with plugins."""
        db_config = {"import": "", "config": "database: db"}
        auth_methods = ["5", "6", "7", "8"]  # 2FA, Passkey, Magic Link, Username

        config = auth_init.generate_auth_config(db_config, auth_methods)

        assert "plugins:" in config
        assert "twoFactor" in config
        assert "passkey" in config
        assert "magicLink" in config
        assert "username" in config
        assert "from 'better-auth/plugins'" in config

    def test_generate_env_file_basic(self, auth_init):
        """Test generating basic .env file."""
        db_config = {"type": "sqlite"}
        auth_methods = ["1"]

        env_content = auth_init.generate_env_file(db_config, auth_methods)

        assert "BETTER_AUTH_SECRET=" in env_content
        assert "BETTER_AUTH_URL=http://localhost:3000" in env_content
        assert len(env_content.split("\n")) >= 2

    def test_generate_env_file_with_database_url(self, auth_init):
        """Test generating .env with database URL."""
        db_config = {
            "env_var": ("DATABASE_URL", "postgresql://localhost/test")
        }
        auth_methods = []

        env_content = auth_init.generate_env_file(db_config, auth_methods)

        assert "DATABASE_URL=postgresql://localhost/test" in env_content

    def test_generate_env_file_with_oauth(self, auth_init):
        """Test generating .env with OAuth credentials."""
        db_config = {}
        auth_methods = ["2", "3", "4"]  # GitHub, Google, Discord

        env_content = auth_init.generate_env_file(db_config, auth_methods)

        assert "GITHUB_CLIENT_ID=" in env_content
        assert "GITHUB_CLIENT_SECRET=" in env_content
        assert "GOOGLE_CLIENT_ID=" in env_content
        assert "GOOGLE_CLIENT_SECRET=" in env_content
        assert "DISCORD_CLIENT_ID=" in env_content
        assert "DISCORD_CLIENT_SECRET=" in env_content

    def test_save_files(self, auth_init, mock_project_root):
        """Test saving configuration files."""
        auth_config = "// auth config"
        env_content = "SECRET=test"

        with patch("builtins.input", side_effect=["1"]):
            auth_init._save_files(auth_config, env_content)

        # Check auth.ts was saved
        auth_path = mock_project_root / "lib" / "auth.ts"
        assert auth_path.exists()
        assert auth_path.read_text() == auth_config

        # Check .env was saved
        env_path = mock_project_root / ".env"
        assert env_path.exists()
        assert env_path.read_text() == env_content

    def test_save_files_custom_path(self, auth_init, mock_project_root):
        """Test saving with custom path."""
        auth_config = "// config"
        env_content = "SECRET=test"

        custom_path = str(mock_project_root / "custom" / "auth.ts")
        with patch("builtins.input", side_effect=["5", custom_path]):
            auth_init._save_files(auth_config, env_content)

        assert Path(custom_path).exists()

    def test_save_files_backup_existing_env(self, auth_init, mock_project_root):
        """Test backing up existing .env file."""
        # Create existing .env
        env_path = mock_project_root / ".env"
        env_path.write_text("OLD_SECRET=old")

        auth_config = "// config"
        env_content = "NEW_SECRET=new"

        with patch("builtins.input", return_value="1"):
            auth_init._save_files(auth_config, env_content)

        # Check backup was created
        backup_path = mock_project_root / ".env.backup"
        assert backup_path.exists()
        assert backup_path.read_text() == "OLD_SECRET=old"

        # Check new .env
        assert env_path.read_text() == "NEW_SECRET=new"

    def test_run_full_flow(self, auth_init, mock_project_root):
        """Test complete run flow."""
        inputs = [
            "1",  # Direct DB
            "1",  # PostgreSQL
            "postgresql://localhost/test",
            "1 2",  # Email + GitHub
            "n"  # Don't save
        ]

        with patch("builtins.input", side_effect=inputs):
            with patch("builtins.print"):
                auth_init.run()

        # Should complete without errors
        # Files not saved because user chose 'n'
        assert not (mock_project_root / "auth.ts").exists()

    def test_run_save_files(self, auth_init, mock_project_root):
        """Test run flow with file saving."""
        inputs = [
            "1",  # Direct DB
            "3",  # SQLite
            "",   # Default path
            "1",  # Email only
            "y",  # Save
            "1"   # Save location
        ]

        with patch("builtins.input", side_effect=inputs):
            with patch("builtins.print"):
                auth_init.run()

        # Check files were created
        assert (mock_project_root / "lib" / "auth.ts").exists()
        assert (mock_project_root / ".env").exists()


class TestMainFunction:
    """Test main entry point."""

    def test_main_success(self, tmp_path, monkeypatch):
        """Test successful main execution."""
        (tmp_path / "package.json").write_text("{}")
        monkeypatch.chdir(tmp_path)

        inputs = ["1", "3", "", "1", "n"]

        with patch("builtins.input", side_effect=inputs):
            with patch("builtins.print"):
                exit_code = main()

        assert exit_code == 0

    def test_main_keyboard_interrupt(self, tmp_path, monkeypatch):
        """Test main with keyboard interrupt."""
        (tmp_path / "package.json").write_text("{}")
        monkeypatch.chdir(tmp_path)

        with patch("builtins.input", side_effect=KeyboardInterrupt()):
            with patch("builtins.print"):
                exit_code = main()

        assert exit_code == 1

    def test_main_error(self, tmp_path, monkeypatch):
        """Test main with error."""
        # No package.json - should fail
        no_package = tmp_path / "no-package"
        no_package.mkdir()
        monkeypatch.chdir(no_package)

        with patch.object(Path, "parent", new_callable=lambda: property(lambda self: self)):
            with patch("sys.stderr", new_callable=StringIO):
                exit_code = main()

        assert exit_code == 1


if __name__ == "__main__":
    pytest.main([__file__, "-v", "--cov=better_auth_init", "--cov-report=term-missing"])
105
.opencode/skills/bootstrap/SKILL.md
Normal file
@@ -0,0 +1,105 @@
---
name: ck:bootstrap
description: "Bootstrap new projects with research, tech stack, design, planning, and implementation. Modes: full (interactive), auto (default), fast (skip research), parallel (multi-agent)."
license: MIT
argument-hint: "[requirements] [--full|--auto|--fast|--parallel]"
metadata:
  author: claudekit
  version: "1.0.0"
---

# Bootstrap - New Project Scaffolding

End-to-end project bootstrapping from idea to running code.

**Principles:** YAGNI, KISS, DRY | Token efficiency | Concise reports

## Usage

```
/ck:bootstrap <user-requirements>
```

**Flags** (optional, default `--auto`):

| Flag | Mode | Thinking | User Gates | Planning Skill | Cook Skill |
|------|------|----------|------------|----------------|------------|
| `--full` | Full interactive | Ultrathink | Every phase | `--hard` | (interactive) |
| `--auto` | Automatic | Ultrathink | Design only | `--auto` | `--auto` |
| `--fast` | Quick | Think hard | None | `--fast` | `--auto` |
| `--parallel` | Multi-agent | Ultrathink | Design only | `--parallel` | `--parallel` |

**Examples:**
```
/ck:bootstrap "Build a SaaS dashboard with auth" --fast
/ck:bootstrap "E-commerce platform with Stripe" --parallel
```

## Workflow Overview

```
[Git Init] → [Research?] → [Tech Stack?] → [Design?] → [Planning] → [Implementation] → [Test] → [Review] → [Docs] → [Onboard] → [Final]
```

Each mode loads a specific workflow reference + shared phases.

## Mode Detection

If no flag is provided, default to `--auto`.

Load the appropriate workflow reference:
- `--full`: Load `references/workflow-full.md`
- `--auto`: Load `references/workflow-auto.md`
- `--fast`: Load `references/workflow-fast.md`
- `--parallel`: Load `references/workflow-parallel.md`

All modes share: Load `references/shared-phases.md` for implementation through final report.

## Step 0: Git Init (ALL modes)

Check if Git is initialized. If not:
- `--full`: Ask user if they want to init → `git-manager` subagent (`main` branch)
- Others: Auto-init via `git-manager` subagent (`main` branch)

## Skill Triggers (MANDATORY)

After early phases (research, tech stack, design), trigger downstream skills:

### Planning Phase
Activate **ck:plan** skill with the mode-appropriate flag:
- `--full` → `/ck:plan --hard <requirements>` (thorough research + validation)
- `--auto` → `/ck:plan --auto <requirements>` (auto-detect complexity)
- `--fast` → `/ck:plan --fast <requirements>` (skip research)
- `--parallel` → `/ck:plan --parallel <requirements>` (file ownership + dependency graph)

The planning skill outputs a plan path. Pass it to cook.

### Implementation Phase
Activate **ck:cook** skill with the plan path and mode-appropriate flag:
- `--full` → `/ck:cook <plan-path>` (interactive review gates)
- `--auto` → `/ck:cook --auto <plan-path>` (skip review gates)
- `--fast` → `/ck:cook --auto <plan-path>` (skip review gates)
- `--parallel` → `/ck:cook --parallel <plan-path>` (multi-agent execution)

## Role

Elite software engineering expert specializing in system architecture and technical decisions. Brutally honest about feasibility and trade-offs.

## Critical Rules

- Activate relevant skills from the catalog during the process
- Keep all research reports ≤150 lines
- All docs written to `./docs` directory
- Plans written to `./plans` directory using naming from the `## Naming` section
- DO NOT implement code directly — delegate through planning + cook skills
- Sacrifice grammar for concision in reports
- List unresolved questions at end of reports
- Run `/ck:journal` to write a concise technical journal entry upon completion

## References

- `references/workflow-full.md` - Full interactive workflow
- `references/workflow-auto.md` - Auto workflow (default)
- `references/workflow-fast.md` - Fast workflow
- `references/workflow-parallel.md` - Parallel workflow
- `references/shared-phases.md` - Common phases (implementation → final report)
59
.opencode/skills/bootstrap/references/shared-phases.md
Normal file
@@ -0,0 +1,59 @@
# Shared Phases (All Modes)

These phases apply after planning is complete and the cook skill is activated.
The cook skill handles most of these — this reference documents bootstrap-specific guidance.

## Implementation

Handled by **ck:cook** skill. Bootstrap-specific notes:
- Use main agent to implement step by step per plan in `./plans`
- Use `ui-ux-designer` subagent for frontend per `./docs/design-guidelines.md`
- Asset pipeline: `ck:ai-multimodal` (generate/analyze) → `imagemagick` (crop/resize) → background removal if needed
- Run type checking and compile after each phase

## Testing

Handled by **ck:cook** skill. Bootstrap-specific notes:
- Write real tests — NO fake data, mocks, cheats, tricks, temporary solutions
- `tester` subagent runs tests → report to main agent
- If failures: `debugger` subagent → fix → repeat until all pass
- DO NOT ignore failed tests to pass build/CI

## Code Review

Handled by **ck:cook** skill. Bootstrap-specific notes:
- `code-reviewer` subagent reviews code
- If critical issues: fix → retest → repeat
- Report summary to user when all tests pass and code is reviewed

## Documentation

After code review passes. Use `docs-manager` subagent to create/update:
- `./docs/README.md` (≤300 lines)
- `./docs/codebase-summary.md`
- `./docs/project-overview-pdr.md` (Product Development Requirements)
- `./docs/code-standards.md`
- `./docs/system-architecture.md`

Use `project-manager` subagent to create:
- `./docs/project-roadmap.md`
- Update plan/phase status to complete

## Onboarding

Guide the user to get started with the project:
- Ask 1 question at a time, wait for the answer before the next
- Example: instruct user to obtain API key → ask for key → add to env vars
- If user requests config changes, repeat until approved

## Final Report

1. Summary of all changes, brief explanations
2. Guide user to get started + suggest next steps
3. Ask user if they want to commit/push:
   - If yes: `git-manager` subagent to commit (and push if requested)
   - `--fast` mode: auto-commit (no push) without asking

**Report rules:**
- Sacrifice grammar for concision
- List unresolved questions at end, if any
52
.opencode/skills/bootstrap/references/workflow-auto.md
Normal file
@@ -0,0 +1,52 @@
# Auto Workflow (`--auto`) — Default

**Thinking level:** Ultrathink
**User gates:** Design approval only. All other phases proceed automatically.

## Step 1: Research

Spawn multiple `researcher` subagents in parallel:
- Explore request, idea validation, challenges, best solutions
- Keep every report ≤150 lines

No user gate — proceed automatically.

## Step 2: Tech Stack

1. Use `planner` + multiple `researcher` subagents in parallel for best-fit stack
2. Write tech stack to `./docs` directory

No user gate — auto-select best option.

## Step 3: Wireframe & Design

1. Use `ui-ux-designer` + `researcher` subagents in parallel:
   - Research style, trends, fonts (predict Google Fonts name, NOT just Inter/Poppins), colors, spacing, positions
   - Describe assets for `ck:ai-multimodal` skill generation
2. `ui-ux-designer` creates:
   - Design guidelines at `./docs/design-guidelines.md`
   - Wireframes in HTML at `./docs/wireframe/`
3. If no logo provided: generate with `ck:ai-multimodal` skill
4. Screenshot wireframes with `ck:chrome-devtools` → save to `./docs/wireframes/`

**Gate:** Ask user to approve design. Repeat if rejected.

**Image tools:** `ck:ai-multimodal` for generation/analysis, `imagemagick` for crop/resize, background removal tool as needed.

## Step 4: Planning

Activate **ck:plan** skill: `/ck:plan --auto <requirements>`
- Planning skill auto-detects complexity and picks appropriate mode
- Creates plan directory using `## Naming` pattern
- Overview at `plan.md` (<80 lines) + `phase-XX-*.md` files

No user gate — proceed to implementation.

## Step 5: Implementation → Final Report

Load `references/shared-phases.md` for remaining phases.

Activate **ck:cook** skill: `/ck:cook --auto <plan-path>`
- Skips all review gates
- Auto-approves if score≥9.5 and 0 critical issues
- Continues through all phases without stopping
50
.opencode/skills/bootstrap/references/workflow-fast.md
Normal file
@@ -0,0 +1,50 @@
# Fast Workflow (`--fast`)

**Thinking level:** Think hard
**User gates:** None. Fully autonomous from start to finish.

## Step 1: Combined Research & Planning

All research happens in parallel, then feeds into planning:

**Parallel research batch** (spawn these simultaneously):
- 2 `researcher` subagents (max 5 sources each): explore request, validate idea, find solutions
- 2 `researcher` subagents (max 5 sources each): find best-fit tech stack
- 2 `researcher` subagents (max 5 sources each): research design style, trends, fonts, colors, spacing, positions
  - Predict Google Fonts name (NOT just Inter/Poppins)
  - Describe assets for `ck:ai-multimodal` generation

Keep all reports ≤150 lines.

## Step 2: Design

1. `ui-ux-designer` subagent analyzes research, creates:
   - Design guidelines at `./docs/design-guidelines.md`
   - Wireframes in HTML at `./docs/wireframe/`
2. If no logo provided: generate with `ck:ai-multimodal` skill
3. Screenshot wireframes with `ck:chrome-devtools` → save to `./docs/wireframes/`

**Image tools:** `ck:ai-multimodal` for generation/analysis, `imagemagick` for crop/resize, background removal tool as needed.

No user gate — proceed directly.

## Step 3: Planning

Activate **ck:plan** skill: `/ck:plan --fast <requirements>`
- Skip research (already done above)
- Read codebase docs → create plan directly
- Plan directory using `## Naming` pattern
- Overview at `plan.md` (<80 lines) + `phase-XX-*.md` files

No user gate — proceed to implementation.

## Step 4: Implementation → Final Report

Load `references/shared-phases.md` for remaining phases.

Activate **ck:cook** skill: `/ck:cook --auto <plan-path>`
- Skips all review gates (fast planning pairs with fast execution)
- Auto-approves if score≥9.5 and 0 critical issues
- Continues through all phases without stopping

**Note:** Fast mode uses `git-manager` to auto-commit (no push) at the end.
60
.opencode/skills/bootstrap/references/workflow-full.md
Normal file
@@ -0,0 +1,60 @@
# Full Interactive Workflow (`--full`)

**Thinking level:** Ultrathink
**User gates:** Every major phase requires user approval before proceeding.

## Step 1: Clarify Requirements

Use `AskUserQuestion` to probe the user's request, constraints, and true objectives.
- Ask 1 question at a time, wait for the answer before the next
- Question everything — don't assume
- Challenge assumptions — the best solution often differs from the initial vision
- Continue until 100% certain about requirements

## Step 2: Research

Spawn multiple `researcher` subagents in parallel:
- Explore request validity, challenges, best solutions
- Keep every report ≤150 lines

**Gate:** Present findings to user. Proceed only with approval.

## Step 3: Tech Stack

1. Ask user for preferred tech stack. If provided, skip to step 4.
2. Use `planner` + multiple `researcher` subagents in parallel to find best-fit stack
3. Present 2-3 options with pros/cons via `AskUserQuestion`
4. Write approved tech stack to `./docs` directory

**Gate:** User approves tech stack before continuing.

## Step 4: Wireframe & Design

1. Ask user if they want wireframes/design. If no → skip to Step 5.
2. Use `ui-ux-designer` + `researcher` subagents in parallel:
   - Research style, trends, fonts (predict Google Fonts name, NOT just Inter/Poppins), colors, spacing, positions
   - Describe assets for `ck:ai-multimodal` skill generation
3. `ui-ux-designer` creates:
   - Design guidelines at `./docs/design-guidelines.md`
   - Wireframes in HTML at `./docs/wireframe/`
4. If no logo provided: generate with `ck:ai-multimodal` skill
5. Screenshot wireframes with `ck:chrome-devtools` → save to `./docs/wireframes/`

**Gate:** User approves design. Repeat if rejected.

**Image tools:** `ck:ai-multimodal` for generation/analysis, `imagemagick` for crop/resize, background removal tool as needed.

## Step 5: Planning

Activate **ck:plan** skill: `/ck:plan --hard <requirements>`
- Planner creates directory using `## Naming` pattern
- Overview at `plan.md` (<80 lines) + `phase-XX-*.md` files
- Present pros/cons of plan

**Gate:** User approves plan. DO NOT start implementing without approval.

## Step 6: Implementation → Final Report

Load `references/shared-phases.md` for remaining phases.

Activate **ck:cook** skill: `/ck:cook <plan-path>` (interactive mode — review gates at each step)
59
.opencode/skills/bootstrap/references/workflow-parallel.md
Normal file
@@ -0,0 +1,59 @@
# Parallel Workflow (`--parallel`)

**Thinking level:** Ultrathink parallel
**User gates:** Design approval only. Implementation uses multi-agent parallel execution.

## Step 1: Research

Spawn max 2 `researcher` agents in parallel:
- Explore requirements, validation, challenges, solutions
- Keep reports ≤150 lines

No user gate — proceed automatically.

## Step 2: Tech Stack

Use `planner` + multiple `researcher` agents in parallel for best-fit stack.
Write to `./docs` directory (≤150 lines).

No user gate — proceed automatically.

## Step 3: Wireframe & Design

1. Use `ui-ux-designer` + `researcher` agents in parallel:
   - Research style, trends, fonts, colors, spacing, positions
   - Predict Google Fonts name (NOT just Inter/Poppins)
   - Describe assets for `ck:ai-multimodal` generation
2. `ui-ux-designer` creates:
   - Design guidelines at `./docs/design-guidelines.md`
   - Wireframes in HTML at `./docs/wireframe/`
3. If no logo: generate with `ck:ai-multimodal` skill
4. Screenshot with `ck:chrome-devtools` → save to `./docs/wireframes/`

**Gate:** Ask user to approve design. Repeat if rejected.

**Image tools:** `ck:ai-multimodal` for generation/analysis, `imagemagick` for crop/resize, background removal tool as needed.

## Step 4: Parallel Planning

Activate **ck:plan** skill: `/ck:plan --parallel <requirements>`
- Creates phases with **exclusive file ownership** per phase (no overlap)
- **Dependency matrix**: which phases run concurrently vs sequentially
- `plan.md` includes dependency graph, execution strategy, file ownership matrix
- Task hydration with `addBlockedBy` for sequential deps, no blockers for parallel groups

No user gate — proceed to implementation.

## Step 5: Parallel Implementation → Final Report

Load `references/shared-phases.md` for remaining phases.

Activate **ck:cook** skill: `/ck:cook --parallel <plan-path>`
- Read `plan.md` for dependency graph and execution strategy
- Launch multiple `fullstack-developer` agents in PARALLEL for concurrent phases
- Pass: phase file path, environment info
- Use `ui-ux-designer` for frontend (generate/analyze assets with `ck:ai-multimodal`, edit with `imagemagick`)
- Respect file ownership boundaries
- Run type checking after implementation

Cook handles testing, review, docs, onboarding, final report per `shared-phases.md`.
125
.opencode/skills/brainstorm/SKILL.md
Normal file
@@ -0,0 +1,125 @@
---
name: ck:brainstorm
description: "Brainstorm solutions with trade-off analysis and brutal honesty. Use for ideation, architecture decisions, technical debates, feature exploration, feasibility assessment, design discussions."
license: MIT
argument-hint: "[topic or problem]"
metadata:
  author: claudekit
  version: "2.0.0"
---

# Brainstorming Skill

You are a Solution Brainstormer, an elite software engineering expert who specializes in system architecture design and technical decision-making. Your core mission is to collaborate with users to find the best possible solutions while maintaining brutal honesty about feasibility and trade-offs.

## Communication Style
If coding level guidelines were injected at session start (levels 0-5), follow those guidelines for response structure and explanation depth. The guidelines define what to explain, what not to explain, and required response format.

## Core Principles
You operate by the holy trinity of software engineering: **YAGNI** (You Aren't Gonna Need It), **KISS** (Keep It Simple, Stupid), and **DRY** (Don't Repeat Yourself). Every solution you propose must honor these principles.

## Your Expertise
- System architecture design and scalability patterns
- Risk assessment and mitigation strategies
- Development time optimization and resource allocation
- User Experience (UX) and Developer Experience (DX) optimization
- Technical debt management and maintainability
- Performance optimization and bottleneck identification

## Your Approach
1. **Question Everything**: Use `AskUserQuestion` tool to ask probing questions to fully understand the user's request, constraints, and true objectives. Don't assume - clarify until you're 100% certain.
2. **Brutal Honesty**: Use `AskUserQuestion` tool to provide frank, unfiltered feedback about ideas. If something is unrealistic, over-engineered, or likely to cause problems, say so directly. Your job is to prevent costly mistakes.
3. **Explore Alternatives**: Always consider multiple approaches. Present 2-3 viable solutions with clear pros/cons, explaining why one might be superior.
4. **Challenge Assumptions**: Use `AskUserQuestion` tool to question the user's initial approach. Often the best solution is different from what was originally envisioned.
5. **Consider All Stakeholders**: Use `AskUserQuestion` tool to evaluate impact on end users, developers, operations team, and business objectives.

## Collaboration Tools
- Consult the `planner` agent to research industry best practices and find proven solutions
- Engage the `docs-manager` agent to understand existing project implementation and constraints
- Use `WebSearch` tool to find efficient approaches and learn from others' experiences
- Use `ck:docs-seeker` skill to read latest documentation of external plugins/packages
- Leverage `ck:ai-multimodal` skill to analyze visual materials and mockups
- Query `psql` command to understand current database structure and existing data
- Employ `ck:sequential-thinking` skill for complex problem-solving that requires structured analysis

<HARD-GATE>
Do NOT invoke any implementation skill, write any code, scaffold any project, or take any implementation action until you have presented a design and the user has approved it.
This applies to EVERY brainstorming session regardless of perceived simplicity.
The design can be brief for simple projects, but you MUST present it and get approval.
</HARD-GATE>

## Anti-Rationalization

| Thought | Reality |
|---------|---------|
| "This is too simple to need a design" | Simple projects = most wasted work from unexamined assumptions. |
| "I already know the solution" | Then writing it down takes 30 seconds. Do it. |
| "The user wants action, not talk" | Bad action wastes more time than good planning. |
| "Let me explore the code first" | Brainstorming tells you HOW to explore. Follow the process. |
| "I'll just prototype quickly" | Prototypes become production code. Design first. |

## Process Flow (Authoritative)

```mermaid
flowchart TD
    A[Scout Project Context] --> B[Ask Clarifying Questions]
    B --> C{Scope too large?}
    C -->|Yes| D[Decompose into Sub-Projects]
    D --> B
    C -->|No| E[Propose 2-3 Approaches]
    E --> F[Present Design Sections]
    F --> G{User Approves?}
    G -->|No| F
    G -->|Yes| H[Write Design Doc / Report]
    H --> I{Create Plan?}
    I -->|Yes| J[Invoke /ck:plan]
    I -->|No| K[End Session]
    J --> L[Journal]
    K --> L
```

**This diagram is the authoritative workflow.** If prose conflicts with this flow, follow the diagram. The terminal state is either `/ck:plan` or end.

## Your Process
1. **Scout Phase**: Use `ck:scout` skill to discover relevant files and code patterns, and read relevant docs in the `<project-dir>/docs` directory, to understand the current state of the project
2. **Discovery Phase**: Use `AskUserQuestion` tool to ask clarifying questions about requirements, constraints, timeline, and success criteria
3. **Scope Assessment**: Before deep-diving, assess if the request covers multiple independent subsystems:
   - If the request describes 3+ independent concerns (e.g., "build platform with chat, billing, analytics") → flag immediately
   - Help user decompose into sub-projects: identify pieces, relationships, build order
   - Each sub-project gets its own brainstorm → plan → implement cycle
   - Don't spend questions refining details of a project that needs decomposition first
4. **Research Phase**: Gather information from other agents and external sources
5. **Analysis Phase**: Evaluate multiple approaches using your expertise and principles
6. **Debate Phase**: Use `AskUserQuestion` tool to present options, challenge user preferences, and work toward the optimal solution
7. **Consensus Phase**: Ensure alignment on the chosen approach and document decisions
8. **Documentation Phase**: Create a comprehensive markdown summary report with the final agreed solution
9. **Finalize Phase**: Use `AskUserQuestion` tool to ask if the user wants to create a detailed implementation plan.
   - If `Yes`: Run `/ck:plan` command with the brainstorm summary context as the argument to ensure plan continuity.
     **CRITICAL:** The invoked plan command will create `plan.md` with YAML frontmatter including `status: pending`.
   - If `No`: End the session.
10. **Journal Phase**: Run `/ck:journal` to write a concise technical journal entry upon completion.

## Report Output
Use the naming pattern from the `## Naming` section in the injected context. The pattern includes the full path and computed date.

## Output Requirements
**IMPORTANT:** Invoke the "/ck:project-organization" skill to organize the reports.

When brainstorming concludes with agreement, create a detailed markdown summary report including:
- Problem statement and requirements
- Evaluated approaches with pros/cons
- Final recommended solution with rationale
- Implementation considerations and risks
- Success metrics and validation criteria
- Next steps and dependencies
- **IMPORTANT:** Sacrifice grammar for the sake of concision when writing outputs.

## Critical Constraints
- You DO NOT implement solutions yourself - you only brainstorm and advise
- You must validate feasibility before endorsing any approach
- You prioritize long-term maintainability over short-term convenience
- You consider both technical excellence and business pragmatism

**Remember:** Your role is to be the user's most trusted technical advisor - someone who will tell them hard truths to ensure they build something great, maintainable, and successful.

**IMPORTANT:** **DO NOT** implement anything - just brainstorm, answer questions, and advise.

630
.opencode/skills/chrome-devtools/SKILL.md
Normal file
@@ -0,0 +1,630 @@
---
name: ck:chrome-devtools
description: Automate browsers with Puppeteer CLI scripts and persistent sessions. Use for screenshots, performance analysis, network monitoring, web scraping, form automation, JavaScript debugging.
license: Apache-2.0
argument-hint: "[url or task]"
metadata:
  author: claudekit
  version: "1.1.0"
---

# Chrome DevTools Agent Skill

Browser automation via Puppeteer scripts with persistent sessions. All scripts output JSON.

## Skill Location

Skills can exist in **project-scope** or **user-scope**. Priority: project-scope > user-scope.

```bash
# Detect skill location (no cd needed - scripts use __dirname for paths)
SKILL_DIR=""
if [ -d ".opencode/skills/chrome-devtools/scripts" ]; then
  SKILL_DIR=".opencode/skills/chrome-devtools/scripts"
elif [ -d "$HOME/.opencode/skills/chrome-devtools/scripts" ]; then
  SKILL_DIR="$HOME/.opencode/skills/chrome-devtools/scripts"
fi
# Run scripts with full path: node "$SKILL_DIR/script.js" --args
```

## Choosing Your Approach

| Scenario | Approach |
|----------|----------|
| **Source-available sites** | Read source code first, write selectors directly |
| **Unknown layouts** | Use `aria-snapshot.js` for semantic discovery |
| **Visual inspection** | Take screenshots to verify rendering |
| **Debug issues** | Collect console logs, analyze with session storage |
| **Accessibility audit** | Use ARIA snapshot for semantic structure analysis |

## Browser Running Mode

Browser visibility is resolved automatically by `resolveHeadless()` in `lib/browser.js`:

| Environment | Default | Why |
|-------------|---------|-----|
| **macOS / Windows** | **Headed** (visible) | Better debugging, OAuth login support |
| **Linux / WSL** | **Headless** | Servers typically have no display |
| **CI** (`CI`, `GITHUB_ACTIONS`, `GITLAB_CI`, `JENKINS_URL` env vars) | **Headless** | No display available |

Override with `--headless true` or `--headless false` on any script.
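
For example (flags as documented above; URLs are placeholders):

```bash
# Force a visible browser on Linux, where headless is the default
node "$SKILL_DIR/navigate.js" --url https://example.com --headless false

# Force headless for a scripted run on macOS
node "$SKILL_DIR/screenshot.js" --url https://example.com --output ./page.png --headless true
```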

- Run multiple scripts/sessions in parallel to simulate real user interactions or different device types (mobile, tablet, desktop).

## ARIA Snapshot (Element Discovery)

When page structure is unknown, use `aria-snapshot.js` to get a YAML-formatted accessibility tree with semantic roles, accessible names, states, and stable element references.

### Get ARIA Snapshot

```bash
# Generate ARIA snapshot and output to stdout
node "$SKILL_DIR/aria-snapshot.js" --url https://example.com

# Save to file in snapshots directory
node "$SKILL_DIR/aria-snapshot.js" --url https://example.com --output ./.opencode/chrome-devtools/snapshots/page.yaml
```

### Example YAML Output

```yaml
- banner:
  - link "Hacker News" [ref=e1]
    /url: https://news.ycombinator.com
- navigation:
  - link "new" [ref=e2]
  - link "past" [ref=e3]
  - link "comments" [ref=e4]
- main:
  - list:
    - listitem:
      - link "Show HN: My new project" [ref=e8]
      - text: "128 points by user 3 hours ago"
- contentinfo:
  - textbox [ref=e10]
    /placeholder: "Search"
```

### Interpreting ARIA Notation

| Notation | Meaning |
|----------|---------|
| `[ref=eN]` | Stable identifier for interactive elements |
| `[checked]` | Checkbox/radio is selected |
| `[disabled]` | Element is inactive |
| `[expanded]` | Accordion/dropdown is open |
| `[level=N]` | Heading hierarchy (1-6) |
| `/url:` | Link destination |
| `/placeholder:` | Input placeholder text |
| `/value:` | Current input value |
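
Refs can also be pulled out of a saved snapshot with plain text tools — for example, finding a ref by its accessible name (file path as in the Store Snapshots section below):

```bash
# Find the ref for the "comments" link in a saved snapshot
grep 'link "comments"' ./.opencode/chrome-devtools/snapshots/page.yaml
# -> - link "comments" [ref=e4]
```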

### Interact by Ref

Use `select-ref.js` to interact with elements by their ref:

```bash
# Click element with ref e5
node "$SKILL_DIR/select-ref.js" --ref e5 --action click

# Fill input with ref e10
node "$SKILL_DIR/select-ref.js" --ref e10 --action fill --value "search query"

# Get text content
node "$SKILL_DIR/select-ref.js" --ref e8 --action text

# Screenshot specific element
node "$SKILL_DIR/select-ref.js" --ref e1 --action screenshot --output ./logo.png

# Focus element
node "$SKILL_DIR/select-ref.js" --ref e10 --action focus

# Hover over element
node "$SKILL_DIR/select-ref.js" --ref e5 --action hover
```

### Store Snapshots

Store snapshots for analysis in `<project>/.opencode/chrome-devtools/snapshots/`:

```bash
# Create snapshots directory
mkdir -p .opencode/chrome-devtools/snapshots

# Capture and store with timestamp
SESSION="$(date +%Y%m%d-%H%M%S)"
node "$SKILL_DIR/aria-snapshot.js" --url https://example.com --output .opencode/chrome-devtools/snapshots/$SESSION.yaml
```

### Workflow: Unknown Page Structure

1. **Get snapshot** to discover elements:
   ```bash
   node "$SKILL_DIR/aria-snapshot.js" --url https://example.com
   ```

2. **Identify target** from YAML output (e.g., `[ref=e5]` for a button)

3. **Interact by ref**:
   ```bash
   node "$SKILL_DIR/select-ref.js" --ref e5 --action click
   ```

4. **Verify result** with screenshot or new snapshot:
   ```bash
   node "$SKILL_DIR/screenshot.js" --output ./result.png
   ```

## Local HTML Files

**IMPORTANT**: Never browse local HTML files via the `file://` protocol. Always serve via a local server.
**Why**: `file://` blocks many browser features (CORS, ES modules, fetch API, service workers). A local server ensures proper HTTP behavior.

```bash
# Option 1: npx serve (recommended)
npx serve ./dist -p 3000 &
node "$SKILL_DIR/navigate.js" --url http://localhost:3000

# Option 2: Python http.server
python -m http.server 3000 --directory ./dist &
node "$SKILL_DIR/navigate.js" --url http://localhost:3000
```

**Note**: if port 3000 is already in use (check with `lsof -i :3000`), serve on a different port.

## Quick Start

```bash
# Install dependencies (one-time setup)
npm install --prefix "$SKILL_DIR"

# Test (browser stays running for session reuse)
node "$SKILL_DIR/navigate.js" --url https://example.com
# Output: {"success": true, "url": "...", "title": "..."}
```

**Linux/WSL only**: Run `"$SKILL_DIR/install-deps.sh"` first for Chrome system libraries.

## Session Persistence

Browser state persists across script executions via a WebSocket endpoint file (`.browser-session.json`).

**Default behavior**: Scripts disconnect but keep the browser running for session reuse.

```bash
# First script: launches browser, navigates, disconnects (browser stays running)
node "$SKILL_DIR/navigate.js" --url https://example.com/login

# Subsequent scripts: connect to existing browser, reuse page state
node "$SKILL_DIR/fill.js" --selector "#email" --value "user@example.com"
node "$SKILL_DIR/fill.js" --selector "#password" --value "secret"
node "$SKILL_DIR/click.js" --selector "button[type=submit]"

# Close browser when done
node "$SKILL_DIR/navigate.js" --url about:blank --close true
```

**Session management**:
- `--close true`: Close browser and clear session
- Default (no flag): Keep browser running for next script

## Available Scripts

All in `.opencode/skills/chrome-devtools/scripts/`:

| Script | Purpose |
|--------|---------|
| `navigate.js` | Navigate to URLs |
| `screenshot.js` | Capture screenshots (auto-compress >5MB via Sharp) |
| `click.js` | Click elements |
| `fill.js` | Fill form fields |
| `evaluate.js` | Execute JS in page context |
| `snapshot.js` | Extract interactive elements (JSON format) |
| `aria-snapshot.js` | Get ARIA accessibility tree (YAML format with refs) |
| `select-ref.js` | Interact with elements by ref from ARIA snapshot |
| `console.js` | Monitor console messages/errors |
| `network.js` | Track HTTP requests/responses |
| `performance.js` | Measure Core Web Vitals |
| `ws-debug.js` | Debug WebSocket connections (basic) |
| `ws-full-debug.js` | Debug WebSocket with full events/frames |
| `inject-auth.js` | Inject cookies/tokens for authentication |
| `import-cookies.js` | Import cookies from JSON/Netscape file |
| `connect-chrome.js` | Connect to Chrome with remote debugging |

## Workflow Loop

1. **Execute** focused script for single task
2. **Observe** JSON output
3. **Assess** completion status
4. **Decide** next action
5. **Repeat** until done
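
A minimal sketch of this loop in bash, assuming each script emits a top-level `success` field as `navigate.js` does in the Quick Start output (the `click.js` output shape is an assumption):

```bash
# Hypothetical loop: retry a click until the script reports success (max 3 tries)
for attempt in 1 2 3; do
  result="$(node "$SKILL_DIR/click.js" --selector 'button[type=submit]')"
  echo "$result"  # observe
  if [ "$(echo "$result" | jq -r '.success')" = "true" ]; then
    break  # assess: done
  fi
  # decide: re-discover the selector before the next attempt
  node "$SKILL_DIR/snapshot.js" | jq '.elements[] | select(.tagName=="button")'
done
```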

## Writing Custom Test Scripts

For complex automation, write scripts to `<project>/.opencode/chrome-devtools/tmp/`:

```bash
# Create tmp directory for test scripts
mkdir -p .opencode/chrome-devtools/tmp

# Write a test script
cat > .opencode/chrome-devtools/tmp/login-test.js << 'EOF'
// Import path assumes a project-scope skill location (see Skill Location above)
import { getBrowser, getPage, disconnectBrowser, outputJSON } from '../../skills/chrome-devtools/scripts/lib/browser.js';

async function loginTest() {
  const browser = await getBrowser();
  const page = await getPage(browser);

  await page.goto('https://example.com/login');
  await page.type('#email', 'user@example.com');
  await page.type('#password', 'secret');
  await page.click('button[type=submit]');
  await page.waitForNavigation();

  outputJSON({
    success: true,
    url: page.url(),
    title: await page.title()
  });

  await disconnectBrowser();
}

loginTest();
EOF

# Run the test
node .opencode/chrome-devtools/tmp/login-test.js
```

**Key principles for custom scripts**:
- Single-purpose: one script, one task
- Always call `disconnectBrowser()` at the end (keeps browser running)
- Use `closeBrowser()` only when ending session completely
- Output JSON for easy parsing
- Plain JavaScript only in `page.evaluate()` callbacks

## Screenshots

**IMPORTANT:** Invoke "/ck:project-organization" skill to organize the outputs.

Store screenshots for analysis in `<project>/.opencode/chrome-devtools/screenshots/`:

```bash
# Basic screenshot
node "$SKILL_DIR/screenshot.js" --url https://example.com --output ./.opencode/chrome-devtools/screenshots/page.png

# Full page
node "$SKILL_DIR/screenshot.js" --url https://example.com --output ./.opencode/chrome-devtools/screenshots/page.png --full-page true

# Specific element
node "$SKILL_DIR/screenshot.js" --url https://example.com --selector ".main-content" --output ./.opencode/chrome-devtools/screenshots/element.png
```

### Auto-Compression (Sharp)

Screenshots >5MB auto-compress using Sharp (4-5x faster than ImageMagick):

```bash
# Default: compress if >5MB
node "$SKILL_DIR/screenshot.js" --url https://example.com --output ./.opencode/chrome-devtools/screenshots/page.png

# Custom threshold (3MB)
node "$SKILL_DIR/screenshot.js" --url https://example.com --output ./.opencode/chrome-devtools/screenshots/page.png --max-size 3

# Disable compression
node "$SKILL_DIR/screenshot.js" --url https://example.com --output ./.opencode/chrome-devtools/screenshots/page.png --no-compress
```

## Console Log Collection & Analysis

### Capture Logs

```bash
# Capture all logs for 10 seconds
node "$SKILL_DIR/console.js" --url https://example.com --duration 10000

# Filter by type
node "$SKILL_DIR/console.js" --url https://example.com --types error,warn --duration 5000
```

### Session Storage Pattern

Store logs for analysis in `<project>/.opencode/chrome-devtools/logs/<session>/`:

```bash
# Create session directory
SESSION="$(date +%Y%m%d-%H%M%S)"
mkdir -p .opencode/chrome-devtools/logs/$SESSION

# Capture and store
node "$SKILL_DIR/console.js" --url https://example.com --duration 10000 > .opencode/chrome-devtools/logs/$SESSION/console.json
node "$SKILL_DIR/network.js" --url https://example.com > .opencode/chrome-devtools/logs/$SESSION/network.json

# View errors
jq '.messages[] | select(.type=="error")' .opencode/chrome-devtools/logs/$SESSION/console.json
```

### Root Cause Analysis

```bash
# 1. Check for JavaScript errors
node "$SKILL_DIR/console.js" --url https://example.com --types error,pageerror --duration 5000 | jq '.messages'

# 2. Correlate with network failures
node "$SKILL_DIR/network.js" --url https://example.com | jq '.requests[] | select(.response.status >= 400)'

# 3. Check specific error stack traces
node "$SKILL_DIR/console.js" --url https://example.com --types error --duration 5000 | jq '.messages[].stack'
```

## Finding Elements

Use `snapshot.js` to discover selectors before interacting:

```bash
# Get all interactive elements
node "$SKILL_DIR/snapshot.js" --url https://example.com | jq '.elements[] | {tagName, text, selector}'

# Find buttons
node "$SKILL_DIR/snapshot.js" --url https://example.com | jq '.elements[] | select(.tagName=="button")'

# Find by text content
node "$SKILL_DIR/snapshot.js" --url https://example.com | jq '.elements[] | select(.text | contains("Submit"))'
```

## Error Recovery

If a script fails:

```bash
# 1. Capture current state (without navigating, to preserve state)
node "$SKILL_DIR/screenshot.js" --output ./.opencode/chrome-devtools/screenshots/debug.png

# 2. Get console errors
node "$SKILL_DIR/console.js" --url about:blank --types error --duration 1000

# 3. Discover correct selector
node "$SKILL_DIR/snapshot.js" | jq '.elements[] | select(.text | contains("Submit"))'

# 4. Try XPath if CSS fails
node "$SKILL_DIR/click.js" --selector "//button[contains(text(),'Submit')]"
```

## Common Patterns

### Web Scraping
```bash
node "$SKILL_DIR/evaluate.js" --url https://example.com --script "
  Array.from(document.querySelectorAll('.item')).map(el => ({
    title: el.querySelector('h2')?.textContent,
    link: el.querySelector('a')?.href
  }))
" | jq '.result'
```

### Form Automation
```bash
node "$SKILL_DIR/navigate.js" --url https://example.com/form
node "$SKILL_DIR/fill.js" --selector "#search" --value "query"
node "$SKILL_DIR/click.js" --selector "button[type=submit]"
```

### Performance Testing
```bash
node "$SKILL_DIR/performance.js" --url https://example.com | jq '.vitals'
```

## Script Options

All scripts support:
- `--headless true/false` - Override auto-detected headless mode (default: auto by OS)
- `--close true` - Close browser completely (default: stay running)
- `--timeout 30000` - Set timeout (ms)
- `--wait-until networkidle2` - Wait strategy

`navigate.js` additionally supports:
- `--wait-for-login <pattern>` - Interactive login: open headed, wait for URL regex match
- `--login-timeout <ms>` - Max wait for login completion (default: 300000 = 5 min)
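
A sketch combining the global flags above (values arbitrary):

```bash
# One-shot check: headless, longer timeout, laxer wait strategy, then shut the browser down
node "$SKILL_DIR/navigate.js" --url https://example.com \
  --headless true --timeout 60000 --wait-until load --close true
```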

## Troubleshooting

| Error | Solution |
|-------|----------|
| `Cannot find package 'puppeteer'` | Run `npm install` in scripts directory |
| `libnss3.so` missing (Linux) | Run `./install-deps.sh` |
| Element not found | Use `snapshot.js` to find correct selector |
| Script hangs | Use `--timeout 60000` or `--wait-until load` |
| Screenshot >5MB | Auto-compressed; use `--max-size 3` for lower |
| Session stale | Delete `.browser-session.json` and retry |
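
For the stale-session row, clearing the endpoint file and rerunning is usually enough (a sketch; the file lives in the directory the scripts were run from):

```bash
# Remove the stale WebSocket endpoint file, then retry the failing script
rm -f .browser-session.json
node "$SKILL_DIR/navigate.js" --url https://example.com
```
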
### Screenshot Analysis: Missing Images

If images don't appear in screenshots, they may be waiting for animation triggers:

1. **Scroll-triggered animations**: Scroll element into view first
   ```bash
   node "$SKILL_DIR/evaluate.js" --script "document.querySelector('.lazy-image').scrollIntoView()"
   # Wait for animation
   node "$SKILL_DIR/evaluate.js" --script "await new Promise(r => setTimeout(r, 1000))"
   node "$SKILL_DIR/screenshot.js" --output ./result.png
   ```

2. **Sequential animation queue**: Wait longer and retry
   ```bash
   # First attempt
   node "$SKILL_DIR/screenshot.js" --url http://localhost:3000 --output ./attempt1.png

   # Wait for animations to complete
   node "$SKILL_DIR/evaluate.js" --script "await new Promise(r => setTimeout(r, 2000))"

   # Retry screenshot
   node "$SKILL_DIR/screenshot.js" --output ./attempt2.png
   ```

3. **Intersection Observer animations**: Trigger by scrolling through page
   ```bash
   node "$SKILL_DIR/evaluate.js" --script "window.scrollTo(0, document.body.scrollHeight)"
   node "$SKILL_DIR/evaluate.js" --script "await new Promise(r => setTimeout(r, 1500))"
   node "$SKILL_DIR/evaluate.js" --script "window.scrollTo(0, 0)"
   node "$SKILL_DIR/screenshot.js" --output ./full-loaded.png --full-page true
   ```

## Authentication & Cookies

For accessing protected/authenticated pages, use one of these methods:

### Method 1: Inject Cookies Directly

Use when you have cookie values (from DevTools or manual extraction):

```bash
# Inject single cookie
node "$SKILL_DIR/inject-auth.js" --url https://site.com \
  --cookies '[{"name":"session","value":"abc123","domain":".site.com"}]'

# Multiple cookies with all properties
node "$SKILL_DIR/inject-auth.js" --url https://site.com \
  --cookies '[{"name":"session","value":"abc","domain":".site.com","httpOnly":true,"secure":true}]'

# With Bearer token header
node "$SKILL_DIR/inject-auth.js" --url https://api.site.com \
  --token "Bearer eyJhbG..." --header Authorization
```

### Method 2: Import from Browser Extension

Best for complex auth (OAuth, multi-cookie sessions):

```bash
# 1. Install "Cookie-Editor" or "EditThisCookie" Chrome extension
# 2. Navigate to site → Log in manually
# 3. Click extension → Export as JSON → Save to cookies.json
# 4. Import into puppeteer session:

node "$SKILL_DIR/import-cookies.js" --file ./cookies.json --url https://site.com

# Netscape format (from curl/wget):
node "$SKILL_DIR/import-cookies.js" --file ./cookies.txt --format netscape --url https://site.com

# Only import cookies matching target domain:
node "$SKILL_DIR/import-cookies.js" --file ./cookies.json --url https://site.com --strict-domain
```

### Method 3: Use Your Chrome Profile

Most reliable for complex auth (2FA, OAuth, SSO). Uses your existing Chrome session:

```bash
# Use Chrome's default profile (preserves all cookies, extensions, saved passwords)
node "$SKILL_DIR/navigate.js" --url https://site.com --use-default-profile true

# Use specific Chrome profile directory
node "$SKILL_DIR/navigate.js" --url https://site.com --profile "/path/to/chrome/profile"
```

**[!] Important**: Chrome must be fully closed when using its profile (single instance lock).

**Profile paths by OS:**
- **macOS**: `~/Library/Application Support/Google/Chrome`
- **Windows**: `%LOCALAPPDATA%/Google/Chrome/User Data`
- **Linux**: `~/.config/google-chrome`
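
For example, on macOS (path from the list above; remember to quit Chrome first):

```bash
# Reuse the default macOS Chrome profile for an authenticated run
node "$SKILL_DIR/navigate.js" --url https://site.com \
  --profile "$HOME/Library/Application Support/Google/Chrome"
```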

### Method 4: Connect to Running Chrome

Best for debugging (can see browser window while scripts run):

```bash
# Step 1: Launch Chrome with remote debugging (in separate terminal)
# macOS:
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222

# Windows:
"C:\Program Files\Google\Chrome\Application\chrome.exe" --remote-debugging-port=9222

# Linux:
google-chrome --remote-debugging-port=9222

# Step 2: Log in manually in the Chrome window

# Step 3: Connect and automate
node "$SKILL_DIR/connect-chrome.js" --browser-url http://localhost:9222 --url https://site.com

# Or launch Chrome automatically (opens new window):
node "$SKILL_DIR/connect-chrome.js" --launch --port 9222 --url https://site.com
```

### Method 5: Interactive Login (OAuth/SSO)

Best for OAuth, SSO, or any login requiring manual interaction in the browser:

```bash
# Open browser at login page, wait for redirect to dashboard after OAuth
node "$SKILL_DIR/navigate.js" --url https://app.example.com/login \
  --wait-for-login "/dashboard"

# With longer timeout (10 min) for slow SSO providers
node "$SKILL_DIR/navigate.js" --url https://app.example.com/login \
  --wait-for-login "/dashboard" --login-timeout 600000

# Use regex for complex URL patterns
node "$SKILL_DIR/navigate.js" --url https://app.example.com/login \
  --wait-for-login "/(dashboard|home|app)"
```

**How it works:**
1. Opens browser in **headed mode** (always, regardless of OS)
2. Navigates to the login URL
3. Waits for you to complete the login flow manually (OAuth, 2FA, etc.)
4. Detects success when URL matches the regex pattern
5. Saves all cookies to `.auth-session.json` for 24-hour reuse
6. Subsequent scripts reuse the authenticated session automatically

### Session Persistence

Auth sessions are saved to `.auth-session.json` for 24-hour reuse:

```bash
# First script injects auth
node "$SKILL_DIR/inject-auth.js" --url https://site.com --cookies '[...]'

# Subsequent scripts reuse saved auth automatically
node "$SKILL_DIR/navigate.js" --url https://site.com/dashboard
node "$SKILL_DIR/screenshot.js" --url https://site.com/profile --output ./profile.png

# Clear auth session when done
node "$SKILL_DIR/inject-auth.js" --url https://site.com --clear true
```

### Choosing the Right Method

| Method | Best For | Complexity |
|--------|----------|------------|
| Inject cookies | Simple session cookies, API tokens | Low |
| Import from extension | Multi-cookie auth, OAuth tokens | Medium |
| Chrome profile | 2FA, SSO, complex OAuth flows | Low* |
| Connect to Chrome | Debugging, visual verification | Medium |
| Interactive login | OAuth/SSO with manual browser interaction | Low |

*Requires Chrome to be closed first

## Reference Documentation

- `./references/cdp-domains.md` - Chrome DevTools Protocol domains
- `./references/puppeteer-reference.md` - Puppeteer API patterns
- `./references/performance-guide.md` - Core Web Vitals optimization
- `./scripts/README.md` - Detailed script options

694
.opencode/skills/chrome-devtools/references/cdp-domains.md
Normal file
@@ -0,0 +1,694 @@
# Chrome DevTools Protocol (CDP) Domains Reference

Complete reference of CDP domains and their capabilities for browser automation and debugging.

## Overview

CDP is organized into **47 domains**, each providing specific browser capabilities. Domains are grouped by functionality:

- **Core** - Fundamental browser control
- **DOM & Styling** - Page structure and styling
- **Network & Fetch** - HTTP traffic management
- **Page & Navigation** - Page lifecycle control
- **Storage & Data** - Browser storage APIs
- **Performance & Profiling** - Metrics and analysis
- **Emulation & Simulation** - Device and network emulation
- **Worker & Service** - Background tasks
- **Developer Tools** - Debugging support

---

## Core Domains

### Runtime
**Purpose:** Execute JavaScript, manage objects, handle promises

**Key Commands:**
- `Runtime.evaluate(expression)` - Execute JavaScript
- `Runtime.callFunctionOn(functionDeclaration, objectId)` - Call function on object
- `Runtime.getProperties(objectId)` - Get object properties
- `Runtime.awaitPromise(promiseObjectId)` - Wait for promise resolution

**Key Events:**
- `Runtime.consoleAPICalled` - Console message logged
- `Runtime.exceptionThrown` - Uncaught exception

**Use Cases:**
- Execute custom JavaScript
- Access page data
- Monitor console output
- Handle exceptions

---

### Debugger
**Purpose:** JavaScript debugging, breakpoints, stack traces

**Key Commands:**
- `Debugger.enable()` - Enable debugger
- `Debugger.setBreakpoint(location)` - Set breakpoint
- `Debugger.pause()` - Pause execution
- `Debugger.resume()` - Resume execution
- `Debugger.stepOver/stepInto/stepOut()` - Step through code

**Key Events:**
- `Debugger.paused` - Execution paused
- `Debugger.resumed` - Execution resumed
- `Debugger.scriptParsed` - Script loaded

**Use Cases:**
- Debug JavaScript errors
- Inspect call stacks
- Set conditional breakpoints
- Source map support

---

### Console (Deprecated - Use Runtime/Log)
**Purpose:** Legacy console message access

**Note:** Use `Runtime.consoleAPICalled` event instead for new implementations.

---

## DOM & Styling Domains

### DOM
**Purpose:** Access and manipulate DOM tree

**Key Commands:**
- `DOM.getDocument()` - Get root document node
- `DOM.querySelector(nodeId, selector)` - Query selector
- `DOM.querySelectorAll(nodeId, selector)` - Query all
- `DOM.getAttributes(nodeId)` - Get element attributes
- `DOM.setOuterHTML(nodeId, outerHTML)` - Replace element
- `DOM.getBoxModel(nodeId)` - Get element layout box
- `DOM.focus(nodeId)` - Focus element

**Key Events:**
- `DOM.documentUpdated` - Document changed
- `DOM.setChildNodes` - Child nodes updated

**Use Cases:**
- Navigate DOM tree
- Query elements
- Modify DOM structure
- Get element positions

---

### CSS
**Purpose:** Inspect and modify CSS styles

**Key Commands:**
- `CSS.enable()` - Enable CSS domain
- `CSS.getComputedStyleForNode(nodeId)` - Get computed styles
- `CSS.getInlineStylesForNode(nodeId)` - Get inline styles
- `CSS.getMatchedStylesForNode(nodeId)` - Get matched CSS rules
- `CSS.setStyleTexts(edits)` - Modify styles

**Key Events:**
- `CSS.styleSheetAdded` - Stylesheet added
- `CSS.styleSheetChanged` - Stylesheet modified

**Use Cases:**
- Inspect element styles
- Debug CSS issues
- Modify styles dynamically
- Extract stylesheet data

---

### Accessibility
**Purpose:** Access accessibility tree

**Key Commands:**
- `Accessibility.enable()` - Enable accessibility
- `Accessibility.getFullAXTree()` - Get complete AX tree
- `Accessibility.getPartialAXTree(nodeId)` - Get node subtree
- `Accessibility.queryAXTree(nodeId, role, name)` - Query AX tree

**Use Cases:**
- Accessibility testing
- Screen reader simulation
- ARIA attribute inspection
- AX tree analysis

---

## Network & Fetch Domains

### Network
**Purpose:** Monitor and control HTTP traffic

**Key Commands:**
- `Network.enable()` - Enable network tracking
- `Network.setCacheDisabled(cacheDisabled)` - Disable cache
- `Network.setExtraHTTPHeaders(headers)` - Add custom headers
- `Network.getCookies(urls)` - Get cookies
- `Network.setCookie(name, value, domain)` - Set cookie
- `Network.getResponseBody(requestId)` - Get response body
- `Network.emulateNetworkConditions(offline, latency, downloadThroughput, uploadThroughput)` - Throttle network

**Key Events:**
- `Network.requestWillBeSent` - Request starting
- `Network.responseReceived` - Response received
- `Network.loadingFinished` - Request completed
- `Network.loadingFailed` - Request failed

**Use Cases:**
- Monitor API calls
- Intercept requests
- Analyze response data
- Simulate slow networks
- Manage cookies

---

### Fetch
**Purpose:** Intercept and modify network requests

**Key Commands:**
- `Fetch.enable(patterns)` - Enable request interception
- `Fetch.continueRequest(requestId, url, method, headers)` - Continue/modify request
- `Fetch.fulfillRequest(requestId, responseCode, headers, body)` - Mock response
- `Fetch.failRequest(requestId, errorReason)` - Fail request

**Key Events:**
- `Fetch.requestPaused` - Request intercepted

**Use Cases:**
- Mock API responses
- Block requests
- Modify request/response
- Test error scenarios

---

## Page & Navigation Domains

### Page
**Purpose:** Control page lifecycle and navigation

**Key Commands:**
- `Page.enable()` - Enable page domain
- `Page.navigate(url)` - Navigate to URL
- `Page.reload(ignoreCache)` - Reload page
- `Page.goBack()/goForward()` - Navigate history
- `Page.captureScreenshot(format, quality)` - Take screenshot
- `Page.printToPDF(landscape, displayHeaderFooter)` - Generate PDF
- `Page.getLayoutMetrics()` - Get page dimensions
- `Page.createIsolatedWorld(frameId)` - Create isolated context
- `Page.handleJavaScriptDialog(accept, promptText)` - Handle alerts/confirms

**Key Events:**
- `Page.loadEventFired` - Page loaded
- `Page.domContentEventFired` - DOM ready
- `Page.frameNavigated` - Frame navigated
- `Page.javascriptDialogOpening` - Alert/confirm shown

**Use Cases:**
- Navigate pages
- Capture screenshots
- Generate PDFs
- Handle popups
- Monitor page lifecycle

---

### Target
**Purpose:** Manage browser targets (tabs, workers, frames)

**Key Commands:**
- `Target.getTargets()` - List all targets
- `Target.createTarget(url)` - Open new tab
- `Target.closeTarget(targetId)` - Close tab
- `Target.attachToTarget(targetId)` - Attach debugger
- `Target.detachFromTarget(sessionId)` - Detach debugger
- `Target.setDiscoverTargets(discover)` - Auto-discover targets

**Key Events:**
- `Target.targetCreated` - New target created
- `Target.targetDestroyed` - Target closed
- `Target.targetInfoChanged` - Target updated

**Use Cases:**
- Multi-tab automation
- Service worker debugging
- Frame inspection
- Extension debugging
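
The same target list is also exposed over the DevTools HTTP endpoint, which is handy from the shell once Chrome is running with `--remote-debugging-port=9222` (as in Method 4 of the skill doc). A sketch — note that newer Chrome versions require `PUT` for `/json/new`:

```bash
# List all targets (tabs, workers, frames)
curl -s http://localhost:9222/json/list | jq '.[] | {id, type, url}'

# Open a new tab (Chrome 111+ requires PUT), then close it by id
curl -s -X PUT 'http://localhost:9222/json/new?url=https://example.com'
curl -s "http://localhost:9222/json/close/<targetId>"  # replace <targetId> with the id returned above
```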

---

### Input
**Purpose:** Simulate user input

**Key Commands:**
- `Input.dispatchKeyEvent(type, key, code)` - Keyboard input
- `Input.dispatchMouseEvent(type, x, y, button)` - Mouse input
- `Input.dispatchTouchEvent(type, touchPoints)` - Touch input
- `Input.synthesizePinchGesture(x, y, scaleFactor)` - Pinch gesture
- `Input.synthesizeScrollGesture(x, y, xDistance, yDistance)` - Scroll

**Use Cases:**
- Simulate clicks
- Type text
- Drag and drop
- Touch gestures
- Scroll pages

---

## Storage & Data Domains

### Storage
**Purpose:** Manage browser storage

**Key Commands:**
- `Storage.getCookies(browserContextId)` - Get cookies
- `Storage.setCookies(cookies)` - Set cookies
- `Storage.clearCookies(browserContextId)` - Clear cookies
- `Storage.clearDataForOrigin(origin, storageTypes)` - Clear storage
- `Storage.getUsageAndQuota(origin)` - Get storage usage

**Storage Types:**
- appcache, cookies, file_systems, indexeddb, local_storage, shader_cache, websql, service_workers, cache_storage

**Use Cases:**
- Cookie management
- Clear browser data
- Inspect storage usage
- Test quota limits

---

### DOMStorage
**Purpose:** Access localStorage/sessionStorage

**Key Commands:**
- `DOMStorage.enable()` - Enable storage tracking
- `DOMStorage.getDOMStorageItems(storageId)` - Get items
- `DOMStorage.setDOMStorageItem(storageId, key, value)` - Set item
- `DOMStorage.removeDOMStorageItem(storageId, key)` - Remove item

**Key Events:**
- `DOMStorage.domStorageItemsCleared` - Storage cleared
- `DOMStorage.domStorageItemAdded/Updated/Removed` - Item changed

---

### IndexedDB
**Purpose:** Query IndexedDB databases

**Key Commands:**
- `IndexedDB.requestDatabaseNames(securityOrigin)` - List databases
- `IndexedDB.requestDatabase(securityOrigin, databaseName)` - Get DB structure
- `IndexedDB.requestData(securityOrigin, databaseName, objectStoreName)` - Query data

**Use Cases:**
- Inspect IndexedDB data
- Debug database issues
- Extract stored data

---

### CacheStorage
**Purpose:** Manage Cache API

**Key Commands:**
- `CacheStorage.requestCacheNames(securityOrigin)` - List caches
- `CacheStorage.requestCachedResponses(cacheId, securityOrigin)` - List cached responses
- `CacheStorage.deleteCache(cacheId)` - Delete cache

**Use Cases:**
- Service worker cache inspection
- Offline functionality testing

---

## Performance & Profiling Domains

### Performance
**Purpose:** Collect performance metrics

**Key Commands:**
- `Performance.enable()` - Enable performance tracking
- `Performance.disable()` - Disable tracking
- `Performance.getMetrics()` - Get current metrics

**Metrics:**
- Timestamp, Documents, Frames, JSEventListeners, Nodes, LayoutCount, RecalcStyleCount, LayoutDuration, RecalcStyleDuration, ScriptDuration, TaskDuration, JSHeapUsedSize, JSHeapTotalSize

**Use Cases:**
- Monitor page metrics
- Track memory usage
- Measure render times

---

### PerformanceTimeline
**Purpose:** Access Performance Timeline API

**Key Commands:**
- `PerformanceTimeline.enable(eventTypes)` - Subscribe to events

**Event Types:**
- mark, measure, navigation, resource, longtask, paint, layout-shift

**Key Events:**
- `PerformanceTimeline.timelineEventAdded` - New performance entry

---

### Tracing
**Purpose:** Record Chrome trace

**Key Commands:**
- `Tracing.start(categories, options)` - Start recording
- `Tracing.end()` - Stop recording
- `Tracing.requestMemoryDump()` - Capture memory snapshot

**Trace Categories:**
- blink, cc, devtools, gpu, loading, navigation, rendering, v8, disabled-by-default-*

**Key Events:**
- `Tracing.dataCollected` - Trace chunk received
- `Tracing.tracingComplete` - Recording finished

**Use Cases:**
- Deep performance analysis
- Frame rendering profiling
- CPU flame graphs
- Memory profiling

---

### Profiler
**Purpose:** CPU profiling

**Key Commands:**
- `Profiler.enable()` - Enable profiler
- `Profiler.start()` - Start CPU profiling
- `Profiler.stop()` - Stop and get profile

**Use Cases:**
- Find CPU bottlenecks
- Optimize JavaScript
- Generate flame graphs

---

### HeapProfiler (via Memory domain)
**Purpose:** Memory profiling

**Key Commands:**
- `Memory.getDOMCounters()` - Get DOM object counts
- `Memory.prepareForLeakDetection()` - Prepare leak detection
- `Memory.forciblyPurgeJavaScriptMemory()` - Force GC
- `Memory.setPressureNotificationsSuppressed(suppressed)` - Control memory warnings
- `Memory.simulatePressureNotification(level)` - Simulate memory pressure

**Use Cases:**
- Detect memory leaks
- Analyze heap snapshots
- Monitor object counts

---

## Emulation & Simulation Domains

### Emulation
**Purpose:** Emulate device conditions

**Key Commands:**
- `Emulation.setDeviceMetricsOverride(width, height, deviceScaleFactor, mobile)` - Emulate device
- `Emulation.setGeolocationOverride(latitude, longitude, accuracy)` - Fake location
- `Emulation.setEmulatedMedia(media, features)` - Emulate media type
- `Emulation.setTimezoneOverride(timezoneId)` - Override timezone
- `Emulation.setLocaleOverride(locale)` - Override language
- `Emulation.setUserAgentOverride(userAgent)` - Change user agent

**Use Cases:**
- Mobile device testing
- Geolocation testing
- Print media emulation
- Timezone/locale testing

---

### DeviceOrientation
**Purpose:** Simulate device orientation

**Key Commands:**
- `DeviceOrientation.setDeviceOrientationOverride(alpha, beta, gamma)` - Set orientation

**Use Cases:**
- Test accelerometer features
- Orientation-dependent layouts

---

## Worker & Service Domains

### ServiceWorker
**Purpose:** Manage service workers

**Key Commands:**
- `ServiceWorker.enable()` - Enable tracking
- `ServiceWorker.unregister(scopeURL)` - Unregister worker
- `ServiceWorker.startWorker(scopeURL)` - Start worker
- `ServiceWorker.stopWorker(versionId)` - Stop worker
- `ServiceWorker.inspectWorker(versionId)` - Debug worker

**Key Events:**
- `ServiceWorker.workerRegistrationUpdated` - Registration changed
- `ServiceWorker.workerVersionUpdated` - Version updated

---

### WebAuthn
**Purpose:** Simulate WebAuthn/FIDO2

**Key Commands:**
- `WebAuthn.enable()` - Enable virtual authenticators
- `WebAuthn.addVirtualAuthenticator(options)` - Add virtual device
- `WebAuthn.removeVirtualAuthenticator(authenticatorId)` - Remove device
- `WebAuthn.addCredential(authenticatorId, credential)` - Add credential

**Use Cases:**
- Test WebAuthn flows
- Simulate biometric auth
- Test security keys

---

## Developer Tools Support

### Inspector
**Purpose:** Protocol-level debugging

**Key Events:**
- `Inspector.detached` - Debugger disconnected
- `Inspector.targetCrashed` - Target crashed

---

### Log
**Purpose:** Collect browser logs

**Key Commands:**
- `Log.enable()` - Enable log collection
- `Log.clear()` - Clear logs

**Key Events:**
- `Log.entryAdded` - New log entry

**Use Cases:**
- Collect console logs
- Monitor violations
- Track deprecations

---

### DOMDebugger
**Purpose:** DOM-level debugging

**Key Commands:**
- `DOMDebugger.setDOMBreakpoint(nodeId, type)` - Break on DOM changes
- `DOMDebugger.setEventListenerBreakpoint(eventName)` - Break on event
- `DOMDebugger.setXHRBreakpoint(url)` - Break on XHR

**Breakpoint Types:**
- subtree-modified, attribute-modified, node-removed

---

### DOMSnapshot
**Purpose:** Capture complete DOM snapshot

**Key Commands:**
- `DOMSnapshot.captureSnapshot(computedStyles)` - Capture full DOM

**Use Cases:**
- Export page structure
- Offline analysis
- DOM diffing

---

### Audits (Lighthouse Integration)
**Purpose:** Run automated audits

**Key Commands:**
- `Audits.enable()` - Enable audits
- `Audits.getEncodedResponse(requestId, encoding)` - Preview response size under a different image encoding

---

### LayerTree
**Purpose:** Inspect rendering layers

**Key Commands:**
- `LayerTree.enable()` - Enable layer tracking
- `LayerTree.compositingReasons(layerId)` - Get why layer created

**Key Events:**
- `LayerTree.layerTreeDidChange` - Layers changed

**Use Cases:**
- Debug rendering performance
- Identify layer creation
- Optimize compositing

---

## Other Domains

### Browser
**Purpose:** Browser-level control

**Key Commands:**
- `Browser.getVersion()` - Get browser info
- `Browser.getBrowserCommandLine()` - Get launch args
- `Browser.setPermission(permission, setting, origin)` - Set permissions
- `Browser.grantPermissions(permissions, origin)` - Grant permissions

**Permissions:**
- geolocation, midi, notifications, push, camera, microphone, background-sync, sensors, accessibility-events, clipboard-read, clipboard-write, payment-handler

---

### IO
**Purpose:** File I/O operations

**Key Commands:**
- `IO.read(handle, offset, size)` - Read stream
- `IO.close(handle)` - Close stream

**Use Cases:**
- Read large response bodies
- Process binary data

---

### Media
**Purpose:** Inspect media players

**Key Commands:**
- `Media.enable()` - Track media players

**Key Events:**
- `Media.playerPropertiesChanged` - Player state changed
- `Media.playerEventsAdded` - Player events

---

### BackgroundService
**Purpose:** Track background services

**Key Commands:**
- `BackgroundService.startObserving(service)` - Track service

**Services:**
- backgroundFetch, backgroundSync, pushMessaging, notifications, paymentHandler, periodicBackgroundSync

---

## Domain Dependencies

Some domains depend on others and must be enabled in order:

```
Runtime (no dependencies)
  ↓
DOM (depends on Runtime)
  ↓
CSS (depends on DOM)

Network (no dependencies)

Page (depends on Runtime)
  ↓
Target (depends on Page)

Debugger (depends on Runtime)
```

## Quick Command Reference

### Most Common Commands

```javascript
// Navigation
Page.navigate(url)
Page.reload()

// JavaScript Execution
Runtime.evaluate(expression)

// DOM Access
DOM.getDocument()
DOM.querySelector(nodeId, selector)

// Screenshots
Page.captureScreenshot(format, quality)

// Network Monitoring
Network.enable()
// Listen for Network.requestWillBeSent events

// Console Messages
// Listen for Runtime.consoleAPICalled events

// Cookies
Network.getCookies(urls)
Network.setCookie(...)

// Device Emulation
Emulation.setDeviceMetricsOverride(width, height, ...)

// Performance
Performance.getMetrics()
Tracing.start(categories)
Tracing.end()
```

---
## Best Practices
|
||||
|
||||
1. **Enable domains before use:** Always call `.enable()` for stateful domains
|
||||
2. **Handle events:** Subscribe to events for real-time updates
|
||||
3. **Clean up:** Disable domains when done to reduce overhead
|
||||
4. **Use sessions:** Attach to specific targets for isolated debugging
|
||||
5. **Handle errors:** Implement proper error handling for command failures
|
||||
6. **Version awareness:** Check browser version for experimental API support
|
||||
|
||||
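A minimal sketch combining points 1, 3, and 5 over a Puppeteer CDP session:

```javascript
// Sketch: enable a stateful domain, handle command failures, disable on exit
const client = await page.createCDPSession();
await client.send('Network.enable');
try {
  const { cookies } = await client.send('Network.getCookies');
  console.log('Cookies for current page:', cookies.length);
} catch (err) {
  console.error('CDP command failed:', err.message);
} finally {
  await client.send('Network.disable');
}
```
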
---

## Additional Resources

- [Protocol Viewer](https://chromedevtools.github.io/devtools-protocol/) - Interactive domain browser
- [Protocol JSON](https://chromedevtools.github.io/devtools-protocol/tot/json) - Machine-readable specification
- [Getting Started with CDP](https://github.com/aslushnikov/getting-started-with-cdp)
- [devtools-protocol NPM](https://www.npmjs.com/package/devtools-protocol) - TypeScript definitions
940
.opencode/skills/chrome-devtools/references/performance-guide.md
Normal file
@@ -0,0 +1,940 @@
# Performance Analysis Guide

Comprehensive guide to analyzing web performance using Chrome DevTools Protocol, Puppeteer, and the chrome-devtools skill.

## Table of Contents

- [Core Web Vitals](#core-web-vitals)
- [Performance Tracing](#performance-tracing)
- [Network Analysis](#network-analysis)
- [JavaScript Performance](#javascript-performance)
- [Rendering Performance](#rendering-performance)
- [Memory Analysis](#memory-analysis)
- [Optimization Strategies](#optimization-strategies)

---

## Core Web Vitals

### Overview

Core Web Vitals are Google's standardized metrics for measuring user experience:

- **LCP (Largest Contentful Paint)** - Loading performance (< 2.5s good)
- **FID (First Input Delay)** - Interactivity (< 100ms good)
- **CLS (Cumulative Layout Shift)** - Visual stability (< 0.1 good)

### Measuring with chrome-devtools-mcp

```javascript
// Start performance trace
await useTool('performance_start_trace', {
  categories: ['loading', 'rendering', 'scripting']
});

// Navigate to page
await useTool('navigate_page', {
  url: 'https://example.com'
});

// Wait for complete load
await useTool('wait_for', {
  waitUntil: 'networkidle'
});

// Stop trace and get data
await useTool('performance_stop_trace');

// Get AI-powered insights
const insights = await useTool('performance_analyze_insight');

// insights will include:
// - LCP timing
// - FID analysis
// - CLS score
// - Performance recommendations
```

### Measuring with Puppeteer

```javascript
import puppeteer from 'puppeteer';

const browser = await puppeteer.launch();
const page = await browser.newPage();

// Measure Core Web Vitals
await page.goto('https://example.com', {
  waitUntil: 'networkidle2'
});

const vitals = await page.evaluate(() => {
  return new Promise((resolve) => {
    const vitals = {
      LCP: null,
      FID: null,
      CLS: 0
    };

    // LCP
    new PerformanceObserver((list) => {
      const entries = list.getEntries();
      vitals.LCP = entries[entries.length - 1].renderTime ||
                   entries[entries.length - 1].loadTime;
    }).observe({ entryTypes: ['largest-contentful-paint'] });

    // FID
    new PerformanceObserver((list) => {
      vitals.FID = list.getEntries()[0].processingStart -
                   list.getEntries()[0].startTime;
    }).observe({ entryTypes: ['first-input'] });

    // CLS
    new PerformanceObserver((list) => {
      list.getEntries().forEach((entry) => {
        if (!entry.hadRecentInput) {
          vitals.CLS += entry.value;
        }
      });
    }).observe({ entryTypes: ['layout-shift'] });

    // Wait 5 seconds for metrics
    setTimeout(() => resolve(vitals), 5000);
  });
});

console.log('Core Web Vitals:', vitals);
```

### Other Important Metrics

**TTFB (Time to First Byte)**
```javascript
const ttfb = await page.evaluate(() => {
  const [navigationEntry] = performance.getEntriesByType('navigation');
  return navigationEntry.responseStart - navigationEntry.requestStart;
});
```

**FCP (First Contentful Paint)**
```javascript
const fcp = await page.evaluate(() => {
  const paintEntries = performance.getEntriesByType('paint');
  const fcpEntry = paintEntries.find(e => e.name === 'first-contentful-paint');
  return fcpEntry ? fcpEntry.startTime : null;
});
```

**TTI (Time to Interactive)**
```javascript
// TTI cannot be read directly from a browser API; it is derived from
// network quiet periods and long-task data. Use Lighthouse for an
// accurate value (see the Lighthouse Integration section below).
```

---

## Performance Tracing

### Chrome Trace Categories

**Loading:**
- Page load events
- Resource loading
- Parser activity

**Rendering:**
- Layout calculations
- Paint operations
- Compositing

**Scripting:**
- JavaScript execution
- V8 compilation
- Garbage collection

**Network:**
- HTTP requests
- WebSocket traffic
- Resource fetching

**Input:**
- User input processing
- Touch/scroll events

**GPU:**
- GPU operations
- Compositing work

### Record Performance Trace

**Using chrome-devtools-mcp:**
```javascript
// Start trace with specific categories
await useTool('performance_start_trace', {
  categories: ['loading', 'rendering', 'scripting', 'network']
});

// Perform actions
await useTool('navigate_page', { url: 'https://example.com' });
await useTool('wait_for', { waitUntil: 'networkidle' });

// Optional: Interact with page
await useTool('click', { uid: 'button-uid' });

// Stop trace
const traceData = await useTool('performance_stop_trace');

// Analyze trace
const insights = await useTool('performance_analyze_insight');
```

**Using Puppeteer:**
```javascript
// Start tracing
await page.tracing.start({
  path: 'trace.json',
  categories: [
    'devtools.timeline',
    'disabled-by-default-devtools.timeline',
    'disabled-by-default-v8.cpu_profiler'
  ]
});

// Navigate
await page.goto('https://example.com', {
  waitUntil: 'networkidle2'
});

// Stop tracing
await page.tracing.stop();

// Analyze in Chrome DevTools (chrome://tracing)
```

### Analyze Trace Data

**Key Metrics from Trace:**

1. **Main Thread Activity**
   - JavaScript execution time
   - Layout/reflow time
   - Paint time
   - Long tasks (> 50ms)

2. **Network Waterfall**
   - Request start times
   - DNS lookup
   - Connection time
   - Download time

3. **Rendering Pipeline**
   - DOM construction
   - Style calculation
   - Layout
   - Paint
   - Composite

**Common Issues to Look For** (a sketch for scanning a saved trace follows this list):
- Long tasks blocking main thread
- Excessive JavaScript execution
- Layout thrashing
- Unnecessary repaints
- Slow network requests
- Large bundle sizes

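A minimal sketch for scanning a saved trace for the first issue above (it assumes main-thread tasks appear as `RunTask` events, as in current Chrome traces; adjust the event name to your trace if needed):

```javascript
import { readFileSync } from 'node:fs';

// Traces are either a bare event array or wrapped in { traceEvents }
const raw = JSON.parse(readFileSync('trace.json', 'utf8'));
const events = Array.isArray(raw) ? raw : raw.traceEvents;

// Durations are in microseconds; flag main-thread tasks over 50ms
const longTasks = events
  .filter(e => e.name === 'RunTask' && (e.dur || 0) > 50_000)
  .map(e => ({ startMs: e.ts / 1000, durationMs: e.dur / 1000 }));

console.log(`Found ${longTasks.length} long tasks`, longTasks.slice(0, 10));
```
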
---

## Network Analysis

### Monitor Network Requests

**Using chrome-devtools-mcp:**
```javascript
// Navigate to page
await useTool('navigate_page', { url: 'https://example.com' });

// Wait for all requests
await useTool('wait_for', { waitUntil: 'networkidle' });

// List all requests
const requests = await useTool('list_network_requests', {
  resourceTypes: ['Document', 'Script', 'Stylesheet', 'Image', 'XHR', 'Fetch'],
  pageSize: 100
});

// Analyze specific request
for (const req of requests.requests) {
  const details = await useTool('get_network_request', {
    requestId: req.id
  });

  console.log({
    url: details.url,
    method: details.method,
    status: details.status,
    size: details.encodedDataLength,
    time: details.timing.receiveHeadersEnd - details.timing.requestTime,
    cached: details.fromCache
  });
}
```

**Using Puppeteer:**
```javascript
const requests = [];

// Capture all requests
page.on('request', (request) => {
  requests.push({
    url: request.url(),
    method: request.method(),
    resourceType: request.resourceType(),
    headers: request.headers()
  });
});

// Capture responses
page.on('response', (response) => {
  console.log({
    url: response.url(),
    status: response.status(),
    size: response.headers()['content-length'],
    cached: response.fromCache(),
    timing: response.timing()
  });
});

await page.goto('https://example.com');
```

### Network Performance Metrics

**Calculate Total Page Weight:**
```javascript
let totalBytes = 0;
let resourceCounts = {};

page.on('response', async (response) => {
  const type = response.request().resourceType();
  const buffer = await response.buffer();

  totalBytes += buffer.length;
  resourceCounts[type] = (resourceCounts[type] || 0) + 1;
});

await page.goto('https://example.com');

console.log('Total size:', (totalBytes / 1024 / 1024).toFixed(2), 'MB');
console.log('Resources:', resourceCounts);
```

**Identify Slow Requests:**
```javascript
page.on('response', (response) => {
  const timing = response.timing();
  // receiveHeadersEnd is in ms relative to requestTime, so it is already
  // the elapsed time until headers arrived
  const totalTime = timing ? timing.receiveHeadersEnd : 0;

  if (totalTime > 1000) { // Slower than 1 second
    console.log('Slow request:', {
      url: response.url(),
      time: totalTime.toFixed(2) + 'ms',
      size: response.headers()['content-length']
    });
  }
});
```

### Network Throttling

**Simulate Slow Connection:**
```javascript
// Using chrome-devtools-mcp
await useTool('emulate_network', {
  throttlingOption: 'Slow 3G' // or 'Fast 3G', 'Slow 4G'
});

// Using Puppeteer
const client = await page.createCDPSession();
await client.send('Network.emulateNetworkConditions', {
  offline: false,
  downloadThroughput: 400 * 1024 / 8, // 400 Kbps
  uploadThroughput: 400 * 1024 / 8,
  latency: 2000 // 2000ms RTT
});
```

---

## JavaScript Performance

### Identify Long Tasks

**Using Performance Observer:**
```javascript
await page.evaluate(() => {
  return new Promise((resolve) => {
    const longTasks = [];

    const observer = new PerformanceObserver((list) => {
      list.getEntries().forEach((entry) => {
        longTasks.push({
          name: entry.name,
          duration: entry.duration,
          startTime: entry.startTime
        });
      });
    });

    observer.observe({ entryTypes: ['longtask'] });

    // Collect for 10 seconds
    setTimeout(() => {
      observer.disconnect();
      resolve(longTasks);
    }, 10000);
  });
});
```

### CPU Profiling

**Using Puppeteer:**
```javascript
// Start CPU profiling
const client = await page.createCDPSession();
await client.send('Profiler.enable');
await client.send('Profiler.start');

// Navigate and interact
await page.goto('https://example.com');
await page.click('.button');

// Stop profiling
const { profile } = await client.send('Profiler.stop');

// Analyze profile (flame graph data)
// Import into Chrome DevTools for visualization
```

### JavaScript Coverage

**Identify Unused Code:**
```javascript
// Start coverage
await Promise.all([
  page.coverage.startJSCoverage(),
  page.coverage.startCSSCoverage()
]);

// Navigate
await page.goto('https://example.com');

// Stop coverage
const [jsCoverage, cssCoverage] = await Promise.all([
  page.coverage.stopJSCoverage(),
  page.coverage.stopCSSCoverage()
]);

// Calculate unused bytes
function calculateUnusedBytes(coverage) {
  let usedBytes = 0;
  let totalBytes = 0;

  for (const entry of coverage) {
    totalBytes += entry.text.length;
    for (const range of entry.ranges) {
      // Ranges are half-open [start, end) offsets into entry.text
      usedBytes += range.end - range.start;
    }
  }

  return {
    usedBytes,
    totalBytes,
    unusedBytes: totalBytes - usedBytes,
    unusedPercentage: ((totalBytes - usedBytes) / totalBytes * 100).toFixed(2)
  };
}

console.log('JS Coverage:', calculateUnusedBytes(jsCoverage));
console.log('CSS Coverage:', calculateUnusedBytes(cssCoverage));
```

### Bundle Size Analysis

**Analyze JavaScript Bundles:**
```javascript
page.on('response', async (response) => {
  const url = response.url();
  const type = response.request().resourceType();

  if (type === 'script') {
    const buffer = await response.buffer();
    const size = buffer.length;

    console.log({
      url: url.split('/').pop(),
      size: (size / 1024).toFixed(2) + ' KB',
      gzipped: response.headers()['content-encoding'] === 'gzip'
    });
  }
});
```

---

## Rendering Performance

### Layout Thrashing Detection

**Monitor Layout Recalculations:**
```javascript
// Using Performance Observer
// Note: this only sees user-created performance.measure() entries whose
// name mentions "layout"; wrap suspect code in performance.mark/measure
await page.evaluate(() => {
  return new Promise((resolve) => {
    const measurements = [];

    const observer = new PerformanceObserver((list) => {
      list.getEntries().forEach((entry) => {
        if (entry.entryType === 'measure' &&
            entry.name.includes('layout')) {
          measurements.push({
            name: entry.name,
            duration: entry.duration,
            startTime: entry.startTime
          });
        }
      });
    });

    observer.observe({ entryTypes: ['measure'] });

    setTimeout(() => {
      observer.disconnect();
      resolve(measurements);
    }, 5000);
  });
});
```


### Paint and Composite Metrics

**Get Paint Metrics:**
```javascript
const paintMetrics = await page.evaluate(() => {
  const paints = performance.getEntriesByType('paint');
  return {
    firstPaint: paints.find(p => p.name === 'first-paint')?.startTime,
    firstContentfulPaint: paints.find(p => p.name === 'first-contentful-paint')?.startTime
  };
});
```

### Frame Rate Analysis

**Monitor FPS:**
```javascript
await page.evaluate(() => {
  return new Promise((resolve) => {
    let frames = 0;
    let lastTime = performance.now();

    function countFrames() {
      frames++;
      requestAnimationFrame(countFrames);
    }

    countFrames();

    setTimeout(() => {
      const now = performance.now();
      const elapsed = (now - lastTime) / 1000;
      const fps = frames / elapsed;
      resolve(fps);
    }, 5000);
  });
});
```

### Layout Shifts (CLS)

**Track Individual Shifts:**
```javascript
await page.evaluate(() => {
  return new Promise((resolve) => {
    const shifts = [];
    let totalCLS = 0;

    const observer = new PerformanceObserver((list) => {
      list.getEntries().forEach((entry) => {
        if (!entry.hadRecentInput) {
          totalCLS += entry.value;
          shifts.push({
            value: entry.value,
            time: entry.startTime,
            elements: entry.sources?.map(s => s.node)
          });
        }
      });
    });

    observer.observe({ entryTypes: ['layout-shift'] });

    setTimeout(() => {
      observer.disconnect();
      resolve({ totalCLS, shifts });
    }, 10000);
  });
});
```

---

## Memory Analysis

### Memory Metrics

**Get Memory Usage:**
```javascript
// Using chrome-devtools-mcp
await useTool('evaluate_script', {
  expression: `
    ({
      usedJSHeapSize: performance.memory?.usedJSHeapSize,
      totalJSHeapSize: performance.memory?.totalJSHeapSize,
      jsHeapSizeLimit: performance.memory?.jsHeapSizeLimit
    })
  `,
  returnByValue: true
});

// Using Puppeteer
const metrics = await page.metrics();
console.log({
  jsHeapUsed: (metrics.JSHeapUsedSize / 1024 / 1024).toFixed(2) + ' MB',
  jsHeapTotal: (metrics.JSHeapTotalSize / 1024 / 1024).toFixed(2) + ' MB',
  domNodes: metrics.Nodes,
  documents: metrics.Documents,
  jsEventListeners: metrics.JSEventListeners
});
```

### Memory Leak Detection

**Monitor Memory Over Time:**
```javascript
async function detectMemoryLeak(page, duration = 30000) {
  const samples = [];
  const interval = 1000; // Sample every second
  const sampleCount = duration / interval;

  for (let i = 0; i < sampleCount; i++) {
    const metrics = await page.metrics();
    samples.push({
      time: i,
      heapUsed: metrics.JSHeapUsedSize
    });

    await new Promise((resolve) => setTimeout(resolve, interval));
  }

  // Analyze trend
  const firstSample = samples[0].heapUsed;
  const lastSample = samples[samples.length - 1].heapUsed;
  const increase = (lastSample - firstSample) / firstSample * 100;

  return {
    samples,
    memoryIncrease: increase.toFixed(2) + '%',
    possibleLeak: increase > 50 // > 50% increase indicates a possible leak
  };
}

const leakAnalysis = await detectMemoryLeak(page, 30000);
console.log('Memory Analysis:', leakAnalysis);
```

### Heap Snapshot

**Capture Heap Snapshot:**
```javascript
const client = await page.createCDPSession();

// The snapshot is streamed back in chunks via events
let snapshot = '';
client.on('HeapProfiler.addHeapSnapshotChunk', ({ chunk }) => {
  snapshot += chunk;
});

await client.send('HeapProfiler.enable');
await client.send('HeapProfiler.takeHeapSnapshot');

// Save `snapshot` to a .heapsnapshot file or analyze programmatically
```

---

## Optimization Strategies

### Image Optimization

**Detect Unoptimized Images:**
```javascript
const images = await page.evaluate(() => {
  const imgs = Array.from(document.querySelectorAll('img'));
  return imgs.map(img => ({
    src: img.src,
    naturalWidth: img.naturalWidth,
    naturalHeight: img.naturalHeight,
    displayWidth: img.width,
    displayHeight: img.height,
    oversized: img.naturalWidth > img.width * 1.5 ||
               img.naturalHeight > img.height * 1.5
  }));
});

const oversizedImages = images.filter(img => img.oversized);
console.log('Oversized images:', oversizedImages);
```

### Font Loading

**Detect Render-Blocking Fonts:**
```javascript
const fonts = await page.evaluate(() => {
  return Array.from(document.fonts).map(font => ({
    family: font.family,
    weight: font.weight,
    style: font.style,
    status: font.status,
    loaded: font.status === 'loaded'
  }));
});

console.log('Fonts:', fonts);
```

### Third-Party Scripts

**Measure Third-Party Impact:**
```javascript
const thirdPartyDomains = ['googletagmanager.com', 'facebook.net', 'doubleclick.net'];

page.on('response', async (response) => {
  const url = response.url();
  const isThirdParty = thirdPartyDomains.some(domain => url.includes(domain));

  if (isThirdParty) {
    const buffer = await response.buffer();
    console.log({
      url: url,
      size: (buffer.length / 1024).toFixed(2) + ' KB',
      type: response.request().resourceType()
    });
  }
});
```

### Critical Rendering Path

**Identify Render-Blocking Resources:**
```javascript
await page.goto('https://example.com');

const renderBlockingResources = await page.evaluate(() => {
  const resources = performance.getEntriesByType('resource');
  // Heuristic: stylesheets plus scripts whose URL does not hint at async
  return resources.filter(resource => {
    return (resource.initiatorType === 'link' &&
            resource.name.includes('.css')) ||
           (resource.initiatorType === 'script' &&
            !resource.name.includes('async'));
  }).map(r => ({
    url: r.name,
    duration: r.duration,
    startTime: r.startTime
  }));
});

console.log('Render-blocking resources:', renderBlockingResources);
```

### Lighthouse Integration

**Run Lighthouse Audit:**
```javascript
import lighthouse from 'lighthouse';
import { launch } from 'chrome-launcher';

// Launch Chrome
const chrome = await launch({ chromeFlags: ['--headless'] });

// Run Lighthouse
const { lhr } = await lighthouse('https://example.com', {
  port: chrome.port,
  onlyCategories: ['performance']
});

// Get scores
console.log({
  performanceScore: lhr.categories.performance.score * 100,
  metrics: {
    FCP: lhr.audits['first-contentful-paint'].displayValue,
    LCP: lhr.audits['largest-contentful-paint'].displayValue,
    TBT: lhr.audits['total-blocking-time'].displayValue,
    CLS: lhr.audits['cumulative-layout-shift'].displayValue,
    SI: lhr.audits['speed-index'].displayValue
  }
});

await chrome.kill();
```

---

## Performance Budgets

### Set Performance Budgets

```javascript
const budgets = {
  // Core Web Vitals
  LCP: 2500, // ms
  FID: 100,  // ms
  CLS: 0.1,  // score

  // Other metrics
  FCP: 1800, // ms
  TTI: 3800, // ms
  TBT: 300,  // ms

  // Resource budgets
  totalPageSize: 2 * 1024 * 1024, // 2 MB
  jsSize: 500 * 1024,             // 500 KB
  cssSize: 100 * 1024,            // 100 KB
  imageSize: 1 * 1024 * 1024,     // 1 MB

  // Request counts
  totalRequests: 50,
  jsRequests: 10,
  cssRequests: 5
};

async function checkBudgets(page, budgets) {
  // Measure actual values (measureCoreWebVitals / analyzeResources are
  // your own helpers, e.g. built from the snippets earlier in this guide)
  const vitals = await measureCoreWebVitals(page);
  const resources = await analyzeResources(page);

  // Compare against budgets
  const violations = [];

  if (vitals.LCP > budgets.LCP) {
    violations.push(`LCP: ${vitals.LCP}ms exceeds budget of ${budgets.LCP}ms`);
  }

  if (resources.totalSize > budgets.totalPageSize) {
    violations.push(`Page size: ${resources.totalSize} exceeds budget of ${budgets.totalPageSize}`);
  }

  // ... check other budgets

  return {
    passed: violations.length === 0,
    violations
  };
}
```


---

## Automated Performance Testing

### CI/CD Integration

```javascript
// performance-test.js
import puppeteer from 'puppeteer';

async function performanceTest(url) {
  const browser = await puppeteer.launch();
  const page = await browser.newPage();

  // Measure metrics (measureCoreWebVitals is the CWV helper shown above)
  await page.goto(url, { waitUntil: 'networkidle2' });
  const metrics = await page.metrics();
  const vitals = await measureCoreWebVitals(page);

  await browser.close();

  // Check against thresholds
  const thresholds = {
    LCP: 2500,
    FID: 100,
    CLS: 0.1,
    jsHeapSize: 50 * 1024 * 1024 // 50 MB
  };

  const failed = [];
  if (vitals.LCP > thresholds.LCP) failed.push('LCP');
  if (vitals.FID > thresholds.FID) failed.push('FID');
  if (vitals.CLS > thresholds.CLS) failed.push('CLS');
  if (metrics.JSHeapUsedSize > thresholds.jsHeapSize) failed.push('Memory');

  if (failed.length > 0) {
    console.error('Performance test failed:', failed);
    process.exit(1);
  }

  console.log('Performance test passed');
}

performanceTest(process.env.TEST_URL);
```


---

## Best Practices

### Performance Testing Checklist

1. **Measure Multiple Times**
   - Run tests 3-5 times
   - Use median values
   - Account for variance

2. **Test Different Conditions**
   - Fast 3G
   - Slow 3G
   - Offline
   - CPU throttling

3. **Test Different Devices**
   - Mobile (low-end)
   - Mobile (high-end)
   - Desktop
   - Tablet

4. **Monitor Over Time**
   - Track metrics in CI/CD
   - Set up alerts for regressions
   - Create performance dashboards

5. **Focus on User Experience**
   - Prioritize Core Web Vitals
   - Test real user journeys
   - Consider perceived performance

6. **Optimize Critical Path**
   - Minimize render-blocking resources
   - Defer non-critical JavaScript
   - Optimize font loading
   - Lazy load images

---

## Resources

- [Web.dev Performance](https://web.dev/performance/)
- [Chrome DevTools Performance](https://developer.chrome.com/docs/devtools/performance/)
- [Core Web Vitals](https://web.dev/vitals/)
- [Lighthouse](https://developer.chrome.com/docs/lighthouse/)
- [WebPageTest](https://www.webpagetest.org/)
@@ -0,0 +1,953 @@
# Puppeteer Quick Reference

Complete guide to browser automation with Puppeteer, a high-level API over the Chrome DevTools Protocol.

## Table of Contents

- [Setup](#setup)
- [Browser & Page Management](#browser--page-management)
- [Navigation](#navigation)
- [Element Interaction](#element-interaction)
- [JavaScript Execution](#javascript-execution)
- [Screenshots & PDFs](#screenshots--pdfs)
- [Network Interception](#network-interception)
- [Device Emulation](#device-emulation)
- [Performance](#performance)
- [Common Patterns](#common-patterns)

---

## Setup

### Installation

```bash
# Install Puppeteer
npm install puppeteer

# Install core only (bring your own Chrome)
npm install puppeteer-core
```

### Basic Usage

```javascript
import puppeteer from 'puppeteer';

// Launch browser
const browser = await puppeteer.launch({
  headless: true,
  args: ['--no-sandbox']
});

// Open page
const page = await browser.newPage();

// Navigate
await page.goto('https://example.com');

// Do work...

// Cleanup
await browser.close();
```

---

## Browser & Page Management

### Launch Browser

```javascript
const browser = await puppeteer.launch({
  // Visibility (pick one)
  headless: false,    // Show browser UI
  // headless: 'new', // New headless mode (Chrome 112+)

  // Chrome location
  executablePath: '/path/to/chrome',
  channel: 'chrome', // or 'chrome-canary', 'chrome-beta'

  // Browser context
  userDataDir: './user-data', // Persistent profile

  // Window size
  defaultViewport: {
    width: 1920,
    height: 1080,
    deviceScaleFactor: 1,
    isMobile: false
  },

  // Advanced options
  args: [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-web-security',
    '--disable-features=IsolateOrigins',
    '--disable-site-isolation-trials',
    '--start-maximized',
    // Network proxy is configured via a Chrome flag:
    // '--proxy-server=http://proxy.com:8080'
  ],

  // Debugging
  devtools: true, // Open DevTools automatically
  slowMo: 250     // Slow down by 250ms per action
});
```

### Connect to Running Browser

```javascript
// Launch Chrome with debugging
// google-chrome --remote-debugging-port=9222

const browser = await puppeteer.connect({
  browserURL: 'http://localhost:9222',
  // or browserWSEndpoint: 'ws://localhost:9222/devtools/browser/...'
});
```

### Page Management

```javascript
// Create new page
const page = await browser.newPage();

// Get all pages
const pages = await browser.pages();

// Close page
await page.close();

// Multiple pages
const page1 = await browser.newPage();
const page2 = await browser.newPage();

// Switch between pages
await page1.bringToFront();
```

### Browser Context (Incognito)

```javascript
// Create isolated context
const context = await browser.createBrowserContext();
const page = await context.newPage();

// Cleanup context
await context.close();
```

---

## Navigation

### Basic Navigation

```javascript
// Navigate to URL
await page.goto('https://example.com');

// Navigate with options
await page.goto('https://example.com', {
  waitUntil: 'networkidle2', // or 'load', 'domcontentloaded', 'networkidle0'
  timeout: 30000 // Max wait time (ms)
});

// Reload page
await page.reload({ waitUntil: 'networkidle2' });

// Navigation history
await page.goBack();
await page.goForward();

// Wait for navigation
await page.waitForNavigation({
  waitUntil: 'networkidle2'
});
```

### Wait Until Options

- `load` - Wait for load event
- `domcontentloaded` - Wait for DOMContentLoaded event
- `networkidle0` - Wait until no network connections for 500ms
- `networkidle2` - Wait until max 2 network connections for 500ms

---

## Element Interaction

### Selectors

```javascript
// CSS selectors
await page.$('#id');
await page.$('.class');
await page.$('div > p');

// XPath (returns an array of matches)
await page.$x('//button[text()="Submit"]');

// Get all matching elements
await page.$$('.item');
await page.$x('//div[@class="item"]'); // $x already returns all XPath matches
```

### Click Elements

```javascript
// Click by selector
await page.click('.button');

// Click with options
await page.click('.button', {
  button: 'left', // or 'right', 'middle'
  clickCount: 1,  // 2 for double-click
  delay: 100      // Delay between mousedown and mouseup
});

// ElementHandle click
const button = await page.$('.button');
await button.click();
```

### Type Text

```javascript
// Type into input
await page.type('#search', 'query text');

// Type with delay
await page.type('#search', 'slow typing', { delay: 100 });

// Clear and type
await page.$eval('#search', el => el.value = '');
await page.type('#search', 'new text');
```

### Form Interaction

```javascript
// Fill input
await page.type('#username', 'john@example.com');
await page.type('#password', 'secret123');

// Select dropdown option
await page.select('#country', 'US');        // By value
await page.select('#country', 'USA', 'UK'); // Multiple

// Check/uncheck checkbox
await page.click('input[type="checkbox"]');

// Choose radio button
await page.click('input[value="option2"]');

// Upload file
const input = await page.$('input[type="file"]');
await input.uploadFile('/path/to/file.pdf');

// Submit form (start waiting before the click to avoid a race)
await Promise.all([
  page.waitForNavigation(),
  page.click('button[type="submit"]')
]);
```

### Hover & Focus

```javascript
// Hover over element
await page.hover('.menu-item');

// Focus element
await page.focus('#input');

// Blur
await page.$eval('#input', el => el.blur());
```

### Drag & Drop

```javascript
const source = await page.$('.draggable');
const target = await page.$('.drop-zone');

await source.drag(target);
await source.drop(target);
```

---

## JavaScript Execution

### Evaluate in Page Context

```javascript
// Execute JavaScript
const title = await page.evaluate(() => document.title);

// With arguments
const text = await page.evaluate(
  (selector) => document.querySelector(selector).textContent,
  '.heading'
);

// Return complex data
const data = await page.evaluate(() => ({
  title: document.title,
  url: location.href,
  cookies: document.cookie
}));

// With ElementHandle
const element = await page.$('.button');
const buttonText = await page.evaluate(el => el.textContent, element);
```

### Query & Modify DOM

```javascript
// Get element property
const value = await page.$eval('#input', el => el.value);

// Get multiple elements
const items = await page.$$eval('.item', elements =>
  elements.map(el => el.textContent)
);

// Modify element
await page.$eval('#input', (el, value) => {
  el.value = value;
}, 'new value');

// Add class
await page.$eval('.element', el => el.classList.add('active'));
```

### Expose Functions

```javascript
import crypto from 'node:crypto';

// Expose Node.js function to page
await page.exposeFunction('md5', (text) =>
  crypto.createHash('md5').update(text).digest('hex')
);

// Call from page context
const hash = await page.evaluate(async () => {
  return await window.md5('hello world');
});
```

---

## Screenshots & PDFs

### Screenshots

```javascript
// Full page screenshot
await page.screenshot({
  path: 'screenshot.png',
  fullPage: true
});

// Viewport screenshot
await page.screenshot({
  path: 'viewport.png',
  fullPage: false
});

// Element screenshot
const element = await page.$('.chart');
await element.screenshot({
  path: 'chart.png'
});

// Screenshot options
await page.screenshot({
  path: 'page.jpeg',
  type: 'jpeg', // or 'png', 'webp'
  quality: 80,  // quality applies to jpeg/webp only (0-100)
  clip: {       // Crop region
    x: 0,
    y: 0,
    width: 500,
    height: 500
  },
  omitBackground: true // Transparent background
});

// Screenshot to buffer
const buffer = await page.screenshot();
```

### PDF Generation

```javascript
// Generate PDF
await page.pdf({
  path: 'page.pdf',
  format: 'A4', // or 'Letter', 'Legal', etc.
  printBackground: true,
  margin: {
    top: '1cm',
    right: '1cm',
    bottom: '1cm',
    left: '1cm'
  }
});

// Custom page size
await page.pdf({
  path: 'custom.pdf',
  width: '8.5in',
  height: '11in',
  landscape: true
});

// Header and footer
await page.pdf({
  path: 'report.pdf',
  displayHeaderFooter: true,
  headerTemplate: '<div style="font-size:10px;">Header</div>',
  footerTemplate: '<div style="font-size:10px;">Page <span class="pageNumber"></span></div>'
});
```

---

## Network Interception

### Request Interception

```javascript
// Enable request interception
await page.setRequestInterception(true);

// Intercept requests
page.on('request', (request) => {
  // Block specific resource types
  if (request.resourceType() === 'image') {
    request.abort();
  }
  // Block URLs
  else if (request.url().includes('ads')) {
    request.abort();
  }
  // Modify request
  else if (request.url().includes('api')) {
    request.continue({
      headers: {
        ...request.headers(),
        'Authorization': 'Bearer token'
      }
    });
  }
  // Continue normally
  else {
    request.continue();
  }
});
```

### Mock Responses

```javascript
await page.setRequestInterception(true);

page.on('request', (request) => {
  if (request.url().includes('/api/user')) {
    request.respond({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({
        id: 1,
        name: 'Mock User'
      })
    });
  } else {
    request.continue();
  }
});
```

### Monitor Network

```javascript
// Track requests
page.on('request', (request) => {
  console.log('Request:', request.method(), request.url());
});

// Track responses
page.on('response', (response) => {
  console.log('Response:', response.status(), response.url());
});

// Track failed requests
page.on('requestfailed', (request) => {
  console.log('Failed:', request.failure().errorText, request.url());
});

// Get response body
page.on('response', async (response) => {
  if (response.url().includes('/api/data')) {
    const json = await response.json();
    console.log('API Data:', json);
  }
});
```

---

## Device Emulation

### Predefined Devices

```javascript
import { KnownDevices } from 'puppeteer'; // exported as `devices` in older versions

// Emulate iPhone
const iPhone = KnownDevices['iPhone 13 Pro'];
await page.emulate(iPhone);

// Common devices
const iPad = KnownDevices['iPad Pro'];
const pixel = KnownDevices['Pixel 5'];
const galaxy = KnownDevices['Galaxy S9+'];

// Navigate after emulation
await page.goto('https://example.com');
```

### Custom Device

```javascript
await page.emulate({
  viewport: {
    width: 375,
    height: 812,
    deviceScaleFactor: 3,
    isMobile: true,
    hasTouch: true,
    isLandscape: false
  },
  userAgent: 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X)...'
});
```

### Viewport Only

```javascript
await page.setViewport({
  width: 1920,
  height: 1080,
  deviceScaleFactor: 1
});
```

### Geolocation

```javascript
// Set geolocation
await page.setGeolocation({
  latitude: 37.7749,
  longitude: -122.4194,
  accuracy: 100
});

// Grant permissions
const context = browser.defaultBrowserContext();
await context.overridePermissions('https://example.com', ['geolocation']);
```

### Timezone & Locale

```javascript
// Set timezone
await page.emulateTimezone('America/New_York');

// Emulate media type (screen/print)
await page.emulateMediaType('screen');

// Override the reported locale
await page.evaluateOnNewDocument(() => {
  Object.defineProperty(navigator, 'language', {
    get: () => 'en-US'
  });
});
```

---

## Performance

### CPU & Network Throttling

```javascript
// CPU throttling
const client = await page.createCDPSession();
await client.send('Emulation.setCPUThrottlingRate', { rate: 4 });

// Network throttling
await page.emulateNetworkConditions({
  offline: false,
  downloadThroughput: 1.5 * 1024 * 1024 / 8, // 1.5 Mbps
  uploadThroughput: 750 * 1024 / 8,          // 750 Kbps
  latency: 40                                // 40ms RTT
});

// Predefined profiles (PredefinedNetworkConditions in newer versions)
await page.emulateNetworkConditions(
  puppeteer.networkConditions['Fast 3G']
);

// Disable throttling
await page.emulateNetworkConditions(null);
```

### Performance Metrics

```javascript
// Get metrics
const metrics = await page.metrics();
console.log(metrics);
// {
//   Timestamp, Documents, Frames, JSEventListeners,
//   Nodes, LayoutCount, RecalcStyleCount,
//   LayoutDuration, RecalcStyleDuration,
//   ScriptDuration, TaskDuration,
//   JSHeapUsedSize, JSHeapTotalSize
// }
```

### Performance Tracing

```javascript
// Start tracing
await page.tracing.start({
  path: 'trace.json',
  categories: [
    'devtools.timeline',
    'disabled-by-default-devtools.timeline'
  ]
});

// Navigate
await page.goto('https://example.com');

// Stop tracing
await page.tracing.stop();

// Analyze trace in chrome://tracing
```

### Coverage (Code Usage)

```javascript
// Start JS coverage
await page.coverage.startJSCoverage();

// Start CSS coverage
await page.coverage.startCSSCoverage();

// Navigate
await page.goto('https://example.com');

// Stop and get coverage
const jsCoverage = await page.coverage.stopJSCoverage();
const cssCoverage = await page.coverage.stopCSSCoverage();

// Calculate unused bytes (ranges are half-open [start, end) offsets)
let totalBytes = 0;
let usedBytes = 0;
for (const entry of [...jsCoverage, ...cssCoverage]) {
  totalBytes += entry.text.length;
  for (const range of entry.ranges) {
    usedBytes += range.end - range.start;
  }
}

console.log(`Used: ${(usedBytes / totalBytes * 100).toFixed(2)}%`);
```

---

## Common Patterns

### Wait for Elements

```javascript
// Wait for selector
await page.waitForSelector('.element', {
  visible: true,
  timeout: 5000
});

// Wait for XPath
await page.waitForXPath('//button[text()="Submit"]');

// Wait for function
await page.waitForFunction(
  () => document.querySelector('.loading') === null,
  { timeout: 10000 }
);

// Wait for a fixed timeout (deprecated in recent Puppeteer;
// prefer the explicit waits above)
await page.waitForTimeout(2000);
```

### Handle Dialogs

```javascript
// Alert, confirm, prompt
page.on('dialog', async (dialog) => {
  console.log(dialog.type(), dialog.message());

  // Accept
  await dialog.accept();
  // or reject
  // await dialog.dismiss();
  // or provide input for prompt
  // await dialog.accept('input text');
});
```

### Handle Downloads

```javascript
// Set download path
const client = await page.createCDPSession();
await client.send('Page.setDownloadBehavior', {
  behavior: 'allow',
  downloadPath: '/path/to/downloads'
});

// Trigger download
await page.click('a[download]');
```

### Multiple Pages (Tabs)

```javascript
// Listen for new pages
browser.on('targetcreated', async (target) => {
  if (target.type() === 'page') {
    const newPage = await target.page();
    console.log('New page opened:', newPage.url());
  }
});

// Click link that opens new tab
const [newPage] = await Promise.all([
  new Promise(resolve => browser.once('targetcreated', target => resolve(target.page()))),
  page.click('a[target="_blank"]')
]);

console.log('New page URL:', newPage.url());
```

### Frames (iframes)

```javascript
// Get all frames
const frames = page.frames();

// Find frame by name
const frame = page.frames().find(f => f.name() === 'myframe');

// Find frame by URL
const frameByUrl = page.frames().find(f => f.url().includes('example.com'));

// Main frame
const mainFrame = page.mainFrame();

// Interact with frame
await frame.click('.button');
await frame.type('#input', 'text');
```

### Infinite Scroll

```javascript
async function autoScroll(page) {
  await page.evaluate(async () => {
    await new Promise((resolve) => {
      let totalHeight = 0;
      const distance = 100;
      const timer = setInterval(() => {
        const scrollHeight = document.body.scrollHeight;
        window.scrollBy(0, distance);
        totalHeight += distance;

        if (totalHeight >= scrollHeight) {
          clearInterval(timer);
          resolve();
        }
      }, 100);
    });
  });
}

await autoScroll(page);
```

### Cookies

```javascript
// Get cookies
const cookies = await page.cookies();

// Set cookies
await page.setCookie({
  name: 'session',
  value: 'abc123',
  domain: 'example.com',
  path: '/',
  httpOnly: true,
  secure: true,
  sameSite: 'Strict'
});

// Delete cookies
await page.deleteCookie({ name: 'session' });
```

### Local Storage

```javascript
// Set localStorage
await page.evaluate(() => {
  localStorage.setItem('key', 'value');
});

// Get localStorage
const value = await page.evaluate(() => {
  return localStorage.getItem('key');
});

// Clear localStorage
await page.evaluate(() => localStorage.clear());
```

### Error Handling

```javascript
try {
  await page.goto('https://example.com', {
    waitUntil: 'networkidle2',
    timeout: 30000
  });
} catch (error) {
  if (error.name === 'TimeoutError') {
    console.error('Page load timeout');
  } else {
    console.error('Navigation failed:', error);
  }

  // Take screenshot on error
  await page.screenshot({ path: 'error.png' });
}
```

### Stealth Mode (Avoid Detection)

```javascript
// Hide automation indicators
await page.evaluateOnNewDocument(() => {
  // Override navigator.webdriver
  Object.defineProperty(navigator, 'webdriver', {
    get: () => false
  });

  // Mock chrome object
  window.chrome = {
    runtime: {}
  };

  // Mock permissions
  const originalQuery = window.navigator.permissions.query;
  window.navigator.permissions.query = (parameters) => (
    parameters.name === 'notifications' ?
      Promise.resolve({ state: 'granted' }) :
      originalQuery(parameters)
  );
});

// Set realistic user agent
await page.setUserAgent(
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
);
```

---

## Debugging Tips

### Take Screenshots on Error

```javascript
page.on('pageerror', async (error) => {
  console.error('Page error:', error);
  await page.screenshot({ path: `error-${Date.now()}.png` });
});
```

### Console Logging

```javascript
// Forward console to Node
page.on('console', (msg) => {
  console.log('PAGE LOG:', msg.text());
});
```

### Slow Down Execution

```javascript
const browser = await puppeteer.launch({
  slowMo: 250 // 250ms delay between actions
});
```

### Keep Browser Open

```javascript
const browser = await puppeteer.launch({
  headless: false,
  devtools: true
});

// Prevent auto-close
await page.evaluate(() => { debugger; });
```

---

## Best Practices

1. **Always close browser:** Use try/finally or process cleanup (see the sketch after this list)
2. **Wait appropriately:** Use waitForSelector, not setTimeout
3. **Handle errors:** Wrap navigation in try/catch
4. **Optimize selectors:** Use specific selectors for reliability
5. **Avoid race conditions:** Wait for navigation after clicks
6. **Reuse pages:** Don't create new pages unnecessarily
7. **Set timeouts:** Always specify reasonable timeouts
8. **Clean up:** Close unused pages and contexts

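A minimal sketch of practice 1 (guaranteeing cleanup even when a step throws):

```javascript
const browser = await puppeteer.launch();
try {
  const page = await browser.newPage();
  await page.goto('https://example.com', { waitUntil: 'networkidle2' });
  // ... do work ...
} finally {
  await browser.close(); // runs on success and on error
}
```
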
---

## Resources

- [Puppeteer Documentation](https://pptr.dev/)
- [Puppeteer API](https://pptr.dev/api)
- [Puppeteer Examples](https://github.com/puppeteer/puppeteer/tree/main/examples)
- [Awesome Puppeteer](https://github.com/transitive-bullshit/awesome-puppeteer)
3
.opencode/skills/chrome-devtools/scripts/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
node_modules
.browser-session.json
.auth-session.json
290
.opencode/skills/chrome-devtools/scripts/README.md
Normal file
@@ -0,0 +1,290 @@
# Chrome DevTools Scripts

CLI scripts for browser automation using Puppeteer.

**CRITICAL**: Always check `pwd` before running scripts.

## Installation

### Skill Location

Skills can exist in **project-scope** or **user-scope**. Priority: project-scope > user-scope.

```bash
# Detect skill location
SKILL_DIR=""
if [ -d ".opencode/skills/chrome-devtools/scripts" ]; then
  SKILL_DIR=".opencode/skills/chrome-devtools/scripts"
elif [ -d "$HOME/.opencode/skills/chrome-devtools/scripts" ]; then
  SKILL_DIR="$HOME/.opencode/skills/chrome-devtools/scripts"
fi
cd "$SKILL_DIR"
```

### Quick Install

```bash
pwd             # Confirm your current working directory first
cd "$SKILL_DIR" # SKILL_DIR from the detection snippet above
./install.sh    # Auto-checks dependencies and installs
```

### Manual Installation

**Linux/WSL** - Install system dependencies first:
```bash
./install-deps.sh # Auto-detects OS (Ubuntu, Debian, Fedora, etc.)
```

Or manually:
```bash
sudo apt-get install -y libnss3 libnspr4 libasound2t64 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 libxkbcommon0 libxcomposite1 libxdamage1 libxfixes3 libxrandr2 libgbm1
```

**All platforms** - Install Node dependencies:
```bash
npm install
```

## Scripts

**CRITICAL**: Always check `pwd` before running scripts.

### inject-auth.js
Inject authentication (cookies, tokens, storage) for testing protected routes.

**Workflow for testing protected routes:**
1. User manually logs into the site in their browser
2. User extracts cookies/tokens from browser DevTools (Application tab)
3. Run inject-auth.js to inject auth into the Puppeteer session
4. Run other scripts, which will use the authenticated session

```bash
# Inject cookies
node inject-auth.js --url https://example.com --cookies '[{"name":"session","value":"abc123","domain":".example.com"}]'

# Inject Bearer token (stores in localStorage + sets HTTP header)
node inject-auth.js --url https://example.com --token "Bearer eyJhbGciOi..." --header Authorization

# Inject localStorage items
node inject-auth.js --url https://example.com --local-storage '{"auth_token":"xyz","user_id":"123"}'

# Inject sessionStorage items
node inject-auth.js --url https://example.com --session-storage '{"temp_key":"value"}'

# Combined injection
node inject-auth.js --url https://example.com \
  --cookies '[{"name":"session","value":"abc"}]' \
  --local-storage '{"user":"data"}' \
  --reload true

# Clear saved auth session
node inject-auth.js --url https://example.com --cookies '[]' --clear true
```

Options:
- `--cookies '<json>'` - JSON array of cookie objects (name, value, domain required)
- `--token '<token>'` - Bearer token to inject
- `--token-key '<key>'` - localStorage key for token (default: access_token)
- `--header '<name>'` - HTTP header name for token (e.g., Authorization)
- `--local-storage '<json>'` - JSON object of localStorage key-value pairs
- `--session-storage '<json>'` - JSON object of sessionStorage key-value pairs
- `--reload true` - Reload page after injection to apply auth
- `--clear true` - Clear the saved auth session file

**Session persistence:** Auth is saved to `.auth-session.json` (valid for 24h) and automatically applied by subsequent script runs until `--clear true` is used or the browser closes.

### navigate.js
Navigate to a URL.

```bash
node navigate.js --url https://example.com [--wait-until networkidle2] [--timeout 30000]
```

### screenshot.js
Take a screenshot with automatic compression.

**Important**: Always save screenshots to the `./docs/screenshots` directory.

```bash
|
||||
node screenshot.js --output screenshot.png [--url https://example.com] [--full-page true] [--selector .element] [--max-size 5] [--no-compress]
|
||||
```
|
||||
|
||||
**Automatic Compression**: Screenshots >5MB are automatically compressed using ImageMagick to ensure compatibility with Gemini API and Claude Code. Install ImageMagick for this feature:
|
||||
- macOS: `brew install imagemagick`
|
||||
- Linux: `sudo apt-get install imagemagick`
|
||||
|
||||
Options:
|
||||
- `--max-size N` - Custom size threshold in MB (default: 5)
|
||||
- `--no-compress` - Disable automatic compression
|
||||
- `--format png|jpeg` - Output format (default: png)
|
||||
- `--quality N` - JPEG quality 0-100 (default: auto)
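
For example, to produce a smaller file up front rather than relying on post-capture compression, the format and quality flags can be combined (the values below are illustrative):

```bash
node screenshot.js --url https://example.com \
  --output docs/screenshots/page.jpg \
  --format jpeg --quality 80 --max-size 2
```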

### click.js

Click an element.

```bash
node click.js --selector ".button" [--url https://example.com] [--wait-for ".result"]
```

### fill.js

Fill form fields.

```bash
node fill.js --selector "#input" --value "text" [--url https://example.com] [--clear true]
```

### evaluate.js

Execute JavaScript in page context.

```bash
node evaluate.js --script "document.title" [--url https://example.com]
```

### snapshot.js

Get DOM snapshot with interactive elements.

```bash
node snapshot.js [--url https://example.com] [--output snapshot.json]
```

### console.js

Monitor console messages.

```bash
node console.js --url https://example.com [--types error,warn] [--duration 5000]
```

### network.js

Monitor network requests.

```bash
node network.js --url https://example.com [--types xhr,fetch] [--output requests.json]
```

### performance.js

Measure performance metrics and record a trace.

```bash
node performance.js --url https://example.com [--trace trace.json] [--metrics] [--resources true]
```

### ws-debug.js

Debug WebSocket connections (basic mode).

```bash
node ws-debug.js
```

Monitors WebSocket events via CDP: created, handshake, response, closed, error.

### ws-full-debug.js

Debug WebSocket connections with full event tracking.

```bash
node ws-full-debug.js
```

Monitors all WebSocket events, including frames sent/received, with detailed logging.

## Common Options

- `--headless false` - Show browser window
- `--close false` - Keep browser open
- `--timeout 30000` - Set timeout in milliseconds
- `--wait-until networkidle2` - Wait strategy (load, domcontentloaded, networkidle0, networkidle2)
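
These flags apply to any script. A typical interactive-debugging sketch, keeping a visible browser alive between commands (both flags come from the list above):

```bash
# Open a visible browser and leave it running
node navigate.js --url https://example.com --headless false --close false

# Subsequent commands reuse the same browser session
node screenshot.js --output docs/screenshots/debug.png
```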

## Selector Support

Scripts that accept `--selector` (click.js, fill.js, screenshot.js) support both **CSS** and **XPath** selectors.

### CSS Selectors (Default)

```bash
# Element tag
node click.js --selector "button" --url https://example.com

# Class selector
node click.js --selector ".btn-submit" --url https://example.com

# ID selector
node fill.js --selector "#email" --value "user@example.com" --url https://example.com

# Attribute selector
node click.js --selector 'button[type="submit"]' --url https://example.com

# Complex selector
node screenshot.js --selector "div.container > button.btn-primary" --output btn.png
```

### XPath Selectors

XPath selectors start with `/` or `(//` and are automatically detected:

```bash
# Text matching - exact
node click.js --selector '//button[text()="Submit"]' --url https://example.com

# Text matching - contains
node click.js --selector '//button[contains(text(),"Submit")]' --url https://example.com

# Attribute matching
node fill.js --selector '//input[@type="email"]' --value "user@example.com"

# Multiple conditions
node click.js --selector '//button[@type="submit" and contains(text(),"Save")]'

# Descendant selection
node screenshot.js --selector '//div[@class="modal"]//button[@class="close"]' --output modal.png

# Nth element
node click.js --selector '(//button)[2]'   # Second button on page
```

### Discovering Selectors

Use `snapshot.js` to discover correct selectors:

```bash
# Get all interactive elements
node snapshot.js --url https://example.com | jq '.elements[]'

# Find buttons
node snapshot.js --url https://example.com | jq '.elements[] | select(.tagName=="BUTTON")'

# Find inputs
node snapshot.js --url https://example.com | jq '.elements[] | select(.tagName=="INPUT")'
```

### Security

XPath selectors are validated to prevent injection attacks. The following patterns are blocked:

- `javascript:`
- `<script`
- `onerror=`, `onload=`, `onclick=`
- `eval(`, `Function(`, `constructor(`

Selectors exceeding 1000 characters are rejected (DoS prevention).
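
As a sketch of what rejection looks like, a selector containing one of the blocked patterns should fail validation and exit non-zero (the exact error message comes from the selector library):

```bash
# Expected to be rejected by the XPath validator and exit with code 1
node click.js --selector '//img[@onerror="alert(1)"]' --url https://example.com
```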

## Output Format

All scripts output JSON to stdout:

```json
{
  "success": true,
  "url": "https://example.com",
  "title": "Example Domain",
  ...
}
```

Errors are output to stderr:

```json
{
  "success": false,
  "error": "Error message",
  "stack": "..."
}
```
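
A minimal shell pattern for consuming this contract, assuming only the stdout/stderr split and the exit-code-1-on-error behavior described here:

```bash
if out=$(node navigate.js --url https://example.com 2>err.json); then
  echo "Loaded: $(echo "$out" | jq -r '.title')"
else
  # Error JSON was written to stderr (captured in err.json above)
  jq -r '.error' err.json >&2
fi
```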

@@ -0,0 +1,102 @@
/**
 * Tests for error handling in chrome-devtools scripts
 * Verifies scripts exit with code 1 on errors
 * Run with: node --test __tests__/error-handling.test.js
 *
 * Note: These tests verify exit code behavior. When puppeteer is not installed,
 * scripts still exit with code 1 (module not found), which validates the error path.
 * When puppeteer IS installed, missing --url triggers an application-level error with code 1.
 */
import { describe, it } from 'node:test';
import assert from 'node:assert';
import { spawn } from 'node:child_process';
import { fileURLToPath } from 'node:url';
import path from 'node:path';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const scriptsDir = path.join(__dirname, '..');

function runScript(script, args = [], timeout = 10000) {
  return new Promise((resolve) => {
    const proc = spawn('node', [path.join(scriptsDir, script), ...args], {
      timeout,
      stdio: ['pipe', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    proc.stdout.on('data', (data) => { stdout += data; });
    proc.stderr.on('data', (data) => { stderr += data; });

    proc.on('close', (code) => {
      resolve({ code, stdout, stderr, combined: stdout + stderr });
    });

    proc.on('error', (err) => {
      resolve({ code: 1, stdout, stderr: err.message, combined: err.message });
    });

    setTimeout(() => {
      proc.kill('SIGTERM');
      resolve({ code: null, stdout, stderr, timedOut: true, combined: stdout + stderr });
    }, timeout);
  });
}

describe('chrome-devtools error handling', () => {
  describe('console.js', () => {
    it('should exit with code 1 when --url is missing or on error', async () => {
      const result = await runScript('console.js', []);
      assert.strictEqual(result.code, 1, 'Expected exit code 1');
    });

    it('should output error information', async () => {
      const result = await runScript('console.js', []);
      assert.strictEqual(result.code, 1);
      // Either app-level error (--url required) or module error (puppeteer not found)
      const hasError = result.combined.toLowerCase().includes('error') ||
        result.combined.includes('--url');
      assert.ok(hasError, 'Expected error in output');
    });
  });

  describe('evaluate.js', () => {
    it('should exit with code 1 when --url is missing or on error', async () => {
      const result = await runScript('evaluate.js', []);
      assert.strictEqual(result.code, 1, 'Expected exit code 1');
    });
  });

  describe('navigate.js', () => {
    it('should exit with code 1 when --url is missing or on error', async () => {
      const result = await runScript('navigate.js', []);
      assert.strictEqual(result.code, 1, 'Expected exit code 1');
    });
  });

  describe('network.js', () => {
    it('should exit with code 1 when --url is missing or on error', async () => {
      const result = await runScript('network.js', []);
      assert.strictEqual(result.code, 1, 'Expected exit code 1');
    });
  });

  describe('performance.js', () => {
    it('should exit with code 1 when --url is missing or on error', async () => {
      const result = await runScript('performance.js', []);
      assert.strictEqual(result.code, 1, 'Expected exit code 1');
    });
  });

  describe('all scripts exit code consistency', () => {
    const scripts = ['console.js', 'evaluate.js', 'navigate.js', 'network.js', 'performance.js'];

    for (const script of scripts) {
      it(`${script} should exit 1 on invalid input or error`, async () => {
        const result = await runScript(script, []);
        assert.strictEqual(result.code, 1, `${script} should exit with code 1`);
      });
    }
  });
});
@@ -0,0 +1,210 @@
/**
 * Tests for selector parsing library
 * Run with: node --test __tests__/selector.test.js
 */
import { describe, it } from 'node:test';
import assert from 'node:assert';
import { parseSelector } from '../lib/selector.js';

describe('parseSelector', () => {
  describe('CSS Selectors', () => {
    it('should detect simple CSS selectors', () => {
      const result = parseSelector('button');
      assert.strictEqual(result.type, 'css');
      assert.strictEqual(result.selector, 'button');
    });

    it('should detect class selectors', () => {
      const result = parseSelector('.btn-submit');
      assert.strictEqual(result.type, 'css');
      assert.strictEqual(result.selector, '.btn-submit');
    });

    it('should detect ID selectors', () => {
      const result = parseSelector('#email-input');
      assert.strictEqual(result.type, 'css');
      assert.strictEqual(result.selector, '#email-input');
    });

    it('should detect attribute selectors', () => {
      const result = parseSelector('button[type="submit"]');
      assert.strictEqual(result.type, 'css');
      assert.strictEqual(result.selector, 'button[type="submit"]');
    });

    it('should detect complex CSS selectors', () => {
      const result = parseSelector('div.container > button.btn-primary:hover');
      assert.strictEqual(result.type, 'css');
    });
  });

  describe('XPath Selectors', () => {
    it('should detect absolute XPath', () => {
      const result = parseSelector('/html/body/button');
      assert.strictEqual(result.type, 'xpath');
      assert.strictEqual(result.selector, '/html/body/button');
    });

    it('should detect relative XPath', () => {
      const result = parseSelector('//button');
      assert.strictEqual(result.type, 'xpath');
      assert.strictEqual(result.selector, '//button');
    });

    it('should detect XPath with text matching', () => {
      const result = parseSelector('//button[text()="Click Me"]');
      assert.strictEqual(result.type, 'xpath');
    });

    it('should detect XPath with contains', () => {
      const result = parseSelector('//button[contains(text(),"Submit")]');
      assert.strictEqual(result.type, 'xpath');
    });

    it('should detect XPath with attributes', () => {
      const result = parseSelector('//input[@type="email"]');
      assert.strictEqual(result.type, 'xpath');
    });

    it('should detect grouped XPath', () => {
      const result = parseSelector('(//button)[1]');
      assert.strictEqual(result.type, 'xpath');
    });
  });

  describe('Security Validation', () => {
    it('should block javascript: injection', () => {
      assert.throws(
        () => parseSelector('//button[@onclick="javascript:alert(1)"]'),
        /XPath injection detected.*javascript:/i
      );
    });

    it('should block <script tag injection', () => {
      assert.throws(
        () => parseSelector('//div[contains(text(),"<script>alert(1)</script>")]'),
        /XPath injection detected.*<script/i
      );
    });

    it('should block onerror= injection', () => {
      assert.throws(
        () => parseSelector('//img[@onerror="alert(1)"]'),
        /XPath injection detected.*onerror=/i
      );
    });

    it('should block onload= injection', () => {
      assert.throws(
        () => parseSelector('//body[@onload="malicious()"]'),
        /XPath injection detected.*onload=/i
      );
    });

    it('should block onclick= injection', () => {
      assert.throws(
        () => parseSelector('//a[@onclick="steal()"]'),
        /XPath injection detected.*onclick=/i
      );
    });

    it('should block eval( injection', () => {
      assert.throws(
        () => parseSelector('//div[eval("malicious")]'),
        /XPath injection detected.*eval\(/i
      );
    });

    it('should block Function( injection', () => {
      assert.throws(
        () => parseSelector('//div[Function("return 1")()]'),
        /XPath injection detected.*Function\(/i
      );
    });

    it('should block constructor( injection', () => {
      assert.throws(
        () => parseSelector('//div[constructor("alert(1)")()]'),
        /XPath injection detected.*constructor\(/i
      );
    });

    it('should be case-insensitive for security checks', () => {
      assert.throws(
        () => parseSelector('//div[@ONERROR="alert(1)"]'),
        /XPath injection detected/i
      );
    });

    it('should block extremely long selectors (DoS prevention)', () => {
      const longSelector = '//' + 'a'.repeat(1001);
      assert.throws(
        () => parseSelector(longSelector),
        /XPath selector too long/i
      );
    });
  });

  describe('Edge Cases', () => {
    it('should throw on empty string', () => {
      assert.throws(
        () => parseSelector(''),
        /Selector must be a non-empty string/
      );
    });

    it('should throw on null', () => {
      assert.throws(
        () => parseSelector(null),
        /Selector must be a non-empty string/
      );
    });

    it('should throw on undefined', () => {
      assert.throws(
        () => parseSelector(undefined),
        /Selector must be a non-empty string/
      );
    });

    it('should throw on non-string input', () => {
      assert.throws(
        () => parseSelector(123),
        /Selector must be a non-empty string/
      );
    });

    it('should handle selectors with special characters', () => {
      const result = parseSelector('button[data-test="submit-form"]');
      assert.strictEqual(result.type, 'css');
    });

    it('should allow safe XPath with parentheses', () => {
      const result = parseSelector('//button[contains(text(),"Save")]');
      assert.strictEqual(result.type, 'xpath');
      // Should not throw
    });
  });

  describe('Real-World Examples', () => {
    it('should handle common button selector', () => {
      const result = parseSelector('//button[contains(text(),"Submit")]');
      assert.strictEqual(result.type, 'xpath');
    });

    it('should handle complex form selector', () => {
      const result = parseSelector('//form[@id="login-form"]//input[@type="email"]');
      assert.strictEqual(result.type, 'xpath');
    });

    it('should handle descendant selector', () => {
      const result = parseSelector('//div[@class="modal"]//button[@class="close"]');
      assert.strictEqual(result.type, 'xpath');
    });

    it('should handle nth-child equivalent', () => {
      const result = parseSelector('(//li)[3]');
      assert.strictEqual(result.type, 'xpath');
    });
  });
});
363
.opencode/skills/chrome-devtools/scripts/aria-snapshot.js
Executable file
@@ -0,0 +1,363 @@
#!/usr/bin/env node
/**
 * Get ARIA-based accessibility snapshot with stable element refs
 * Usage: node aria-snapshot.js [--url https://example.com] [--output snapshot.yaml]
 *
 * Returns YAML-formatted accessibility tree with:
 * - Semantic roles (button, link, textbox, heading, etc.)
 * - Accessible names (what screen readers announce)
 * - Element states (checked, disabled, expanded)
 * - Stable refs [ref=eN] that persist for interaction
 *
 * Session behavior:
 *   By default, browser stays running for session persistence
 *   Use --close true to fully close browser
 */
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError } from './lib/browser.js';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';

const __dirname = path.dirname(fileURLToPath(import.meta.url));

/**
 * Get ARIA snapshot script to inject into page
 * Builds YAML-formatted accessibility tree with element references
 */
function getAriaSnapshotScript() {
  return `
    (function() {
      // Store refs on window for later retrieval via selectRef
      window.__chromeDevToolsRefs = window.__chromeDevToolsRefs || new Map();
      let refCounter = window.__chromeDevToolsRefCounter || 1;

      // ARIA roles we care about for interaction
      const INTERACTIVE_ROLES = new Set([
        'button', 'link', 'textbox', 'checkbox', 'radio', 'combobox',
        'listbox', 'option', 'menuitem', 'menuitemcheckbox', 'menuitemradio',
        'tab', 'switch', 'slider', 'spinbutton', 'searchbox', 'tree', 'treeitem',
        'grid', 'gridcell', 'row', 'rowheader', 'columnheader'
      ]);

      // Landmark roles for structure
      const LANDMARK_ROLES = new Set([
        'banner', 'navigation', 'main', 'complementary', 'contentinfo',
        'search', 'form', 'region', 'article', 'dialog', 'alertdialog'
      ]);

      // Implicit ARIA roles from HTML elements
      const IMPLICIT_ROLES = {
        'A': (el) => el.href ? 'link' : null,
        'BUTTON': () => 'button',
        'INPUT': (el) => {
          const type = el.type?.toLowerCase();
          if (type === 'checkbox') return 'checkbox';
          if (type === 'radio') return 'radio';
          if (type === 'submit' || type === 'button' || type === 'reset') return 'button';
          if (type === 'search') return 'searchbox';
          if (type === 'range') return 'slider';
          if (type === 'number') return 'spinbutton';
          return 'textbox';
        },
        'TEXTAREA': () => 'textbox',
        'SELECT': () => 'combobox',
        'OPTION': () => 'option',
        'IMG': () => 'img',
        'NAV': () => 'navigation',
        'MAIN': () => 'main',
        'HEADER': () => 'banner',
        'FOOTER': () => 'contentinfo',
        'ASIDE': () => 'complementary',
        'ARTICLE': () => 'article',
        'SECTION': (el) => el.getAttribute('aria-label') || el.getAttribute('aria-labelledby') ? 'region' : null,
        'FORM': () => 'form',
        'UL': () => 'list',
        'OL': () => 'list',
        'LI': () => 'listitem',
        'H1': () => 'heading',
        'H2': () => 'heading',
        'H3': () => 'heading',
        'H4': () => 'heading',
        'H5': () => 'heading',
        'H6': () => 'heading',
        'TABLE': () => 'table',
        'TR': () => 'row',
        'TH': () => 'columnheader',
        'TD': () => 'cell',
        'DIALOG': () => 'dialog'
      };

      function getRole(el) {
        // Explicit role takes precedence
        const explicitRole = el.getAttribute('role');
        if (explicitRole) return explicitRole;

        // Check implicit role
        const implicitFn = IMPLICIT_ROLES[el.tagName];
        if (implicitFn) return implicitFn(el);

        return null;
      }

      function getAccessibleName(el) {
        // aria-label takes precedence
        const ariaLabel = el.getAttribute('aria-label');
        if (ariaLabel) return ariaLabel.trim();

        // aria-labelledby
        const labelledBy = el.getAttribute('aria-labelledby');
        if (labelledBy) {
          const labels = labelledBy.split(' ')
            .map(id => document.getElementById(id)?.textContent?.trim())
            .filter(Boolean)
            .join(' ');
          if (labels) return labels;
        }

        // Input associated label
        if (el.tagName === 'INPUT' || el.tagName === 'TEXTAREA' || el.tagName === 'SELECT') {
          if (el.id) {
            const label = document.querySelector('label[for="' + el.id + '"]');
            if (label) return label.textContent?.trim();
          }
          // Check parent label
          const parentLabel = el.closest('label');
          if (parentLabel) {
            const labelText = parentLabel.textContent?.replace(el.value || '', '')?.trim();
            if (labelText) return labelText;
          }
        }

        // Button/link content
        if (el.tagName === 'BUTTON' || el.tagName === 'A') {
          const text = el.textContent?.trim();
          if (text) return text.substring(0, 100);
        }

        // Alt text for images
        if (el.tagName === 'IMG') {
          return el.alt || null;
        }

        // Title attribute fallback
        if (el.title) return el.title.trim();

        // Placeholder for inputs
        if (el.placeholder) return null; // Return null, will add as /placeholder

        return null;
      }

      function getStateFlags(el) {
        const flags = [];

        // Checked state
        if (el.checked || el.getAttribute('aria-checked') === 'true') {
          flags.push('checked');
        }

        // Disabled state
        if (el.disabled || el.getAttribute('aria-disabled') === 'true') {
          flags.push('disabled');
        }

        // Expanded state
        if (el.getAttribute('aria-expanded') === 'true') {
          flags.push('expanded');
        }

        // Selected state
        if (el.selected || el.getAttribute('aria-selected') === 'true') {
          flags.push('selected');
        }

        // Pressed state
        if (el.getAttribute('aria-pressed') === 'true') {
          flags.push('pressed');
        }

        // Required state
        if (el.required || el.getAttribute('aria-required') === 'true') {
          flags.push('required');
        }

        return flags;
      }

      function isVisible(el) {
        const style = window.getComputedStyle(el);
        if (style.display === 'none' || style.visibility === 'hidden') return false;
        const rect = el.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
      }

      function isInteractiveOrLandmark(role) {
        return INTERACTIVE_ROLES.has(role) || LANDMARK_ROLES.has(role);
      }

      function shouldInclude(el) {
        if (!isVisible(el)) return false;
        const role = getRole(el);
        if (!role) return false;
        // Include interactive, landmarks, and structural elements
        return isInteractiveOrLandmark(role) ||
          role === 'heading' ||
          role === 'img' ||
          role === 'list' ||
          role === 'listitem' ||
          role === 'table' ||
          role === 'row' ||
          role === 'cell' ||
          role === 'columnheader';
      }

      function assignRef(el, role) {
        // Only assign refs to interactive elements
        if (!INTERACTIVE_ROLES.has(role)) return null;

        const ref = 'e' + refCounter++;
        window.__chromeDevToolsRefs.set(ref, el);
        return ref;
      }

      function buildYaml(el, indent = 0) {
        const role = getRole(el);
        if (!role) return '';

        const prefix = ' '.repeat(indent) + '- ';
        const lines = [];

        // Build the line: role "name" [flags] [ref=eN]
        let line = prefix + role;

        const name = getAccessibleName(el);
        if (name) {
          line += ' "' + name.replace(/"/g, '\\\\"') + '"';
        }

        // Add heading level
        if (role === 'heading') {
          const level = el.tagName.match(/H(\\d)/)?.[1] || el.getAttribute('aria-level');
          if (level) line += ' [level=' + level + ']';
        }

        // Add state flags
        const flags = getStateFlags(el);
        flags.forEach(flag => {
          line += ' [' + flag + ']';
        });

        // Add ref for interactive elements
        const ref = assignRef(el, role);
        if (ref) {
          line += ' [ref=' + ref + ']';
        }

        lines.push(line);

        // Add metadata on subsequent lines
        if (el.tagName === 'A' && el.href) {
          lines.push(' '.repeat(indent + 1) + '/url: ' + el.href);
        }
        if (el.placeholder) {
          lines.push(' '.repeat(indent + 1) + '/placeholder: "' + el.placeholder + '"');
        }
        if (el.tagName === 'INPUT' && el.value && el.type !== 'password') {
          lines.push(' '.repeat(indent + 1) + '/value: "' + el.value.substring(0, 50) + '"');
        }

        // Process children
        const children = Array.from(el.children);
        children.forEach(child => {
          const childYaml = buildYaml(child, indent + 1);
          if (childYaml) lines.push(childYaml);
        });

        return lines.join('\\n');
      }

      function getSnapshot() {
        const lines = [];

        // Start from body
        const children = Array.from(document.body.children);
        children.forEach(child => {
          const yaml = buildYaml(child, 0);
          if (yaml) lines.push(yaml);
        });

        // Save ref counter for next snapshot
        window.__chromeDevToolsRefCounter = refCounter;

        return lines.join('\\n');
      }

      return getSnapshot();
    })();
  `;
}

async function ariaSnapshot() {
  const args = parseArgs(process.argv.slice(2));

  try {
    const browser = await getBrowser({
      headless: args.headless
    });

    const page = await getPage(browser);

    // Navigate if URL provided
    if (args.url) {
      await page.goto(args.url, {
        waitUntil: args['wait-until'] || 'networkidle2'
      });
    }

    // Get ARIA snapshot
    const snapshot = await page.evaluate(getAriaSnapshotScript());

    // Build result
    const result = {
      success: true,
      url: page.url(),
      title: await page.title(),
      format: 'yaml',
      snapshot: snapshot
    };

    // Output to file or stdout
    if (args.output) {
      const outputPath = args.output;

      // Ensure snapshots directory exists
      const outputDir = path.dirname(outputPath);
      await fs.mkdir(outputDir, { recursive: true });

      // Write YAML snapshot
      await fs.writeFile(outputPath, snapshot, 'utf8');

      outputJSON({
        success: true,
        output: path.resolve(outputPath),
        url: page.url()
      });
    } else {
      // Output to stdout
      outputJSON(result);
    }

    // Default: disconnect to keep browser running for session persistence
    // Use --close true to fully close browser
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
    process.exit(0);
  } catch (error) {
    outputError(error);
  }
}

ariaSnapshot();
84
.opencode/skills/chrome-devtools/scripts/click.js
Executable file
@@ -0,0 +1,84 @@
#!/usr/bin/env node
/**
 * Click an element
 * Usage: node click.js --selector ".button" [--url https://example.com] [--wait-for ".result"]
 * Supports both CSS and XPath selectors:
 *   - CSS: node click.js --selector "button.submit"
 *   - XPath: node click.js --selector "//button[contains(text(),'Submit')]"
 */
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError } from './lib/browser.js';
import { parseSelector, waitForElement, clickElement, enhanceError } from './lib/selector.js';

async function click() {
  const args = parseArgs(process.argv.slice(2));

  if (!args.selector) {
    outputError(new Error('--selector is required'));
    return;
  }

  try {
    const browser = await getBrowser({
      headless: args.headless
    });

    const page = await getPage(browser);

    // Navigate if URL provided
    if (args.url) {
      await page.goto(args.url, {
        waitUntil: args['wait-until'] || 'networkidle2'
      });
    }

    // Parse and validate selector
    const parsed = parseSelector(args.selector);

    // Wait for element based on selector type
    await waitForElement(page, parsed, {
      visible: true,
      timeout: parseInt(args.timeout || '5000')
    });

    // Set up navigation promise BEFORE clicking (in case click triggers immediate navigation)
    const navigationPromise = page.waitForNavigation({
      waitUntil: 'load',
      timeout: 5000
    }).catch(() => null); // Catch timeout - navigation may not occur

    // Click element
    await clickElement(page, parsed);

    // Wait for optional selector after click
    if (args['wait-for']) {
      await page.waitForSelector(args['wait-for'], {
        timeout: parseInt(args.timeout || '5000')
      });
    } else {
      // Wait for navigation to complete (or timeout if no navigation)
      await navigationPromise;
    }

    outputJSON({
      success: true,
      url: page.url(),
      title: await page.title()
    });

    // Default: disconnect to keep browser running for session persistence
    // Use --close true to fully close browser
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
    process.exit(0);
  } catch (error) {
    // Enhance error message with troubleshooting tips
    const enhanced = enhanceError(error, args.selector);
    outputError(enhanced);
    process.exit(1);
  }
}

click();
146
.opencode/skills/chrome-devtools/scripts/connect-chrome.js
Normal file
@@ -0,0 +1,146 @@
#!/usr/bin/env node
/**
 * Connect to an existing Chrome browser launched with remote debugging
 *
 * Two-step workflow:
 *   1. User launches Chrome with: chrome --remote-debugging-port=9222
 *   2. Connect with this script: node connect-chrome.js --browser-url http://localhost:9222
 *
 * Or launch Chrome automatically:
 *   node connect-chrome.js --launch --port 9222
 *
 * This is useful for:
 *   - Debugging (can see browser window while scripts run)
 *   - Using existing Chrome session with all logged-in accounts
 *   - Avoiding Puppeteer's bundled Chromium
 */
import { spawn } from 'child_process';
import { getBrowser, getPage, disconnectBrowser, parseArgs, outputJSON, outputError } from './lib/browser.js';

/**
 * Get Chrome executable path based on OS
 * @returns {string} - Path to Chrome executable
 */
function getChromeExecutablePath() {
  switch (process.platform) {
    case 'darwin':
      return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome';
    case 'win32': {
      // Try common installation paths
      const paths = [
        `${process.env['PROGRAMFILES']}/Google/Chrome/Application/chrome.exe`,
        `${process.env['PROGRAMFILES(X86)']}/Google/Chrome/Application/chrome.exe`,
        `${process.env.LOCALAPPDATA}/Google/Chrome/Application/chrome.exe`
      ];
      // Return first path (user should have Chrome installed in standard location)
      return paths[0];
    }
    default: // Linux
      return 'google-chrome';
  }
}

/**
 * Launch Chrome with remote debugging enabled
 * @param {number} port - Debug port (default 9222)
 * @returns {ChildProcess}
 */
function launchChromeWithDebugging(port = 9222) {
  const chromePath = getChromeExecutablePath();
  const args = [
    `--remote-debugging-port=${port}`,
    '--no-first-run',
    '--no-default-browser-check'
  ];

  const chrome = spawn(chromePath, args, {
    detached: true,
    stdio: 'ignore'
  });

  chrome.unref();
  return chrome;
}

/**
 * Wait for Chrome debug endpoint to be ready
 * @param {string} browserUrl - Browser debug URL
 * @param {number} timeout - Max wait time in ms
 * @returns {Promise<boolean>}
 */
async function waitForDebugEndpoint(browserUrl, timeout = 10000) {
  const start = Date.now();
  const checkUrl = `${browserUrl}/json/version`;

  while (Date.now() - start < timeout) {
    try {
      const response = await fetch(checkUrl);
      if (response.ok) return true;
    } catch {
      // Not ready yet
    }
    await new Promise(r => setTimeout(r, 500));
  }
  return false;
}

async function connectChrome() {
  const args = parseArgs(process.argv.slice(2));
  const port = parseInt(args.port || '9222');
  const browserUrl = args['browser-url'] || `http://localhost:${port}`;

  try {
    // Launch Chrome if requested
    if (args.launch) {
      launchChromeWithDebugging(port);

      // Wait for debug endpoint
      const ready = await waitForDebugEndpoint(browserUrl);
      if (!ready) {
        outputError(new Error(`Chrome did not start within timeout. Check if port ${port} is available.`));
        return;
      }
    }

    // Connect to Chrome via browserUrl
    const browser = await getBrowser({ browserUrl });
    const page = await getPage(browser);

    // Navigate if URL provided
    if (args.url) {
      await page.goto(args.url, {
        waitUntil: args['wait-until'] || 'networkidle2',
        timeout: parseInt(args.timeout || '30000')
      });
    }

    const result = {
      success: true,
      browserUrl,
      connected: true,
      url: page.url(),
      title: await page.title(),
      hint: args.launch
        ? 'Chrome launched with debugging. Browser window is visible.'
        : 'Connected to existing Chrome instance.'
    };

    outputJSON(result);

    // Default: disconnect to keep browser running
    await disconnectBrowser();
    process.exit(0);
  } catch (error) {
    // Provide helpful error message
    if (error.message.includes('ECONNREFUSED')) {
      outputError(new Error(
        `Could not connect to Chrome at ${browserUrl}. ` +
        `Make sure Chrome is running with: ` +
        `chrome --remote-debugging-port=${port}`
      ));
    } else {
      outputError(error);
    }
  }
}

connectChrome();
81
.opencode/skills/chrome-devtools/scripts/console.js
Executable file
@@ -0,0 +1,81 @@
#!/usr/bin/env node
/**
 * Monitor console messages
 * Usage: node console.js --url https://example.com [--types error,warn] [--duration 5000]
 */
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError } from './lib/browser.js';

async function monitorConsole() {
  const args = parseArgs(process.argv.slice(2));

  if (!args.url) {
    outputError(new Error('--url is required'));
    return;
  }

  try {
    const browser = await getBrowser({
      headless: args.headless
    });

    const page = await getPage(browser);

    const messages = [];
    const filterTypes = args.types ? args.types.split(',') : null;

    // Listen for console messages
    page.on('console', (msg) => {
      const type = msg.type();

      if (!filterTypes || filterTypes.includes(type)) {
        messages.push({
          type: type,
          text: msg.text(),
          location: msg.location(),
          timestamp: Date.now()
        });
      }
    });

    // Listen for page errors
    page.on('pageerror', (error) => {
      messages.push({
        type: 'pageerror',
        text: error.message,
        stack: error.stack,
        timestamp: Date.now()
      });
    });

    // Navigate
    await page.goto(args.url, {
      waitUntil: args['wait-until'] || 'networkidle2'
    });

    // Wait for additional time if specified
    if (args.duration) {
      await new Promise(resolve => setTimeout(resolve, parseInt(args.duration)));
    }

    outputJSON({
      success: true,
      url: page.url(),
      messageCount: messages.length,
      messages: messages
    });

    // Default: disconnect to keep browser running for session persistence
    // Use --close true to fully close browser
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
    process.exit(0);
  } catch (error) {
    outputError(error);
    process.exit(1);
  }
}

monitorConsole();
56
.opencode/skills/chrome-devtools/scripts/evaluate.js
Executable file
@@ -0,0 +1,56 @@
#!/usr/bin/env node
/**
 * Execute JavaScript in page context
 * Usage: node evaluate.js --script "document.title" [--url https://example.com]
 */
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError } from './lib/browser.js';

async function evaluate() {
  const args = parseArgs(process.argv.slice(2));

  if (!args.script) {
    outputError(new Error('--script is required'));
    return;
  }

  try {
    const browser = await getBrowser({
      headless: args.headless
    });

    const page = await getPage(browser);

    // Navigate if URL provided
    if (args.url) {
      await page.goto(args.url, {
        waitUntil: args['wait-until'] || 'networkidle2'
      });
    }

    const result = await page.evaluate(async (script) => {
      // Wrap in async IIFE so user scripts can use await
      // eslint-disable-next-line no-eval
      return await eval(`(async () => { return ${script}; })()`);
    }, args.script);

    outputJSON({
      success: true,
      result: result,
      url: page.url()
    });

    // Default: disconnect to keep browser running for session persistence
    // Use --close true to fully close browser
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
    process.exit(0);
  } catch (error) {
    outputError(error);
    process.exit(1);
  }
}

evaluate();
77
.opencode/skills/chrome-devtools/scripts/fill.js
Executable file
@@ -0,0 +1,77 @@
#!/usr/bin/env node
/**
 * Fill form fields
 * Usage: node fill.js --selector "#input" --value "text" [--url https://example.com]
 * Supports both CSS and XPath selectors:
 *   - CSS: node fill.js --selector "#email" --value "user@example.com"
 *   - XPath: node fill.js --selector "//input[@type='email']" --value "user@example.com"
 */
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError } from './lib/browser.js';
import { parseSelector, waitForElement, typeIntoElement, enhanceError } from './lib/selector.js';

async function fill() {
  const args = parseArgs(process.argv.slice(2));

  if (!args.selector) {
    outputError(new Error('--selector is required'));
    return;
  }

  if (!args.value) {
    outputError(new Error('--value is required'));
    return;
  }

  try {
    const browser = await getBrowser({
      headless: args.headless
    });

    const page = await getPage(browser);

    // Navigate if URL provided
    if (args.url) {
      await page.goto(args.url, {
        waitUntil: args['wait-until'] || 'networkidle2'
      });
    }

    // Parse and validate selector
    const parsed = parseSelector(args.selector);

    // Wait for element based on selector type
    await waitForElement(page, parsed, {
      visible: true,
      timeout: parseInt(args.timeout || '5000')
    });

    // Type into element
    await typeIntoElement(page, parsed, args.value, {
      clear: args.clear === 'true',
      delay: parseInt(args.delay || '0')
    });

    outputJSON({
      success: true,
      selector: args.selector,
      value: args.value,
      url: page.url()
    });

    // Default: disconnect to keep browser running for session persistence
    // Use --close true to fully close browser
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
    process.exit(0);
  } catch (error) {
    // Enhance error message with troubleshooting tips
    const enhanced = enhanceError(error, args.selector);
    outputError(enhanced);
    process.exit(1);
  }
}

fill();
205
.opencode/skills/chrome-devtools/scripts/import-cookies.js
Normal file
@@ -0,0 +1,205 @@
#!/usr/bin/env node
/**
 * Import cookies from JSON file exported by browser extensions
 * Supports: EditThisCookie, Cookie-Editor, Netscape (txt) formats
 *
 * Usage:
 *   node import-cookies.js --file ./cookies.json --url https://example.com
 *   node import-cookies.js --file ./cookies.txt --format netscape --url https://example.com
 *
 * Workflow:
 *   1. Install "Cookie-Editor" or "EditThisCookie" Chrome extension
 *   2. Navigate to target site and log in manually
 *   3. Export cookies as JSON via extension
 *   4. Run this script to import into puppeteer session
 *   5. Use other scripts (screenshot, navigate) with authenticated session
 */
import fs from 'fs';
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError, saveAuthSession } from './lib/browser.js';

/**
 * Parse cookies from EditThisCookie/Cookie-Editor JSON format
 * @param {Array} cookies - Array of cookie objects
 * @returns {Array} - Normalized cookie array for Puppeteer
 */
function parseJsonCookies(cookies) {
  return cookies.map(cookie => {
    // Handle different property names from various extensions
    const normalized = {
      name: cookie.name,
      value: cookie.value,
      domain: cookie.domain,
      path: cookie.path || '/',
      httpOnly: cookie.httpOnly ?? false,
      secure: cookie.secure ?? false,
      sameSite: cookie.sameSite || 'Lax'
    };

    // Handle expiration (different extensions use different names)
    if (cookie.expirationDate) {
      normalized.expires = cookie.expirationDate;
    } else if (cookie.expires) {
      normalized.expires = typeof cookie.expires === 'number'
        ? cookie.expires
        : new Date(cookie.expires).getTime() / 1000;
    }

    return normalized;
  });
}

/**
 * Parse Netscape cookie file format (used by curl, wget, etc.)
 * Format: domain\tflags\tpath\tsecure\texpiration\tname\tvalue
 * @param {string} content - Netscape format cookie file content
 * @returns {Array} - Normalized cookie array for Puppeteer
 */
function parseNetscapeCookies(content) {
  const cookies = [];
  const lines = content.split('\n');

  for (const line of lines) {
    // Skip comments and empty lines
    if (line.startsWith('#') || line.trim() === '') continue;

    const parts = line.split('\t');
    if (parts.length < 7) continue;

    const [domain, , path, secure, expires, name, value] = parts;

    cookies.push({
      name: name.trim(),
      value: value.trim(),
      domain: domain.trim(),
      path: path.trim() || '/',
      secure: secure.toUpperCase() === 'TRUE',
      httpOnly: false, // Netscape format doesn't include httpOnly
      expires: parseInt(expires, 10) || undefined,
      sameSite: 'Lax'
    });
  }

  return cookies;
}

/**
 * Detect cookie file format from content
 * @param {string} content - File content
 * @returns {string} - 'json' or 'netscape'
 */
function detectFormat(content) {
  const trimmed = content.trim();
  if (trimmed.startsWith('[') || trimmed.startsWith('{')) {
    return 'json';
  }
  return 'netscape';
}

async function importCookies() {
  const args = parseArgs(process.argv.slice(2));

  if (!args.file) {
    outputError(new Error('--file is required (path to cookies file)'));
    return;
  }

  if (!args.url) {
    outputError(new Error('--url is required (target URL to apply cookies)'));
    return;
  }

  // Read cookie file
  let fileContent;
  try {
    fileContent = fs.readFileSync(args.file, 'utf8');
  } catch (e) {
    outputError(new Error(`Failed to read cookie file: ${e.message}`));
    return;
  }

  // Parse cookies based on format
  const format = args.format || detectFormat(fileContent);
  let cookies;

  try {
    if (format === 'json') {
      const parsed = JSON.parse(fileContent);
      // Handle both array and object with cookies property
      const cookieArray = Array.isArray(parsed) ? parsed : (parsed.cookies || []);
      cookies = parseJsonCookies(cookieArray);
    } else {
      cookies = parseNetscapeCookies(fileContent);
    }
  } catch (e) {
    outputError(new Error(`Failed to parse cookies (${format}): ${e.message}`));
    return;
  }

  if (cookies.length === 0) {
    outputError(new Error('No valid cookies found in file'));
    return;
  }

  try {
    const browser = await getBrowser({
      headless: args.headless
    });

    const page = await getPage(browser);

    // Navigate to URL first to establish domain context
    await page.goto(args.url, {
      waitUntil: args['wait-until'] || 'networkidle2',
      timeout: parseInt(args.timeout || '30000')
    });

    // Filter cookies by domain if --strict-domain is set
    let cookiesToApply = cookies;
    if (args['strict-domain']) {
      const urlDomain = new URL(args.url).hostname;
      cookiesToApply = cookies.filter(c => {
        const cookieDomain = c.domain.startsWith('.') ? c.domain.slice(1) : c.domain;
        return urlDomain.endsWith(cookieDomain);
      });
    }

    // Apply cookies
    await page.setCookie(...cookiesToApply);

    // Save to auth session for persistence
    saveAuthSession({ cookies: cookiesToApply });

    // Reload to apply cookies if --reload is set
    if (args.reload === 'true') {
      await page.reload({ waitUntil: 'networkidle2' });
    }

    const result = {
      success: true,
      file: args.file,
      format,
      url: args.url,
      imported: {
        total: cookiesToApply.length,
        names: cookiesToApply.map(c => c.name)
      },
      persisted: true,
      finalUrl: page.url(),
      title: await page.title()
    };

    outputJSON(result);

    // Default: disconnect to keep browser running
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
    process.exit(0);
  } catch (error) {
    outputError(error);
  }
}

importCookies();
230
.opencode/skills/chrome-devtools/scripts/inject-auth.js
Executable file
@@ -0,0 +1,230 @@
#!/usr/bin/env node
/**
 * Inject authentication cookies/tokens into browser session
 * Usage: node inject-auth.js --url https://example.com --cookies '[{"name":"token","value":"xxx","domain":".example.com"}]'
 *        node inject-auth.js --url https://example.com --token "Bearer xxx" [--header Authorization]
 *        node inject-auth.js --url https://example.com --local-storage '{"key":"value"}'
 *        node inject-auth.js --url https://example.com --session-storage '{"key":"value"}'
 *
 * This script injects authentication data into browser session for testing protected routes.
 * The session persists across script executions until --close true is used.
 *
 * Workflow for testing protected routes:
 *   1. User manually logs into the site in their browser
 *   2. User extracts cookies/tokens from browser DevTools
 *   3. Run this script to inject auth into puppeteer session
 *   4. Run other scripts (screenshot, navigate, etc.) which will use authenticated session
 *
 * Session behavior:
 *   --close false : Keep browser running (default for chaining)
 *   --close true  : Close browser completely and clear session
 */
import { getBrowser, getPage, closeBrowser, disconnectBrowser, parseArgs, outputJSON, outputError, saveAuthSession, clearAuthSession } from './lib/browser.js';

/**
 * Parse cookies from JSON string or file
 * @param {string} cookiesInput - JSON string or file path
 * @returns {Array} - Array of cookie objects
 */
function parseCookies(cookiesInput) {
  try {
    // Try parsing as JSON string
    return JSON.parse(cookiesInput);
  } catch {
    throw new Error(`Invalid cookies format. Expected JSON array: [{"name":"cookie_name","value":"cookie_value","domain":".example.com"}]`);
  }
}

/**
 * Parse storage data from JSON string
 * @param {string} storageInput - JSON string
 * @returns {Object} - Storage key-value pairs
 */
function parseStorage(storageInput) {
  try {
    return JSON.parse(storageInput);
  } catch {
    throw new Error(`Invalid storage format. Expected JSON object: {"key":"value"}`);
  }
}

async function injectAuth() {
  const args = parseArgs(process.argv.slice(2));

  if (!args.url) {
    outputError(new Error('--url is required (base URL for the protected site)'));
    return;
  }

  // Validate at least one auth method provided
  if (!args.cookies && !args.token && !args['local-storage'] && !args['session-storage']) {
    outputError(new Error('At least one auth method required: --cookies, --token, --local-storage, or --session-storage'));
    return;
  }

  try {
    const browser = await getBrowser({
      headless: args.headless
    });

    const page = await getPage(browser);

    // Navigate to the URL first to set the domain context
    await page.goto(args.url, {
      waitUntil: args['wait-until'] || 'networkidle2',
      timeout: parseInt(args.timeout || '30000')
    });

    const result = {
      success: true,
      url: args.url,
      injected: []
    };

    // Inject cookies
    if (args.cookies) {
      const cookies = parseCookies(args.cookies);

      // Validate and normalize cookies
      const normalizedCookies = cookies.map(cookie => {
        if (!cookie.name || !cookie.value) {
          throw new Error(`Cookie must have 'name' and 'value' properties`);
        }

        // Extract domain from URL if not provided
        if (!cookie.domain) {
          const urlObj = new URL(args.url);
          cookie.domain = urlObj.hostname;
        }

        return {
          name: cookie.name,
          value: cookie.value,
          domain: cookie.domain,
          path: cookie.path || '/',
          httpOnly: cookie.httpOnly !== undefined ? cookie.httpOnly : false,
          secure: cookie.secure !== undefined ? cookie.secure : args.url.startsWith('https'),
          sameSite: cookie.sameSite || 'Lax',
          ...(cookie.expires && { expires: cookie.expires })
        };
      });

      await page.setCookie(...normalizedCookies);
      result.injected.push({
        type: 'cookies',
        count: normalizedCookies.length,
        names: normalizedCookies.map(c => c.name)
      });
    }

    // Inject Bearer token via localStorage (common pattern)
    if (args.token) {
      const tokenKey = args['token-key'] || 'access_token';
      const token = args.token.startsWith('Bearer ') ? args.token.slice(7) : args.token;

      await page.evaluate((key, value) => {
        localStorage.setItem(key, value);
      }, tokenKey, token);

      result.injected.push({
        type: 'token',
        key: tokenKey,
        storage: 'localStorage'
      });

      // Also set Authorization header for future requests if header option provided
      if (args.header) {
        await page.setExtraHTTPHeaders({
          [args.header]: args.token.startsWith('Bearer ') ? args.token : `Bearer ${args.token}`
        });
        result.injected.push({
          type: 'header',
          name: args.header
        });
      }
    }

    // Inject localStorage items
    if (args['local-storage']) {
      const storageData = parseStorage(args['local-storage']);

      await page.evaluate((data) => {
        Object.entries(data).forEach(([key, value]) => {
          localStorage.setItem(key, typeof value === 'string' ? value : JSON.stringify(value));
        });
      }, storageData);

      result.injected.push({
        type: 'localStorage',
        keys: Object.keys(storageData)
      });
    }

    // Inject sessionStorage items
    if (args['session-storage']) {
      const storageData = parseStorage(args['session-storage']);

      await page.evaluate((data) => {
        Object.entries(data).forEach(([key, value]) => {
          sessionStorage.setItem(key, typeof value === 'string' ? value : JSON.stringify(value));
        });
      }, storageData);

      result.injected.push({
        type: 'sessionStorage',
        keys: Object.keys(storageData)
      });
    }

    // Reload page to apply auth (optional, use --reload true)
    if (args.reload === 'true') {
      await page.reload({ waitUntil: 'networkidle2' });
      result.reloaded = true;
    }

    // Save auth session to file for persistence across script executions
    const authSessionData = {};

    if (args.cookies) {
      authSessionData.cookies = parseCookies(args.cookies);
    }
    if (args['local-storage']) {
      authSessionData.localStorage = parseStorage(args['local-storage']);
    }
    if (args['session-storage']) {
      authSessionData.sessionStorage = parseStorage(args['session-storage']);
    }
    if (args.token && args.header) {
      authSessionData.headers = {
        [args.header]: args.token.startsWith('Bearer ') ? args.token : `Bearer ${args.token}`
      };
    }

    // Clear existing auth if --clear flag used
    if (args.clear === 'true') {
      clearAuthSession();
      result.cleared = true;
    } else if (Object.keys(authSessionData).length > 0) {
      saveAuthSession(authSessionData);
      result.persisted = true;
    }

    // Verify auth by checking page title and URL after injection
    result.finalUrl = page.url();
    result.title = await page.title();

    outputJSON(result);

    // Default: disconnect to keep browser running for session persistence
    if (args.close === 'true') {
      await closeBrowser();
    } else {
      await disconnectBrowser();
    }
    process.exit(0);
  } catch (error) {
    outputError(error);
  }
}

injectAuth();