#!/usr/bin/env python3
"""
Batch process multiple media files using Gemini API.

Supports all Gemini modalities:
- Audio: Transcription, analysis, summarization
- Image: Captioning, detection, OCR, analysis
- Video: Summarization, Q&A, scene detection
- Document: PDF extraction, structured output
- Generation: Image creation via Imagen 4 or Nano Banana (Gemini native)
  - Nano Banana 2 (gemini-3.1-flash-image-preview): Fastest, 95% Pro quality (default)
  - Nano Banana Pro (gemini-3-pro-image-preview): Quality/4K text/reasoning
  - Imagen 4 (imagen-4.0-*): Production-grade generation
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any, Optional
|
|
import csv
|
|
import shutil
|
|
|
|
# Import centralized environment resolver (works for both local and global installs).
# Path layout assumed: <claude_root>/skills/<skill>/scripts/<this file>, so four
# .parent hops reach the root that contains the shared 'scripts' directory.
CLAUDE_ROOT = Path(__file__).parent.parent.parent.parent
sys.path.insert(0, str(CLAUDE_ROOT / 'scripts'))
try:
    from resolve_env import resolve_env
    CENTRALIZED_RESOLVER_AVAILABLE = True
except ImportError:
    # Fallback if centralized resolver not available: try python-dotenv so
    # find_api_key() can still load .env files manually.
    CENTRALIZED_RESOLVER_AVAILABLE = False
    try:
        from dotenv import load_dotenv
    except ImportError:
        # Neither resolver nor dotenv present; find_api_key() falls back to
        # plain os.getenv lookups only.
        load_dotenv = None
|
|
|
|
# Import key rotation support from the shared 'common' directory.
sys.path.insert(0, str(Path(__file__).parent.parent.parent / 'common'))
try:
    from api_key_rotator import KeyRotator, is_rate_limit_error, is_server_error
    from api_key_helper import find_all_api_keys
    KEY_ROTATION_AVAILABLE = True
except ImportError:
    # Rotation is optional: stub the names out so call sites can guard on
    # KEY_ROTATION_AVAILABLE and on the truthiness of each helper.
    KEY_ROTATION_AVAILABLE = False
    KeyRotator = None
    is_rate_limit_error = None
    is_server_error = None
    find_all_api_keys = None
|
|
|
|
try:
    from google import genai
    from google.genai import types
except ImportError:
    # google-genai is a hard requirement; fail fast with install instructions.
    print("Error: google-genai package not installed")
    print("Install with: pip install google-genai")
    sys.exit(1)
|
|
|
|
|
|
# Image generation model configuration
# Default: gemini-3.1-flash-image-preview (Nano Banana 2 - 3-5x faster, 95% Pro quality)
# Alternative: imagen-4.0-generate-001 (production quality)
# All image generation requires billing - no completely free option exists
IMAGE_MODEL_DEFAULT = 'gemini-3.1-flash-image-preview'  # Nano Banana 2 (fastest, near-Pro quality)
IMAGE_MODEL_FALLBACK = 'gemini-2.5-flash-image'  # Fallback if Nano Banana 2 fails
# Models served by the dedicated generate_images API (see generate_image_imagen4);
# everything else goes through generate_content.
IMAGEN_MODELS = {
    'imagen-4.0-generate-001',
    'imagen-4.0-ultra-generate-001',
    'imagen-4.0-fast-generate-001',
}
# Video models have no fallback - Veo always requires billing
|
|
|
|
|
|
def find_api_key() -> Optional[str]:
    """Locate the Gemini API key.

    Delegates to the centralized resolver (~/.opencode/scripts/resolve_env.py)
    when it is importable; otherwise falls back to a local lookup that checks
    the process environment first, then loads .env files from the Claude root
    down to the skill directory (most specific file wins via override=True).

    Priority order (highest to lowest):
    1. process.env (runtime environment variables)
    2. PROJECT/.opencode/skills/ai-multimodal/.env (skill-specific)
    3. PROJECT/.opencode/skills/.env (shared skills)
    4. PROJECT/.opencode/.env (project global)
    5. ~/.opencode/skills/ai-multimodal/.env (user skill-specific)
    6. ~/.opencode/skills/.env (user shared)
    7. ~/.opencode/.env (user global)

    Returns:
        The API key string, or None if it cannot be found anywhere.
    """
    if CENTRALIZED_RESOLVER_AVAILABLE:
        # Centralized resolver (recommended path).
        return resolve_env('GEMINI_API_KEY', skill='ai-multimodal')

    # Legacy fallback: direct environment lookup first.
    key = os.getenv('GEMINI_API_KEY')
    if key:
        return key

    if load_dotenv:
        here = Path(__file__).parent          # .../skills/ai-multimodal/scripts
        skill_root = here.parent              # .../skills/ai-multimodal
        # Least specific first: override=True means later (more specific)
        # files win over earlier ones.
        candidates = (
            skill_root.parent.parent / '.env',  # claude root
            skill_root.parent / '.env',         # skills dir
            skill_root / '.env',                # this skill
        )
        for candidate in candidates:
            if candidate.exists():
                load_dotenv(candidate, override=True)

        key = os.getenv('GEMINI_API_KEY')
        if key:
            return key

    return None
|
|
|
|
|
|
def get_default_model(task: str) -> str:
    """Resolve the default model name for a task.

    Resolution order within each task family:
    1. Task-specific environment variable (IMAGE_GEN_MODEL, VIDEO_GEN_MODEL,
       MULTIMODAL_MODEL)
    2. Legacy environment variable (GEMINI_IMAGE_GEN_MODEL, GEMINI_MODEL)
    3. Hard-coded default
    """
    if task == 'generate':  # Image generation
        # New env var first, then legacy, then Nano Banana 2 (fastest,
        # near-Pro quality). imagen-4.0-generate-001 is the production
        # quality alternative.
        for var in ('IMAGE_GEN_MODEL', 'GEMINI_IMAGE_GEN_MODEL'):
            configured = os.getenv(var)
            if configured:
                return configured
        return 'gemini-3.1-flash-image-preview'

    if task == 'generate-video':
        return os.getenv('VIDEO_GEN_MODEL') or 'veo-3.1-generate-preview'

    if task in ('analyze', 'transcribe', 'extract'):
        for var in ('MULTIMODAL_MODEL', 'GEMINI_MODEL'):
            configured = os.getenv(var)
            if configured:
                return configured
        return 'gemini-2.5-flash'

    # Unknown task: generic multimodal default.
    return 'gemini-2.5-flash'
|
|
|
|
|
|
def validate_model_task_combination(model: str, task: str) -> None:
    """Check that *model* is able to service *task*.

    Raises:
        ValueError: if the model family cannot perform the requested task.
    """
    # Video generation requires a Veo model.
    if task == 'generate-video' and not model.startswith('veo-'):
        raise ValueError(
            f"Video generation requires Veo model, got '{model}'\n"
            f"Valid models: veo-3.1-generate-preview, veo-3.1-fast-generate-preview, "
            f"veo-3.0-generate-001, veo-3.0-fast-generate-001"
        )

    # Image generation: whitelist of known-good models.
    if task == 'generate':
        valid_image_models = [
            'imagen-4.0-generate-001',
            'imagen-4.0-ultra-generate-001',
            'imagen-4.0-fast-generate-001',
            'gemini-3.1-flash-image-preview',
            'gemini-3-pro-image-preview',
            'gemini-2.5-flash-image',
            'gemini-2.5-flash-image-preview',
        ]
        # Any gemini-* model is tolerated for analysis-based generation
        # (backward compatibility); everything else must be whitelisted.
        if model not in valid_image_models and not model.startswith('gemini-'):
            raise ValueError(
                f"Image generation requires Imagen/Gemini image model, got '{model}'\n"
                f"Valid models: {', '.join(valid_image_models)}"
            )
|
|
|
|
|
|
def infer_task_from_file(file_path: str) -> str:
    """Map a file's extension (case-insensitive) to its default task.

    Returns:
        'transcribe' for audio files, 'extract' for document files, and
        'analyze' for images, video, and anything unrecognized.
    """
    suffix = Path(file_path).suffix.lower()

    task_by_extension = {}
    for ext in ('.mp3', '.wav', '.aac', '.flac', '.ogg', '.aiff', '.m4a'):
        task_by_extension[ext] = 'transcribe'
    for ext in ('.pdf', '.txt', '.html', '.md', '.doc', '.docx'):
        task_by_extension[ext] = 'extract'
    # Image and video extensions both map to 'analyze', which is also the
    # fallback for unknown types, so they need no explicit entries.
    return task_by_extension.get(suffix, 'analyze')
|
|
|
|
|
|
def get_mime_type(file_path: str) -> str:
    """Determine MIME type from file extension.

    Covers every extension accepted by infer_task_from_file(); unknown
    extensions fall back to 'application/octet-stream'.
    """
    ext = Path(file_path).suffix.lower()

    mime_types = {
        # Audio
        '.mp3': 'audio/mp3',
        '.wav': 'audio/wav',
        '.aac': 'audio/aac',
        '.flac': 'audio/flac',
        '.ogg': 'audio/ogg',
        '.aiff': 'audio/aiff',
        '.m4a': 'audio/mp4',  # added: accepted as audio by infer_task_from_file
        # Image
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        '.heic': 'image/heic',
        '.heif': 'image/heif',
        '.gif': 'image/gif',  # added: accepted as image elsewhere
        '.bmp': 'image/bmp',  # added: accepted as image elsewhere
        # Video
        '.mp4': 'video/mp4',
        '.mpeg': 'video/mpeg',
        '.mov': 'video/quicktime',
        '.avi': 'video/x-msvideo',
        '.flv': 'video/x-flv',
        '.mpg': 'video/mpeg',
        '.webm': 'video/webm',
        '.wmv': 'video/x-ms-wmv',
        '.3gpp': 'video/3gpp',
        '.mkv': 'video/x-matroska',  # added: accepted as video elsewhere
        # Document
        '.pdf': 'application/pdf',
        '.txt': 'text/plain',
        '.html': 'text/html',
        '.md': 'text/markdown',
        '.doc': 'application/msword',  # added: accepted as document elsewhere
        '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',  # added
    }

    return mime_types.get(ext, 'application/octet-stream')
|
|
|
|
|
|
def upload_file(client: genai.Client, file_path: str, verbose: bool = False) -> Any:
    """Upload a local file via the Gemini File API and wait until usable.

    Audio and video uploads go through a server-side processing stage; this
    polls every 2 seconds (up to 5 minutes) until the file leaves the
    PROCESSING state. Other media types are usable immediately.

    Raises:
        ValueError: if server-side processing fails.
        TimeoutError: if processing does not finish within the time budget.
    """
    if verbose:
        print(f"Uploading {file_path}...")

    uploaded = client.files.upload(file=file_path)

    # Only audio/video need the processing wait.
    if get_mime_type(file_path).split('/')[0] in ('video', 'audio'):
        max_wait = 300  # 5 minutes
        poll_interval = 2
        waited = 0
        while uploaded.state.name == 'PROCESSING' and waited < max_wait:
            time.sleep(poll_interval)
            uploaded = client.files.get(name=uploaded.name)
            waited += poll_interval
            if verbose and waited % 10 == 0:
                print(f"  Processing... {waited}s")

        if uploaded.state.name == 'FAILED':
            raise ValueError(f"File processing failed: {file_path}")

        if uploaded.state.name == 'PROCESSING':
            raise TimeoutError(f"Processing timeout after {max_wait}s: {file_path}")

    if verbose:
        print(f"  Uploaded: {uploaded.name}")

    return uploaded
|
|
|
|
|
|
def _is_billing_error(error: Exception) -> bool:
|
|
"""Check if error is due to billing/access restrictions."""
|
|
error_str = str(error).lower()
|
|
billing_indicators = [
|
|
'billing',
|
|
'billed users',
|
|
'payment',
|
|
'access denied',
|
|
'not authorized',
|
|
'permission denied',
|
|
]
|
|
return any(indicator in error_str for indicator in billing_indicators)
|
|
|
|
|
|
def _is_free_tier_quota_error(error: Exception) -> bool:
|
|
"""Check if error indicates free tier has zero quota for this model.
|
|
|
|
Free tier users have NO access to image/video generation models.
|
|
The API returns 'limit: 0' or 'RESOURCE_EXHAUSTED' with quota details.
|
|
"""
|
|
error_str = str(error)
|
|
# Check for zero quota indicators
|
|
return (
|
|
'RESOURCE_EXHAUSTED' in error_str and
|
|
('limit: 0' in error_str or 'free_tier' in error_str.lower())
|
|
)
|
|
|
|
|
|
FREE_TIER_NO_ACCESS_MSG = """
|
|
[FREE TIER LIMITATION] Image/Video generation is NOT available on free tier.
|
|
|
|
Free tier users have zero quota (limit: 0) for:
|
|
- All Imagen models (imagen-4.0-*)
|
|
- All Veo models (veo-*)
|
|
- Gemini image models (gemini-*-image, gemini-*-image-preview)
|
|
|
|
To use image/video generation:
|
|
1. Enable billing: https://aistudio.google.com/apikey
|
|
2. Or use Google Cloud $300 free credits: https://cloud.google.com/free
|
|
|
|
STOP: Do not retry image/video generation on free tier - it will always fail.
|
|
""".strip()
|
|
|
|
|
|
def generate_image_imagen4(
    client,
    prompt: str,
    model: str,
    num_images: int = 1,
    aspect_ratio: str = '1:1',
    size: str = '1K',
    verbose: bool = False
) -> Dict[str, Any]:
    """Generate image(s) using Imagen 4 models via generate_images.

    Args:
        client: genai.Client instance.
        prompt: Text prompt describing the desired image.
        model: Imagen model name (imagen-4.0-*).
        num_images: Number of images to request.
        aspect_ratio: e.g. '1:1', '16:9'.
        size: Image size ('1K'/'2K'); ignored by the Fast model, which does
            not support the imageSize parameter.
        verbose: Print progress to stdout.

    Returns:
        Dict with status 'success' and generated file paths; or special
        status 'billing_required' if the model needs billing (so the caller
        can fall back to the free-tier generate_content API); or 'error'.
    """
    try:
        # Only Standard and Ultra support the imageSize parameter
        # (hoisted: this predicate was previously evaluated twice).
        supports_size = 'fast' not in model.lower() and model.startswith('imagen-')

        config_params = {
            'numberOfImages': num_images,
            'aspectRatio': aspect_ratio
        }
        if supports_size:
            config_params['imageSize'] = size

        gen_config = types.GenerateImagesConfig(**config_params)

        if verbose:
            print(f" Generating with: {model}")
            print(f" Config: {num_images} images, {aspect_ratio}", end='')
            if supports_size:
                print(f", {size}")
            else:
                print()

        response = client.models.generate_images(
            model=model,
            prompt=prompt,
            config=gen_config
        )

        # Resolve the output directory once (it is identical for every image;
        # previously this search + mkdir ran inside the per-image loop).
        # Project root = nearest ancestor containing .git or .claude.
        script_dir = Path(__file__).parent
        project_root = script_dir
        for parent in [script_dir] + list(script_dir.parents):
            if (parent / '.git').exists() or (parent / '.claude').exists():
                project_root = parent
                break

        output_dir = project_root / 'docs' / 'assets'
        output_dir.mkdir(parents=True, exist_ok=True)

        # Save each returned image with a timestamped name.
        generated_files = []
        for i, generated_image in enumerate(response.generated_images):
            output_file = output_dir / f"imagen4_generated_{int(time.time())}_{i}.png"

            with open(output_file, 'wb') as f:
                f.write(generated_image.image.image_bytes)
            generated_files.append(str(output_file))

            if verbose:
                print(f" Saved: {output_file}")

        return {
            'status': 'success',
            'generated_images': generated_files,
            'model': model
        }

    except Exception as e:
        # Billing errors on Imagen models get a special status so the caller
        # can silently fall back to a Gemini image model.
        if _is_billing_error(e) and model in IMAGEN_MODELS:
            return {
                'status': 'billing_required',
                'original_model': model,
                'error': str(e)
            }

        if verbose:
            print(f" Error: {str(e)}")
            import traceback
            traceback.print_exc()
        return {
            'status': 'error',
            'error': str(e)
        }
|
|
|
|
|
|
def generate_video_veo(
    client,
    prompt: str,
    model: str,
    resolution: str = '1080p',
    aspect_ratio: str = '16:9',
    reference_images: Optional[List[str]] = None,
    verbose: bool = False
) -> Dict[str, Any]:
    """Generate video using Veo models.

    For image-to-video with first/last frames (Veo 3.1):
    - First reference image becomes the opening frame (image parameter)
    - Second reference image becomes the closing frame (last_frame config)
    - Model interpolates between them to create smooth video

    Args:
        client: genai.Client instance.
        prompt: Text prompt describing the desired video.
        model: Veo model name (veo-*).
        resolution: Output resolution, e.g. '1080p'.
        aspect_ratio: Output aspect ratio, e.g. '16:9'.
        reference_images: Optional list of image paths (see above).
        verbose: Print progress to stdout.

    Returns:
        Dict with status 'success' plus output path/timing/size metadata,
        or status 'error' with the exception text.
    """
    try:
        # Build config with snake_case for Python SDK
        config_params = {
            'aspect_ratio': aspect_ratio,
            'resolution': resolution
        }

        # Prepare first frame and last frame images
        first_frame = None
        last_frame = None

        if reference_images:
            import mimetypes

            def load_image(img_path_str: str) -> types.Image:
                """Load image file as types.Image with bytes and mime type."""
                img_path = Path(img_path_str)
                image_bytes = img_path.read_bytes()
                mime_type, _ = mimetypes.guess_type(str(img_path))
                if not mime_type:
                    # Default when the extension is unrecognized.
                    mime_type = 'image/png'
                return types.Image(
                    image_bytes=image_bytes,
                    mime_type=mime_type
                )

            # First image = opening frame
            if len(reference_images) >= 1:
                first_frame = load_image(reference_images[0])

            # Second image = closing frame (last_frame in config)
            if len(reference_images) >= 2:
                last_frame = load_image(reference_images[1])
                config_params['last_frame'] = last_frame

        gen_config = types.GenerateVideosConfig(**config_params)

        if verbose:
            print(f" Generating video with Veo: {model}")
            print(f" Config: {resolution}, {aspect_ratio}")
            if first_frame:
                print(f" First frame: provided")
            if last_frame:
                print(f" Last frame: provided (interpolation mode)")

        start = time.time()

        if verbose:
            print(f" Starting video generation (this may take 11s-6min)...")

        # Call generate_videos with image parameter for first frame
        operation = client.models.generate_videos(
            model=model,
            prompt=prompt,
            image=first_frame,  # First frame as opening image
            config=gen_config
        )

        # Poll the long-running operation every 10s until complete.
        poll_count = 0
        while not operation.done:
            poll_count += 1
            if verbose and poll_count % 3 == 0:  # Update every 30s
                elapsed = time.time() - start
                print(f" Still generating... ({elapsed:.0f}s elapsed)")
            time.sleep(10)
            operation = client.operations.get(operation)

        duration = time.time() - start

        # Access generated video from operation response
        generated_video = operation.response.generated_videos[0]

        # Download the video file first (required before .save() below)
        client.files.download(file=generated_video.video)

        # Save video under <project root>/docs/assets; the project root is
        # the nearest ancestor containing .git or .claude.
        script_dir = Path(__file__).parent
        project_root = script_dir
        for parent in [script_dir] + list(script_dir.parents):
            if (parent / '.git').exists() or (parent / '.claude').exists():
                project_root = parent
                break

        output_dir = project_root / 'docs' / 'assets'
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"veo_generated_{int(time.time())}.mp4"

        # Now save to file
        generated_video.video.save(str(output_file))

        file_size = output_file.stat().st_size / (1024 * 1024)  # MB

        if verbose:
            print(f" Generated in {duration:.1f}s")
            print(f" File size: {file_size:.2f} MB")
            print(f" Saved: {output_file}")

        return {
            'status': 'success',
            'generated_video': str(output_file),
            'generation_time': duration,
            'file_size_mb': file_size,
            'model': model
        }

    except Exception as e:
        if verbose:
            print(f" Error: {str(e)}")
            import traceback
            traceback.print_exc()
        return {
            'status': 'error',
            'error': str(e)
        }
|
|
|
|
|
|
def process_file(
    client: genai.Client,
    file_path: Optional[str],
    prompt: str,
    model: str,
    task: str,
    format_output: str,
    aspect_ratio: Optional[str] = None,
    image_size: Optional[str] = None,
    verbose: bool = False,
    max_retries: int = 3
) -> Dict[str, Any]:
    """Process a single file (or a file-less generation prompt) with retries.

    Args:
        client: genai.Client instance.
        file_path: Input media path, or None for pure generation tasks.
        prompt: Instruction/prompt text.
        model: Model name passed to generate_content.
        task: One of 'generate', 'analyze', 'transcribe', 'extract'.
        format_output: 'json' requests an application/json response MIME type.
        aspect_ratio: Optional aspect ratio for image generation.
        image_size: Image size for Nano Banana models (1K, 2K, 4K). Must be uppercase K.
            Note: Not all models support image_size - only pass when explicitly needed.
        verbose: Print progress/retry information.
        max_retries: Base retry budget; transient 5xx errors get up to 5 attempts.

    Returns:
        Result dict with 'status' of 'success' or 'error'. Rate-limited
        failures carry a 'rate_limited' flag so the caller can rotate keys.
    """
    # BUGFIX: the previous `for attempt in range(max_retries)` loop could fall
    # off the end and implicitly return None when a 5xx error raised the
    # effective budget (max(max_retries, 5)) above max_retries, crashing
    # callers that do result.get(...). A manual counter honors the larger
    # budget and always returns a result dict.
    attempt = 0
    while True:
        try:
            # For generation tasks without input files
            if task == 'generate' and not file_path:
                content = [prompt]
            else:
                # Process input file
                file_path = Path(file_path)
                # Files >20MB must go through the File API; smaller files are
                # sent inline.
                file_size = file_path.stat().st_size
                use_file_api = file_size > 20 * 1024 * 1024  # >20MB

                if use_file_api:
                    # Upload to File API
                    myfile = upload_file(client, str(file_path), verbose)
                    content = [prompt, myfile]
                else:
                    # Inline data
                    with open(file_path, 'rb') as f:
                        file_bytes = f.read()

                    mime_type = get_mime_type(str(file_path))
                    content = [
                        prompt,
                        types.Part.from_bytes(data=file_bytes, mime_type=mime_type)
                    ]

            # Configure request
            config_args = {}
            if task == 'generate':
                # Nano Banana requires fully uppercase 'IMAGE' per API spec
                config_args['response_modalities'] = ['IMAGE']
                # Build image_config with aspect_ratio and/or image_size
                image_config_args = {}
                if aspect_ratio:
                    image_config_args['aspect_ratio'] = aspect_ratio
                if image_size:
                    # image_size must be uppercase K (1K, 2K, 4K)
                    image_config_args['image_size'] = image_size
                if image_config_args:
                    config_args['image_config'] = types.ImageConfig(**image_config_args)

            if format_output == 'json':
                config_args['response_mime_type'] = 'application/json'

            config = types.GenerateContentConfig(**config_args) if config_args else None

            # Generate content
            response = client.models.generate_content(
                model=model,
                contents=content,
                config=config
            )

            # Extract response
            result = {
                'file': str(file_path) if file_path else 'generated',
                'status': 'success',
                'response': response.text if hasattr(response, 'text') else None
            }

            # Handle image output: write any inline image parts to disk.
            if task == 'generate' and hasattr(response, 'candidates'):
                for i, part in enumerate(response.candidates[0].content.parts):
                    if part.inline_data:
                        # Determine output directory - use project root docs/assets
                        if file_path:
                            output_dir = Path(file_path).parent
                            base_name = Path(file_path).stem
                        else:
                            # Find project root (look for .git or .claude directory)
                            script_dir = Path(__file__).parent
                            project_root = script_dir
                            for parent in [script_dir] + list(script_dir.parents):
                                if (parent / '.git').exists() or (parent / '.claude').exists():
                                    project_root = parent
                                    break

                            output_dir = project_root / 'docs' / 'assets'
                            output_dir.mkdir(parents=True, exist_ok=True)
                            base_name = "generated"

                        output_file = output_dir / f"{base_name}_generated_{i}.png"
                        with open(output_file, 'wb') as f:
                            f.write(part.inline_data.data)
                        result['generated_image'] = str(output_file)
                        if verbose:
                            print(f" Saved image to: {output_file}")

            return result

        except Exception as e:
            # Don't retry on billing/free tier errors - they won't resolve
            if _is_billing_error(e) or _is_free_tier_quota_error(e):
                return {
                    'file': str(file_path) if file_path else 'generated',
                    'status': 'error',
                    'error': str(e)
                }

            # Check if this is a rate limit error (candidate for key rotation)
            is_rate_limited = (
                KEY_ROTATION_AVAILABLE and
                is_rate_limit_error and
                is_rate_limit_error(e)
            )

            # Check if this is a transient server error (503, 500, etc.)
            is_5xx = (
                KEY_ROTATION_AVAILABLE and
                is_server_error and
                is_server_error(e)
            )

            # Use more retries for transient 5xx errors (up to 5 attempts)
            effective_max = max(max_retries, 5) if is_5xx else max_retries

            if attempt >= effective_max - 1:
                return {
                    'file': str(file_path) if file_path else 'generated',
                    'status': 'error',
                    'error': str(e),
                    'rate_limited': is_rate_limited  # Flag for caller to handle rotation
                }

            # Longer backoff for 5xx (4s, 8s, 16s, 32s) vs default (1s, 2s, 4s)
            if is_5xx:
                wait_time = 4 * (2 ** attempt)  # 4, 8, 16, 32, 64
            else:
                wait_time = 2 ** attempt  # 1, 2, 4
            if verbose:
                error_type = "5xx server error" if is_5xx else "error"
                print(f" Retry {attempt + 1}/{effective_max - 1} after {wait_time}s ({error_type}): {e}")
            time.sleep(wait_time)
            attempt += 1
|
|
|
|
|
|
def batch_process(
    files: List[str],
    prompt: str,
    model: str,
    task: str,
    format_output: str,
    aspect_ratio: Optional[str] = None,
    num_images: int = 1,
    size: str = '1K',
    resolution: str = '1080p',
    reference_images: Optional[List[str]] = None,
    output_file: Optional[str] = None,
    verbose: bool = False,
    dry_run: bool = False
) -> List[Dict[str, Any]]:
    """Batch process multiple files with automatic key rotation.

    Args:
        files: Input media paths; may be empty for generation tasks.
        prompt: Instruction text applied per file (or alone for generation).
        model: Model name (Gemini / Imagen / Veo depending on task).
        task: 'generate', 'generate-video', 'analyze', 'transcribe', 'extract'.
        format_output: Forwarded to process_file and save_results.
        aspect_ratio: Optional aspect ratio for generation tasks.
        num_images: Number of images for Imagen generation.
        size: Image size hint (e.g. '1K').
        resolution: Video resolution for Veo generation.
        reference_images: Optional first/last frame images for Veo.
        output_file: Optional path to persist results via save_results.
        verbose: Print progress to stdout/stderr.
        dry_run: Print the planned run and return [] without API calls.

    Returns:
        List of per-item result dicts (empty for dry runs).

    Exits:
        sys.exit(1) when no API key can be found anywhere.
    """

    # Initialize key rotator or fall back to single key
    rotator = None
    api_key = None

    if KEY_ROTATION_AVAILABLE and find_all_api_keys:
        all_keys = find_all_api_keys()
        if all_keys:
            if len(all_keys) > 1:
                # Multiple keys found: rotate between them on rate limits.
                rotator = KeyRotator(keys=all_keys, verbose=verbose)
                api_key = rotator.get_key()
                if verbose:
                    print(f"✓ Key rotation enabled with {len(all_keys)} keys", file=sys.stderr)
            else:
                api_key = all_keys[0]
                if verbose:
                    print(f"✓ Using single API key: {api_key[:8]}...", file=sys.stderr)

    # Fallback to original single-key lookup
    if not api_key:
        api_key = find_api_key()

    if not api_key:
        # No key anywhere: print the full search hierarchy and bail out.
        print("Error: GEMINI_API_KEY not found in any location")
        print("\nSearched locations (highest to lowest priority):")
        print(" 1. OS environment (process.env)")
        if CENTRALIZED_RESOLVER_AVAILABLE:
            from resolve_env import get_env_file_paths
            for i, (desc, path) in enumerate(get_env_file_paths('ai-multimodal'), 2):
                exists = "[OK]" if path.exists() else "[ ]"
                print(f" {i}. {exists} {path}")
        else:
            print(" 2-7. .env files (centralized resolver unavailable)")
        print("\nQuick fix — add your key to any .env file above:")
        print(" echo 'GEMINI_API_KEY=your-key' >> ~/.opencode/.env")
        print("\nOther options:")
        print(" - Run setup checker: python scripts/check_setup.py")
        print(" - Show full hierarchy: python ~/.opencode/scripts/resolve_env.py --show-hierarchy --skill ai-multimodal -v")
        print("\nFor key rotation, add multiple keys to any .env:")
        print(" GEMINI_API_KEY=key1")
        print(" GEMINI_API_KEY_2=key2")
        print(" GEMINI_API_KEY_3=key3")
        sys.exit(1)

    if dry_run:
        print("DRY RUN MODE - No API calls will be made")
        print(f"Files to process: {len(files)}")
        print(f"Model: {model}")
        print(f"Task: {task}")
        print(f"Prompt: {prompt}")
        if rotator:
            print(f"API keys available: {rotator.key_count}")
        return []

    # Create client with current key
    client = genai.Client(api_key=api_key)
    results = []

    def get_client_with_rotation(error: Optional[Exception] = None) -> Optional[genai.Client]:
        """Get client, rotating key if rate limited.

        Returns the (possibly new) client, or None when every key is on
        cooldown.
        """
        nonlocal client, api_key

        if error and rotator and is_rate_limit_error and is_rate_limit_error(error):
            # Try to rotate to next key
            if rotator.mark_rate_limited(str(error)):
                new_key = rotator.get_key()
                if new_key:
                    api_key = new_key
                    client = genai.Client(api_key=api_key)
                    return client
            # All keys exhausted
            return None
        return client

    # For generation tasks without input files, process once
    if task == 'generate' and not files:
        if verbose:
            print(f"\nGenerating image from prompt...")

        # Use Imagen 4 API for imagen models
        if model.startswith('imagen-') or model in IMAGEN_MODELS:
            result = generate_image_imagen4(
                client=client,
                prompt=prompt,
                model=model,
                num_images=num_images,
                aspect_ratio=aspect_ratio or '1:1',
                size=size or '1K',  # Default to 1K for Imagen models
                verbose=verbose
            )

            # Silent fallback to cheaper model if Imagen billing required
            if result.get('status') == 'billing_required':
                if verbose:
                    print(f" Falling back to: {IMAGE_MODEL_FALLBACK}")
                result = process_file(
                    client=client,
                    file_path=None,
                    prompt=prompt,
                    model=IMAGE_MODEL_FALLBACK,
                    task=task,
                    format_output=format_output,
                    aspect_ratio=aspect_ratio,
                    image_size=size,
                    verbose=verbose
                )
            # Check if free tier (zero quota) - stop immediately with clear message
            error_str = result.get('error', '')
            if result.get('status') == 'error':
                if _is_free_tier_quota_error(Exception(error_str)):
                    result['error'] = FREE_TIER_NO_ACCESS_MSG
                elif _is_billing_error(Exception(error_str)):
                    result['error'] = (
                        "Image generation requires billing. Enable billing at: "
                        "https://aistudio.google.com/apikey or use Google Cloud credits."
                    )
        else:
            # Nano Banana (Flash/Pro) or other models via generate_content API
            result = process_file(
                client=client,
                file_path=None,
                prompt=prompt,
                model=model,
                task=task,
                format_output=format_output,
                aspect_ratio=aspect_ratio,
                image_size=size,
                verbose=verbose
            )
            # Check for free tier error
            if result.get('status') == 'error':
                error_str = result.get('error', '')
                if _is_free_tier_quota_error(Exception(error_str)):
                    result['error'] = FREE_TIER_NO_ACCESS_MSG

        results.append(result)

        if verbose:
            status = result.get('status', 'unknown')
            print(f" Status: {status}")

    elif task == 'generate-video' and not files:
        if verbose:
            print(f"\nGenerating video from prompt...")

        result = generate_video_veo(
            client=client,
            prompt=prompt,
            model=model,
            resolution=resolution,
            aspect_ratio=aspect_ratio or '16:9',
            reference_images=reference_images,
            verbose=verbose
        )

        # Check for free tier error - video gen has NO free tier access
        if result.get('status') == 'error':
            error_str = result.get('error', '')
            if _is_free_tier_quota_error(Exception(error_str)) or _is_billing_error(Exception(error_str)):
                result['error'] = FREE_TIER_NO_ACCESS_MSG

        results.append(result)

        if verbose:
            status = result.get('status', 'unknown')
            print(f" Status: {status}")
    else:
        # Process input files with key rotation support
        for i, file_path in enumerate(files, 1):
            if verbose:
                print(f"\n[{i}/{len(files)}] Processing: {file_path}")

            # Try processing with key rotation on rate limit
            max_rotation_attempts = rotator.key_count if rotator else 1
            result = None

            for rotation_attempt in range(max_rotation_attempts):
                result = process_file(
                    client=client,
                    file_path=file_path,
                    prompt=prompt,
                    model=model,
                    task=task,
                    format_output=format_output,
                    aspect_ratio=aspect_ratio,
                    image_size=size,
                    verbose=verbose
                )

                # Check if rate limited and can rotate
                if (result.get('rate_limited') and rotator and
                        rotation_attempt < max_rotation_attempts - 1):
                    new_client = get_client_with_rotation(Exception(result.get('error', '')))
                    if new_client:
                        client = new_client
                        if verbose:
                            print(f" Retrying with rotated key...")
                        continue
                    else:
                        # All keys exhausted - mark result with clear error
                        if verbose:
                            print(f" ⚠ All API keys exhausted (on cooldown)", file=sys.stderr)
                        result['error'] = "All API keys exhausted (rate limited). Try again later."
                # Success, non-rate-limit failure, or no keys left: stop retrying.
                break

            results.append(result)

            if verbose:
                status = result.get('status', 'unknown')
                print(f" Status: {status}")

    # Save results
    if output_file:
        save_results(results, output_file, format_output)

    return results
|
|
|
|
|
|
def print_results(results: List[Dict[str, Any]], task: str) -> None:
|
|
"""Print results to stdout for LLM workflows.
|
|
|
|
Always prints actual results (not just success/fail counts) so LLMs
|
|
can continue processing based on the output.
|
|
"""
|
|
if not results:
|
|
return
|
|
|
|
print("\n=== RESULTS ===\n")
|
|
|
|
for result in results:
|
|
file_name = result.get('file', 'generated')
|
|
status = result.get('status', 'unknown')
|
|
|
|
print(f"[{file_name}]")
|
|
print(f"Status: {status}")
|
|
|
|
if status == 'success':
|
|
# Print task-specific output
|
|
if task in ['analyze', 'transcribe', 'extract']:
|
|
response = result.get('response')
|
|
if response:
|
|
print(f"Result:\n{response}")
|
|
|
|
elif task == 'generate':
|
|
# Image generation
|
|
generated_images = result.get('generated_images', [])
|
|
if generated_images:
|
|
print(f"Generated images: {len(generated_images)}")
|
|
for img in generated_images:
|
|
print(f" - {img}")
|
|
else:
|
|
generated_image = result.get('generated_image')
|
|
if generated_image:
|
|
print(f"Generated image: {generated_image}")
|
|
|
|
elif task == 'generate-video':
|
|
generated_video = result.get('generated_video')
|
|
if generated_video:
|
|
print(f"Generated video: {generated_video}")
|
|
gen_time = result.get('generation_time')
|
|
if gen_time:
|
|
print(f"Generation time: {gen_time:.1f}s")
|
|
file_size = result.get('file_size_mb')
|
|
if file_size:
|
|
print(f"File size: {file_size:.2f} MB")
|
|
|
|
elif status == 'error':
|
|
error = result.get('error', 'Unknown error')
|
|
print(f"Error: {error}")
|
|
|
|
print() # Blank line between results
|
|
|
|
|
|
def save_results(results: List[Dict[str, Any]], output_file: str, format_output: str):
    """Persist batch results to *output_file*.

    When the destination carries an image/video extension and exactly one
    result exists, the generated media file itself is copied there; a failed
    generation is redirected to a ``.error.txt`` report so a binary path never
    receives a text report. Otherwise results are serialized as JSON, CSV, or
    markdown (markdown is also the fallback for 'text').

    Args:
        results: Per-file result dicts from the batch run.
        output_file: Destination path for the report or media copy.
        format_output: One of 'text', 'json', 'csv', 'markdown'.
    """
    dest = Path(output_file)

    image_suffixes = {'.png', '.jpg', '.jpeg', '.webp', '.gif', '.bmp'}
    video_suffixes = {'.mp4', '.mov', '.avi', '.webm'}

    if dest.suffix.lower() in image_suffixes and len(results) == 1:
        dest.parent.mkdir(parents=True, exist_ok=True)

        first = results[0]

        # Multi-image field: the first image lands at the requested path.
        multi = first.get('generated_images')
        if multi:
            shutil.copy2(multi[0], dest)
            return

        # Legacy single-image field.
        single = first.get('generated_image')
        if single:
            shutil.copy2(single, dest)
            return

        # Generation failed: never write a text report under an image name.
        dest = dest.with_suffix('.error.txt')
        dest.parent.mkdir(parents=True, exist_ok=True)
        print(f"Warning: Generation failed, saving error report to: {dest}")

    if dest.suffix.lower() in video_suffixes and len(results) == 1:
        dest.parent.mkdir(parents=True, exist_ok=True)

        video = results[0].get('generated_video')
        if video:
            shutil.copy2(video, dest)
            return

        dest = dest.with_suffix('.error.txt')
        dest.parent.mkdir(parents=True, exist_ok=True)
        print(f"Warning: Video generation failed, saving error report to: {dest}")

    if format_output == 'json':
        with open(dest, 'w', encoding='utf-8') as fh:
            json.dump(results, fh, indent=2)
    elif format_output == 'csv':
        columns = ('file', 'status', 'response', 'error')
        with open(dest, 'w', newline='', encoding='utf-8') as fh:
            writer = csv.DictWriter(fh, fieldnames=list(columns))
            writer.writeheader()
            for row in results:
                writer.writerow({key: row.get(key, '') for key in columns})
    else:  # markdown (also the fallback for 'text')
        with open(dest, 'w', encoding='utf-8') as fh:
            fh.write("# Batch Processing Results\n\n")
            for idx, row in enumerate(results, 1):
                fh.write(f"## {idx}. {row.get('file', 'Unknown')}\n\n")
                fh.write(f"**Status**: {row.get('status', 'unknown')}\n\n")
                if row.get('response'):
                    fh.write(f"**Response**:\n\n{row['response']}\n\n")
                if row.get('error'):
                    fh.write(f"**Error**: {row['error']}\n\n")
|
|
|
|
|
|
def main():
    """CLI entry point: parse args, infer task/model defaults, run the batch.

    Exits via ``parser.error`` (status 2) on invalid argument combinations.
    Side effects: prints results/summary to stdout and, with ``--output``,
    writes a report or copies generated media via ``batch_process``.
    """
    parser = argparse.ArgumentParser(
        description='Batch process media files with Gemini API',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Transcribe multiple audio files
  %(prog)s --files *.mp3 --task transcribe --model gemini-2.5-flash

  # Analyze images
  %(prog)s --files *.jpg --task analyze --prompt "Describe this image" \\
      --model gemini-2.5-flash

  # Process PDFs to JSON
  %(prog)s --files *.pdf --task extract --prompt "Extract data as JSON" \\
      --format json --output results.json

  # Generate images with Nano Banana Flash (fast)
  %(prog)s --task generate --prompt "A mountain landscape at sunset" \\
      --model gemini-2.5-flash-image --aspect-ratio 16:9 --size 2K

  # Generate images with Nano Banana Pro (4K text, reasoning)
  %(prog)s --task generate --prompt "Travel poster with text 'EXPLORE'" \\
      --model gemini-3-pro-image-preview --aspect-ratio 3:4 --size 4K

  # Generate images with Imagen 4 (production quality)
  %(prog)s --task generate --prompt "Product photo of coffee mug" \\
      --model imagen-4.0-ultra-generate-001 --aspect-ratio 1:1 --size 2K
"""
    )

    parser.add_argument('--files', nargs='*', help='Input files to process')
    parser.add_argument('--task',
                        choices=['transcribe', 'analyze', 'extract', 'generate', 'generate-video'],
                        help='Task to perform (auto-detected from file type if not specified)')
    parser.add_argument('--prompt', help='Prompt for analysis/generation')
    parser.add_argument('--model',
                        help='Model to use (default: auto-detected from task and env vars)')
    parser.add_argument('--format', dest='format_output', default='text',
                        choices=['text', 'json', 'csv', 'markdown'],
                        help='Output format (default: text)')

    # Image generation options.
    # All 10 aspect ratios supported by Nano Banana / Imagen 4.
    parser.add_argument('--aspect-ratio',
                        choices=['1:1', '2:3', '3:2', '3:4', '4:3', '4:5', '5:4', '9:16', '16:9', '21:9'],
                        help='Aspect ratio for image/video generation')
    parser.add_argument('--num-images', type=int, default=1,
                        help='Number of images to generate (1-4, default: 1)')
    # 4K available for Nano Banana Pro (gemini-3-pro-image-preview).
    # Note: not all models support --size, only use when needed.
    parser.add_argument('--size', choices=['1K', '2K', '4K'], default=None,
                        help='Image size - 1K/2K for Imagen 4, 1K/2K/4K for Nano Banana (optional)')

    # Video generation options.
    parser.add_argument('--resolution', choices=['720p', '1080p'], default='1080p',
                        help='Video resolution (default: 1080p)')
    parser.add_argument('--reference-images', nargs='+',
                        help='Reference images for video generation (max 3)')

    parser.add_argument('--output', help='Output file for results')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Verbose output')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be done without making API calls')

    args = parser.parse_args()

    # Fix: the help text documents a 1-4 range but it was never enforced, so
    # an invalid count flowed straight to the API. Fail fast with a clear error.
    if not 1 <= args.num_images <= 4:
        parser.error("--num-images must be between 1 and 4")

    # Auto-detect task from the first file's extension if not specified.
    if not args.task:
        if args.files:
            args.task = infer_task_from_file(args.files[0])
            if args.verbose:
                print(f"Auto-detected task: {args.task} (from file extension)")
        else:
            parser.error("--task required when no input files provided")

    # Auto-detect model if not specified.
    if not args.model:
        args.model = get_default_model(args.task)
        if args.verbose:
            print(f"Auto-detected model: {args.model}")

    # Validate model/task combination.
    try:
        validate_model_task_combination(args.model, args.task)
    except ValueError as e:
        parser.error(str(e))

    generation_task = args.task in ('generate', 'generate-video')

    # Validate argument combinations.
    if not generation_task and not args.files:
        parser.error("--files required for non-generation tasks")

    if generation_task and not args.prompt:
        parser.error("--prompt required for generation tasks")

    if not generation_task and not args.prompt:
        # Default prompts for analysis-style tasks.
        args.prompt = {
            'transcribe': 'Generate a transcript with timestamps',
            'analyze': 'Analyze this content',
            'extract': 'Extract key information',
        }.get(args.task)

    # Process files.
    results = batch_process(
        files=args.files or [],
        prompt=args.prompt,
        model=args.model,
        task=args.task,
        format_output=args.format_output,
        aspect_ratio=args.aspect_ratio,
        num_images=args.num_images,
        size=args.size,
        resolution=args.resolution,
        reference_images=args.reference_images,
        output_file=args.output,
        verbose=args.verbose,
        dry_run=args.dry_run
    )

    # Print results and summary.
    if not args.dry_run and results:
        # Always print actual results for LLM workflows.
        print_results(results, args.task)

        success = sum(1 for r in results if r.get('status') == 'success')
        failed = len(results) - success
        print('=' * 50)
        print(f"Summary: {len(results)} processed, {success} success, {failed} failed")
        if args.output:
            print(f"Results saved to: {args.output}")
|
|
|
|
|
|
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == '__main__':
    main()
|