#!/usr/bin/env node /** * path-extractor.cjs - Extract paths from Claude Code tool inputs * * Extracts file_path, path, pattern params and parses Bash commands * to find all path-like arguments. */ // Flags that indicate the following value should NOT be checked as a path // These are "exclude" semantics - the user is explicitly skipping these paths const EXCLUDE_FLAGS = [ '--exclude', '--ignore', '--skip', '--prune', '-x', // tar exclude shorthand '-path', // find -path (used with -prune) '--exclude-dir' // grep --exclude-dir ]; // Filesystem commands where bare directory names (build, dist, etc.) // should be extracted as paths. For non-fs commands (grep, echo, sed), // only tokens that look like actual paths (contain / or extension) are extracted. const FILESYSTEM_COMMANDS = [ 'cd', 'ls', 'cat', 'head', 'tail', 'less', 'more', 'rm', 'cp', 'mv', 'find', 'touch', 'mkdir', 'rmdir', 'stat', 'file', 'du', 'tree', 'chmod', 'chown', 'ln', 'readlink', 'realpath', 'wc', 'tee', 'tar', 'zip', 'unzip', 'open', 'code', 'vim', 'nano', 'bat', 'rsync', 'scp', 'diff' ]; /** * Extract all paths from a tool_input object * Handles: file_path, path, pattern params and command strings * * @param {Object} toolInput - The tool_input from hook JSON * @returns {string[]} Array of extracted paths */ function extractFromToolInput(toolInput) { const paths = []; if (!toolInput || typeof toolInput !== 'object') { return paths; } // Direct path params (Read, Edit, Write, Grep, Glob tools) const directParams = ['file_path', 'path', 'pattern']; for (const param of directParams) { if (toolInput[param] && typeof toolInput[param] === 'string') { const normalized = normalizeExtractedPath(toolInput[param]); if (normalized) paths.push(normalized); } } // Extract from Bash command if present if (toolInput.command && typeof toolInput.command === 'string') { const cmdPaths = extractFromCommand(toolInput.command); paths.push(...cmdPaths); } return paths.filter(Boolean); } /** * Extract path-like segments from a Bash command string. * * Uses pipe-segment-aware command context: for filesystem commands (cd, cat, ls, rm, etc.) * bare blocked directory names are extracted with priority. For non-filesystem commands * (grep, echo, sed, etc.) only tokens that structurally look like paths are extracted, * preventing false positives on search terms and string arguments. * * @param {string} command - The command string * @returns {string[]} Array of extracted paths */ function extractFromCommand(command) { if (!command || typeof command !== 'string') { return []; } const paths = []; // First, extract quoted strings (preserve spaces in paths) const quotedPattern = /["']([^"']+)["']/g; let match; while ((match = quotedPattern.exec(command)) !== null) { const content = match[1]; // Skip sed/awk regex expressions (s/pattern/replacement/flags) if (/^s[\/|@#,]/.test(content)) continue; if (looksLikePath(content)) { paths.push(normalizeExtractedPath(content)); } } // Remove quoted strings for unquoted path extraction const withoutQuotes = command.replace(/["'][^"']*["']/g, ' '); // Split on whitespace and extract path-like tokens const tokens = withoutQuotes.split(/\s+/).filter(Boolean); // Track command context per pipe segment let commandName = null; let isFsCommand = false; let skipNextToken = false; let heredocDelimiter = null; let nextIsHeredocDelimiter = false; for (const token of tokens) { // Heredoc delimiter capture (after << or <<-) if (nextIsHeredocDelimiter) { heredocDelimiter = token.replace(/^['"]/, '').replace(/['"]$/, ''); nextIsHeredocDelimiter = false; continue; } // Skip heredoc body content until closing delimiter if (heredocDelimiter) { if (token === heredocDelimiter) { heredocDelimiter = null; } continue; } // Detect heredoc start: < 2) { heredocDelimiter = token.replace(/^<<-?['"]?/, '').replace(/['"]?$/, ''); continue; } if (token === '<<' || token === '<<-') { nextIsHeredocDelimiter = true; continue; } // Skip value after exclude flags (--exclude node_modules format) if (skipNextToken) { skipNextToken = false; continue; } // Reset command context at command/pipe boundaries if (token === '&&' || token === ';' || token.startsWith('|')) { commandName = null; isFsCommand = false; continue; } // Skip flags and shell operators if (isSkippableToken(token)) { if (EXCLUDE_FLAGS.includes(token)) { skipNextToken = true; } continue; } // Determine the command for this pipe segment (first non-flag token) if (commandName === null) { commandName = token.toLowerCase(); isFsCommand = FILESYSTEM_COMMANDS.includes(commandName); // Skip the command word itself if (isCommandKeyword(token) || isFsCommand) continue; // Non-keyword command (e.g., ./script.sh) — fall through to path check } // For filesystem commands, extract blocked dir names with priority. // "cd build", "ls dist", "cat node_modules/..." — "build"/"dist" are paths here. if (isFsCommand && isBlockedDirName(token)) { paths.push(normalizeExtractedPath(token)); continue; } // Skip common non-path command words if (isCommandKeyword(token)) continue; // Check if it looks like a path if (looksLikePath(token)) { paths.push(normalizeExtractedPath(token)); } } return paths; } // Common blocked directory names that should be extracted even if they // match command keywords (e.g., "build" is both a subcommand and a dir name) // Keep in sync with DEFAULT_PATTERNS in pattern-matcher.cjs const BLOCKED_DIR_NAMES = [ 'node_modules', '__pycache__', '.git', 'dist', 'build', '.next', '.nuxt', '.venv', 'venv', 'vendor', 'target', 'coverage' ]; /** * Check if token is exactly a blocked directory name * This takes priority over command keyword filtering * * @param {string} token - Token to check * @returns {boolean} */ function isBlockedDirName(token) { return BLOCKED_DIR_NAMES.includes(token); } /** * Check if a string looks like a file path * * @param {string} str - String to check * @returns {boolean} */ function looksLikePath(str) { if (!str || str.length < 2) return false; // Contains path separator if (str.includes('/') || str.includes('\\')) return true; // Starts with relative path indicator if (str.startsWith('./') || str.startsWith('../')) return true; // Has file extension (likely a file) if (/\.\w{1,6}$/.test(str)) return true; // Looks like a directory path if (/^[a-zA-Z0-9_-]+\//.test(str)) return true; return false; } /** * Check if token should be skipped (flags, operators) * * @param {string} token - Token to check * @returns {boolean} */ function isSkippableToken(token) { // Flags if (token.startsWith('-')) return true; // Shell operators if (['|', '||', '&&', '>', '>>', '<', '<<', '&', ';'].includes(token)) return true; if (token.startsWith('|') || token.startsWith('>') || token.startsWith('<')) return true; if (token.startsWith('&')) return true; // Numeric values if (/^\d+$/.test(token)) return true; return false; } /** * Check if token is a common command keyword (not a path) * * @param {string} token - Token to check * @returns {boolean} */ function isCommandKeyword(token) { const keywords = [ // Shell commands 'echo', 'cat', 'ls', 'cd', 'rm', 'cp', 'mv', 'find', 'grep', 'head', 'tail', 'wc', 'du', 'tree', 'touch', 'mkdir', 'rmdir', 'pwd', 'which', 'env', 'export', 'source', 'bash', 'sh', 'zsh', 'true', 'false', 'test', 'xargs', 'tee', 'sort', 'uniq', 'cut', 'tr', 'sed', 'awk', 'diff', 'chmod', 'chown', 'ln', 'file', // Package managers and their subcommands 'npm', 'pnpm', 'yarn', 'bun', 'npx', 'pnpx', 'bunx', 'node', 'run', 'build', 'test', 'lint', 'dev', 'start', 'install', 'ci', 'exec', 'add', 'remove', 'update', 'publish', 'pack', 'init', 'create', // Build tools 'tsc', 'esbuild', 'vite', 'webpack', 'rollup', 'turbo', 'nx', 'jest', 'vitest', 'mocha', 'eslint', 'prettier', // Git 'git', 'commit', 'push', 'pull', 'merge', 'rebase', 'checkout', 'branch', 'status', 'log', 'diff', 'add', 'reset', 'stash', 'fetch', 'clone', // Docker 'docker', 'compose', 'up', 'down', 'ps', 'logs', 'exec', 'container', 'image', // Misc 'sudo', 'time', 'timeout', 'watch', 'make', 'cargo', 'python', 'python3', 'pip', 'ruby', 'gem', 'go', 'rust', 'java', 'javac', 'mvn', 'gradle' ]; return keywords.includes(token.toLowerCase()); } /** * Normalize an extracted path * - Remove surrounding quotes * - Normalize path separators to forward slash * * @param {string} path - Path to normalize * @returns {string} Normalized path */ function normalizeExtractedPath(path) { if (!path) return ''; let normalized = path.trim(); // Remove surrounding quotes if ((normalized.startsWith('"') && normalized.endsWith('"')) || (normalized.startsWith("'") && normalized.endsWith("'"))) { normalized = normalized.slice(1, -1); } // Strip shell metacharacters from edges (backticks, parens, braces) normalized = normalized.replace(/^[`({\[]+/, '').replace(/[`)};\]]+$/, ''); // Normalize path separators to forward slash normalized = normalized.replace(/\\/g, '/'); // Remove trailing slash for consistency if (normalized.endsWith('/') && normalized.length > 1) { normalized = normalized.slice(0, -1); } return normalized; } module.exports = { extractFromToolInput, extractFromCommand, looksLikePath, isSkippableToken, isCommandKeyword, isBlockedDirName, normalizeExtractedPath, BLOCKED_DIR_NAMES, EXCLUDE_FLAGS, FILESYSTEM_COMMANDS };