328 lines
9.9 KiB
JavaScript
Executable File
328 lines
9.9 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
/**
 * path-extractor.cjs - Extract paths from Claude Code tool inputs
 *
 * Extracts file_path, path, pattern params and parses Bash commands
 * to find all path-like arguments.
 */
|
|
|
|
// Flags whose following value must NOT be checked as a path.
// They carry "exclude" semantics: the user is explicitly skipping those paths.
const EXCLUDE_FLAGS = [
  '--exclude',
  '--ignore',
  '--skip',
  '--prune',
  '-x', // tar exclude shorthand
  '-path', // find -path (used with -prune)
  '--exclude-dir', // grep --exclude-dir
];
|
|
|
|
// Commands that operate on the filesystem. For these, a bare directory name
// (build, dist, ...) is extracted as a path. For any other command (grep,
// echo, sed, ...) only tokens that structurally look like a path (contain a
// separator or a file extension) are extracted.
const FILESYSTEM_COMMANDS = [
  // navigation and viewing
  'cd', 'ls', 'cat', 'head', 'tail', 'less', 'more',
  // create / delete / move
  'rm', 'cp', 'mv', 'find', 'touch', 'mkdir', 'rmdir',
  // metadata, permissions and links
  'stat', 'file', 'du', 'tree', 'chmod', 'chown', 'ln', 'readlink', 'realpath',
  // stream helpers and archives
  'wc', 'tee', 'tar', 'zip', 'unzip',
  // editors, viewers, sync and compare
  'open', 'code', 'vim', 'nano', 'bat', 'rsync', 'scp', 'diff',
];
|
|
|
|
/**
 * Collect every path referenced by a tool_input object.
 *
 * Looks at the direct string params `file_path`, `path` and `pattern`, and,
 * when a string `command` is present, at the paths parsed out of that command.
 *
 * @param {Object} toolInput - The tool_input from hook JSON
 * @returns {string[]} Array of extracted paths (empty entries removed)
 */
function extractFromToolInput(toolInput) {
  // Anything that is not a non-null object carries no paths.
  if (!toolInput || typeof toolInput !== 'object') {
    return [];
  }

  const isNonEmptyString = (value) => typeof value === 'string' && value !== '';

  // Direct path params (Read, Edit, Write, Grep, Glob tools)
  const directPaths = ['file_path', 'path', 'pattern']
    .map((key) => toolInput[key])
    .filter(isNonEmptyString)
    .map((value) => normalizeExtractedPath(value));

  // Paths embedded in a Bash command, if one is present
  const commandPaths = isNonEmptyString(toolInput.command)
    ? extractFromCommand(toolInput.command)
    : [];

  // Drop anything that normalized down to an empty string.
  return [...directPaths, ...commandPaths].filter(Boolean);
}
|
|
|
|
/**
 * Extract path-like segments from a Bash command string.
 *
 * Works in two passes:
 *   1. Quoted strings are inspected whole, so paths containing spaces survive.
 *      sed/awk style substitution expressions (s/a/b/) are ignored.
 *   2. Quoted strings are blanked out and the remainder is split on whitespace.
 *      Tokens are then walked with per-segment command context: for filesystem
 *      commands (cd, cat, ls, rm, ...) bare blocked directory names are
 *      extracted with priority; for other commands (grep, echo, sed, ...) only
 *      tokens that structurally look like paths are extracted, which avoids
 *      false positives on search terms and string arguments.
 *
 * Heredoc bodies (<<EOF, <<-EOF, << EOF, <<- EOF, <<'EOF', <<"EOF") are skipped
 * in pass 2 up to the closing delimiter. Here-strings (<<<) are not heredocs
 * and do not suppress the rest of the command.
 *
 * @param {string} command - The command string
 * @returns {string[]} Array of extracted paths (never contains empty strings)
 */
function extractFromCommand(command) {
  if (!command || typeof command !== 'string') {
    return [];
  }

  const paths = [];

  // Pass 1: quoted strings. The backreference (\1) forces the closing quote to
  // be the same character as the opening one, so an apostrophe inside double
  // quotes ("don't") does not end the string early.
  const quotedPattern = /(["'])((?:(?!\1)[\s\S])+)\1/g;
  let match;
  while ((match = quotedPattern.exec(command)) !== null) {
    const content = match[2];

    // Skip sed/awk regex expressions (s/pattern/replacement/flags)
    if (/^s[\/|@#,]/.test(content)) continue;

    if (looksLikePath(content)) {
      paths.push(normalizeExtractedPath(content));
    }
  }

  // Pass 2 preparation.
  // First unquote heredoc delimiters (<<'EOF' -> <<EOF): otherwise the quote
  // stripping below would erase the delimiter and the first word of the body
  // would be mistaken for it. The leading (^|[^<]) keeps <<< here-strings out.
  // Then blank out every remaining quoted string.
  const withoutQuotes = command
    .replace(/(^|[^<])<<(-?)[ \t]*(["'])([^"'\s]+)\3/g, '$1<<$2$4')
    .replace(/(["'])(?:(?!\1)[\s\S])*\1/g, ' ');

  // Split on whitespace and extract path-like tokens
  const tokens = withoutQuotes.split(/\s+/).filter(Boolean);

  // Command context is tracked per pipe / && / ; segment
  let commandName = null;
  let isFsCommand = false;
  let skipNextToken = false;
  let heredocDelimiter = null;
  let nextIsHeredocDelimiter = false;

  for (const token of tokens) {
    // Heredoc delimiter capture (the token right after a bare << or <<-)
    if (nextIsHeredocDelimiter) {
      heredocDelimiter = token.replace(/^['"]/, '').replace(/['"]$/, '') || null;
      nextIsHeredocDelimiter = false;
      continue;
    }

    // Skip heredoc body content until the closing delimiter
    if (heredocDelimiter) {
      if (token === heredocDelimiter) {
        heredocDelimiter = null;
      }
      continue;
    }

    // Here-string operator (<<< or <<<word): not a heredoc. Treating it as one
    // would install a bogus delimiter and hide every later token.
    if (token.startsWith('<<<')) {
      continue;
    }

    // Bare heredoc operator: the delimiter is the next token. This must be
    // tested before the attached form below, because '<<-' also starts with
    // '<<' and is longer than two characters.
    if (token === '<<' || token === '<<-') {
      nextIsHeredocDelimiter = true;
      continue;
    }

    // Heredoc operator with attached delimiter: <<EOF, <<-EOF
    if (token.startsWith('<<')) {
      heredocDelimiter =
        token.replace(/^<<-?['"]?/, '').replace(/['"]?$/, '') || null;
      continue;
    }

    // Skip value after exclude flags (--exclude node_modules format)
    if (skipNextToken) {
      skipNextToken = false;
      continue;
    }

    // Reset command context at command/pipe boundaries
    if (token === '&&' || token === ';' || token.startsWith('|')) {
      commandName = null;
      isFsCommand = false;
      continue;
    }

    // Skip flags and shell operators; an exclude flag also hides its value
    if (isSkippableToken(token)) {
      if (EXCLUDE_FLAGS.includes(token)) {
        skipNextToken = true;
      }
      continue;
    }

    // The first non-flag token of a segment is its command
    if (commandName === null) {
      commandName = token.toLowerCase();
      isFsCommand = FILESYSTEM_COMMANDS.includes(commandName);
      // Skip the command word itself
      if (isCommandKeyword(token) || isFsCommand) continue;
      // Non-keyword command (e.g., ./script.sh): fall through to the path check
    }

    // For filesystem commands, blocked dir names win over keyword filtering:
    // in "cd build" or "ls dist" the words "build"/"dist" are paths.
    if (isFsCommand && isBlockedDirName(token)) {
      paths.push(normalizeExtractedPath(token));
      continue;
    }

    // Skip common non-path command words
    if (isCommandKeyword(token)) continue;

    // Check if it looks like a path
    if (looksLikePath(token)) {
      paths.push(normalizeExtractedPath(token));
    }
  }

  // Normalization can reduce a token to nothing; never hand back empty paths.
  return paths.filter(Boolean);
}
|
|
|
|
// Directory names that are commonly blocked. They are extracted even when they
// collide with a command keyword (e.g. "build" is both a subcommand and a
// directory name).
// Keep in sync with DEFAULT_PATTERNS in pattern-matcher.cjs
const BLOCKED_DIR_NAMES = [
  'node_modules',
  '__pycache__',
  '.git',
  'dist',
  'build',
  '.next',
  '.nuxt',
  '.venv',
  'venv',
  'vendor',
  'target',
  'coverage',
];

/**
 * Tell whether a token is exactly one of the blocked directory names.
 * The comparison is an exact, case-sensitive match on the whole token.
 * This check takes priority over command keyword filtering.
 *
 * @param {string} token - Token to check
 * @returns {boolean}
 */
function isBlockedDirName(token) {
  return BLOCKED_DIR_NAMES.some((dirName) => dirName === token);
}
|
|
|
|
/**
 * Check if a string looks like a file path.
 *
 * A string qualifies when it is at least two characters long and either
 * contains a path separator ("/" or "\") or ends in a dot followed by one to
 * six word characters (a file extension). The separator test already covers
 * "./x", "../x" and "dir/x" forms, so no separate checks are needed for them.
 *
 * @param {string} str - String to check
 * @returns {boolean} false for non-string or too-short input
 */
function looksLikePath(str) {
  // Non-strings (numbers, objects, null, ...) are never paths; answering false
  // here avoids a TypeError from calling string methods on them.
  if (typeof str !== 'string' || str.length < 2) return false;

  // Contains a path separator
  if (str.includes('/') || str.includes('\\')) return true;

  // Has a file extension (likely a file)
  return /\.\w{1,6}$/.test(str);
}
|
|
|
|
/**
 * Decide whether a token should be skipped because it is not a candidate path:
 * a flag, a shell operator / redirection, or a purely numeric value.
 *
 * @param {string} token - Token to check
 * @returns {boolean}
 */
function isSkippableToken(token) {
  // '-' marks a flag; '|', '>', '<' and '&' open pipes, redirections and
  // control operators (this also covers '||', '>>', '<<' and '&&').
  const leadingMarkers = ['-', '|', '>', '<', '&'];
  if (leadingMarkers.some((marker) => token.startsWith(marker))) {
    return true;
  }

  // A lone command separator
  if (token === ';') {
    return true;
  }

  // Numeric values
  return /^\d+$/.test(token);
}
|
|
|
|
/**
|
|
* Check if token is a common command keyword (not a path)
|
|
*
|
|
* @param {string} token - Token to check
|
|
* @returns {boolean}
|
|
*/
|
|
function isCommandKeyword(token) {
|
|
const keywords = [
|
|
// Shell commands
|
|
'echo', 'cat', 'ls', 'cd', 'rm', 'cp', 'mv', 'find', 'grep', 'head', 'tail',
|
|
'wc', 'du', 'tree', 'touch', 'mkdir', 'rmdir', 'pwd', 'which', 'env', 'export',
|
|
'source', 'bash', 'sh', 'zsh', 'true', 'false', 'test', 'xargs', 'tee', 'sort',
|
|
'uniq', 'cut', 'tr', 'sed', 'awk', 'diff', 'chmod', 'chown', 'ln', 'file',
|
|
|
|
// Package managers and their subcommands
|
|
'npm', 'pnpm', 'yarn', 'bun', 'npx', 'pnpx', 'bunx', 'node',
|
|
'run', 'build', 'test', 'lint', 'dev', 'start', 'install', 'ci', 'exec',
|
|
'add', 'remove', 'update', 'publish', 'pack', 'init', 'create',
|
|
|
|
// Build tools
|
|
'tsc', 'esbuild', 'vite', 'webpack', 'rollup', 'turbo', 'nx',
|
|
'jest', 'vitest', 'mocha', 'eslint', 'prettier',
|
|
|
|
// Git
|
|
'git', 'commit', 'push', 'pull', 'merge', 'rebase', 'checkout', 'branch',
|
|
'status', 'log', 'diff', 'add', 'reset', 'stash', 'fetch', 'clone',
|
|
|
|
// Docker
|
|
'docker', 'compose', 'up', 'down', 'ps', 'logs', 'exec', 'container', 'image',
|
|
|
|
// Misc
|
|
'sudo', 'time', 'timeout', 'watch', 'make', 'cargo', 'python', 'python3', 'pip',
|
|
'ruby', 'gem', 'go', 'rust', 'java', 'javac', 'mvn', 'gradle'
|
|
];
|
|
|
|
return keywords.includes(token.toLowerCase());
|
|
}
|
|
|
|
/**
 * Normalize an extracted path:
 * - trim surrounding whitespace
 * - remove one pair of matching surrounding quotes
 * - strip shell metacharacters from the edges (backticks, parens, braces,
 *   brackets, and a trailing semicolon)
 * - convert backslashes to forward slashes
 * - remove trailing slashes (a path consisting only of slashes becomes "/")
 *
 * @param {string} path - Path to normalize
 * @returns {string} Normalized path; '' for empty or non-string input
 */
function normalizeExtractedPath(path) {
  // Non-strings cannot be normalized; answer '' rather than throwing on .trim().
  if (typeof path !== 'string' || path === '') return '';

  let normalized = path.trim();

  // Remove surrounding quotes
  const isDoubleQuoted = normalized.startsWith('"') && normalized.endsWith('"');
  const isSingleQuoted = normalized.startsWith("'") && normalized.endsWith("'");
  if (isDoubleQuoted || isSingleQuoted) {
    normalized = normalized.slice(1, -1);
  }

  // Strip shell metacharacters from edges (backticks, parens, braces)
  normalized = normalized.replace(/^[`({\[]+/, '').replace(/[`)};\]]+$/, '');

  // Normalize path separators to forward slash
  normalized = normalized.replace(/\\/g, '/');

  // Remove every trailing slash so "build/", "build//" and "build" all compare
  // equal. If nothing is left the input was only slashes, i.e. the root.
  if (normalized.length > 1) {
    normalized = normalized.replace(/\/+$/, '') || '/';
  }

  return normalized;
}
|
|
|
|
module.exports = {
|
|
extractFromToolInput,
|
|
extractFromCommand,
|
|
looksLikePath,
|
|
isSkippableToken,
|
|
isCommandKeyword,
|
|
isBlockedDirName,
|
|
normalizeExtractedPath,
|
|
BLOCKED_DIR_NAMES,
|
|
EXCLUDE_FLAGS,
|
|
FILESYSTEM_COMMANDS
|
|
};
|