package service import ( "context" "strings" "time" "github.com/ClickHouse/clickhouse-go/v2/lib/driver" "go.uber.org/zap" "github.com/dbiz/cdp/data-layer/api/internal/apperr" "github.com/dbiz/cdp/data-layer/api/internal/model" "github.com/dbiz/cdp/data-layer/api/internal/repo" ) // SQLService backs the Custom SQL sandbox. It applies two layers of guard: // 1. App-level: parse the statement, reject anything that is not a single // SELECT and anything containing DDL/DML keywords. // 2. DB-level: queries run against a SELECT-only ClickHouse account so the // server rejects writes even if app-level checks are bypassed. type SQLService struct { ch driver.Conn // read-only conn log *zap.Logger } func NewSQLService(roConn driver.Conn, log *zap.Logger) *SQLService { return &SQLService{ch: roConn, log: log} } var forbiddenKeywords = []string{ "INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER", "TRUNCATE", "GRANT", "REVOKE", "ATTACH", "DETACH", "OPTIMIZE", "RENAME", "EXCHANGE", } // validateReadOnly rejects multi-statement input and obvious DDL/DML. func validateReadOnly(sql string) error { trimmed := strings.TrimSpace(sql) if trimmed == "" { return apperr.BadRequest("sql is empty", "sql", nil) } // Reject multiple statements -- the ClickHouse driver also rejects this, // but we want a friendly error before hitting the wire. if strings.Contains(strings.TrimRight(trimmed, ";"), ";") { return apperr.BadRequest("only a single statement is allowed", "sql", nil) } upper := strings.ToUpper(trimmed) if !strings.HasPrefix(upper, "SELECT") && !strings.HasPrefix(upper, "WITH") { return apperr.BadRequest("only SELECT statements are allowed", "sql", nil) } // Token-level keyword scan: \bKW\b to avoid false positives like "created_at". for _, kw := range forbiddenKeywords { if hasWord(upper, kw) { return apperr.BadRequest("statement contains forbidden keyword: "+kw, "sql", nil) } } return nil } func hasWord(s, word string) bool { for { idx := strings.Index(s, word) if idx < 0 { return false } left := idx == 0 || !isIdent(s[idx-1]) right := idx+len(word) == len(s) || !isIdent(s[idx+len(word)]) if left && right { return true } s = s[idx+len(word):] } } func isIdent(c byte) bool { return c == '_' || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') } // Run executes the (validated) SQL against the read-only ClickHouse user. // Results are never cached -- queries are arbitrary. func (s *SQLService) Run(ctx context.Context, sql string) (*model.QueryResult, error) { if err := validateReadOnly(sql); err != nil { return nil, err } start := time.Now() rows, err := s.ch.Query(ctx, sql) if err != nil { // ClickHouse syntax / permission errors are user-visible, not 500. return nil, apperr.BadRequest("clickhouse rejected query: "+err.Error(), "sql", err) } defer rows.Close() res, err := repo.ScanRows(rows) if err != nil { return nil, apperr.Internal(err) } res.DurationMS = time.Since(start).Milliseconds() return res, nil }