99 lines
3.0 KiB
Go
99 lines
3.0 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/ClickHouse/clickhouse-go/v2/lib/driver"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/dbiz/cdp/data-layer/api/internal/apperr"
|
|
"github.com/dbiz/cdp/data-layer/api/internal/model"
|
|
"github.com/dbiz/cdp/data-layer/api/internal/repo"
|
|
)
|
|
|
|
// SQLService backs the Custom SQL sandbox. It applies two layers of guard:
|
|
// 1. App-level: parse the statement, reject anything that is not a single
|
|
// SELECT and anything containing DDL/DML keywords.
|
|
// 2. DB-level: queries run against a SELECT-only ClickHouse account so the
|
|
// server rejects writes even if app-level checks are bypassed.
|
|
type SQLService struct {
|
|
ch driver.Conn // read-only conn
|
|
log *zap.Logger
|
|
}
|
|
|
|
func NewSQLService(roConn driver.Conn, log *zap.Logger) *SQLService {
|
|
return &SQLService{ch: roConn, log: log}
|
|
}
|
|
|
|
var forbiddenKeywords = []string{
|
|
"INSERT", "UPDATE", "DELETE", "DROP", "CREATE", "ALTER", "TRUNCATE",
|
|
"GRANT", "REVOKE", "ATTACH", "DETACH", "OPTIMIZE", "RENAME", "EXCHANGE",
|
|
}
|
|
|
|
// validateReadOnly rejects multi-statement input and obvious DDL/DML.
|
|
func validateReadOnly(sql string) error {
|
|
trimmed := strings.TrimSpace(sql)
|
|
if trimmed == "" {
|
|
return apperr.BadRequest("sql is empty", "sql", nil)
|
|
}
|
|
// Reject multiple statements -- the ClickHouse driver also rejects this,
|
|
// but we want a friendly error before hitting the wire.
|
|
if strings.Contains(strings.TrimRight(trimmed, ";"), ";") {
|
|
return apperr.BadRequest("only a single statement is allowed", "sql", nil)
|
|
}
|
|
upper := strings.ToUpper(trimmed)
|
|
if !strings.HasPrefix(upper, "SELECT") && !strings.HasPrefix(upper, "WITH") {
|
|
return apperr.BadRequest("only SELECT statements are allowed", "sql", nil)
|
|
}
|
|
// Token-level keyword scan: \bKW\b to avoid false positives like "created_at".
|
|
for _, kw := range forbiddenKeywords {
|
|
if hasWord(upper, kw) {
|
|
return apperr.BadRequest("statement contains forbidden keyword: "+kw, "sql", nil)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func hasWord(s, word string) bool {
|
|
for {
|
|
idx := strings.Index(s, word)
|
|
if idx < 0 {
|
|
return false
|
|
}
|
|
left := idx == 0 || !isIdent(s[idx-1])
|
|
right := idx+len(word) == len(s) || !isIdent(s[idx+len(word)])
|
|
if left && right {
|
|
return true
|
|
}
|
|
s = s[idx+len(word):]
|
|
}
|
|
}
|
|
|
|
func isIdent(c byte) bool {
|
|
return c == '_' || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|
|
}
|
|
|
|
// Run executes the (validated) SQL against the read-only ClickHouse user.
|
|
// Results are never cached -- queries are arbitrary.
|
|
func (s *SQLService) Run(ctx context.Context, sql string) (*model.QueryResult, error) {
|
|
if err := validateReadOnly(sql); err != nil {
|
|
return nil, err
|
|
}
|
|
start := time.Now()
|
|
rows, err := s.ch.Query(ctx, sql)
|
|
if err != nil {
|
|
// ClickHouse syntax / permission errors are user-visible, not 500.
|
|
return nil, apperr.BadRequest("clickhouse rejected query: "+err.Error(), "sql", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
res, err := repo.ScanRows(rows)
|
|
if err != nil {
|
|
return nil, apperr.Internal(err)
|
|
}
|
|
res.DurationMS = time.Since(start).Milliseconds()
|
|
return res, nil
|
|
}
|