init ingestion
This commit is contained in:
92
ingestion/ingest/internal/schema/flatten.go
Normal file
92
ingestion/ingest/internal/schema/flatten.go
Normal file
@@ -0,0 +1,92 @@
|
||||
// Package schema turns nested JSON objects into flat key/value maps and
// classifies field types for conflict detection.
//
// Rules:
//   - keys are joined with "_": {"a": {"b": 1}} -> {"a_b": 1}
//   - arrays are preserved as-is and not descended into
//   - keys are sanitized: lowercase, non-[a-z0-9_] replaced with "_"
|
||||
package schema
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// DataType names the kind of value held by a field. The package uses it to
// classify field types for conflict detection.
type DataType string

// Kinds of field value.
const (
	// TypeString is the kind for string values. Classify also returns it for
	// values whose Go type it does not recognize.
	TypeString DataType = "string"
	// TypeNumber is the kind for numeric values.
	TypeNumber DataType = "number"
	// TypeBoolean is the kind for bool values.
	TypeBoolean DataType = "boolean"
	// TypeObject is the kind for nested objects (map[string]any).
	TypeObject DataType = "object"
	// TypeArray is the kind for arrays ([]any).
	TypeArray DataType = "array"
	// TypeTimestamp is the kind for timestamps. Classify in this file never
	// returns it. NOTE(review): presumably assigned by a caller that
	// recognizes timestamp-formatted values; confirm.
	TypeTimestamp DataType = "timestamp"
	// TypeNull is the kind for a nil (JSON null) value.
	TypeNull DataType = "null"
)
|
||||
|
||||
// Flatten flattens nested objects under a snake_case prefix.
|
||||
// Returns a new map, never mutates input.
|
||||
func Flatten(in map[string]any) map[string]any {
|
||||
out := make(map[string]any, len(in))
|
||||
for k, v := range in {
|
||||
flattenInto(out, sanitize(k), v)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func flattenInto(out map[string]any, prefix string, v any) {
|
||||
switch x := v.(type) {
|
||||
case map[string]any:
|
||||
if len(x) == 0 {
|
||||
out[prefix] = x
|
||||
return
|
||||
}
|
||||
for k, child := range x {
|
||||
flattenInto(out, prefix+"_"+sanitize(k), child)
|
||||
}
|
||||
default:
|
||||
out[prefix] = v
|
||||
}
|
||||
}
|
||||
|
||||
// sanitize normalizes a raw key into a segment made only of [a-z0-9_].
//
// ASCII letters are lowercased; ASCII digits and "_" are kept; every other
// rune (punctuation, whitespace, non-ASCII letters and digits, invalid UTF-8)
// is replaced with a single "_". Leading underscores are kept and trailing
// underscores are trimmed, so a key made up solely of replaced characters or
// underscores yields "".
func sanitize(k string) string {
	if k == "" {
		return k
	}
	var b strings.Builder
	// Each rune becomes at most as many bytes as it occupied in k.
	b.Grow(len(k))
	for _, r := range k {
		switch {
		case r > unicode.MaxASCII:
			// unicode.IsLetter and unicode.IsDigit also accept non-ASCII runes
			// (e.g. 'é', '٣'), which would escape the documented [a-z0-9_] set.
			b.WriteRune('_')
		case unicode.IsLetter(r):
			b.WriteRune(unicode.ToLower(r))
		case unicode.IsDigit(r), r == '_':
			b.WriteRune(r)
		default:
			b.WriteRune('_')
		}
	}
	return strings.TrimRight(b.String(), "_")
}
|
||||
|
||||
// Classify maps a Go value (from json.Unmarshal) to a DataType.
|
||||
func Classify(v any) DataType {
|
||||
switch x := v.(type) {
|
||||
case nil:
|
||||
return TypeNull
|
||||
case bool:
|
||||
return TypeBoolean
|
||||
case float64, float32, int, int32, int64, uint, uint32, uint64:
|
||||
return TypeNumber
|
||||
case string:
|
||||
_ = x
|
||||
return TypeString
|
||||
case []any:
|
||||
return TypeArray
|
||||
case map[string]any:
|
||||
return TypeObject
|
||||
default:
|
||||
return TypeString
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user