init ingestion

This commit is contained in:
2026-05-24 22:59:24 +07:00
commit 4e8c11d545
80 changed files with 5639 additions and 0 deletions

View File

@@ -0,0 +1,92 @@
// Package schema turns nested JSON objects into flat key/value maps and
// classifies field types for conflict detection.
//
// Rules:
// - keys are joined with "_" : {"a": {"b": 1}} -> {"a_b": 1}
// - arrays are preserved as-is and not descended into
// - empty objects are preserved as-is and stored as a leaf value
// - keys are sanitized: lowercase, non-[a-z0-9_] replaced with "_",
//   trailing "_" trimmed
package schema
import (
"strings"
"unicode"
)
// DataType names the kind of value found at a flattened field. It is a
// string so it can be compared and serialized directly.
type DataType string

// Field kinds. The first group covers scalar values, the second covers
// containers.
const (
	// TypeNull is the kind of a nil value.
	TypeNull DataType = "null"
	// TypeBoolean is the kind of a bool value.
	TypeBoolean DataType = "boolean"
	// TypeNumber is the kind of any numeric value.
	TypeNumber DataType = "number"
	// TypeString is the kind of a string value.
	TypeString DataType = "string"
	// TypeTimestamp is the kind of a point-in-time value.
	// NOTE(review): Classify never returns this kind; presumably it is
	// assigned by callers that recognize timestamps — confirm.
	TypeTimestamp DataType = "timestamp"

	// TypeArray is the kind of a []any value.
	TypeArray DataType = "array"
	// TypeObject is the kind of a map[string]any value.
	TypeObject DataType = "object"
)
// Flatten flattens nested objects under a snake_case prefix.
// Returns a new map, never mutates input.
func Flatten(in map[string]any) map[string]any {
out := make(map[string]any, len(in))
for k, v := range in {
flattenInto(out, sanitize(k), v)
}
return out
}
// flattenInto writes v into out under prefix. A non-empty object is expanded
// recursively, one entry per leaf, with each child key sanitized and appended
// to prefix after a "_" separator. Everything else is stored at prefix
// unchanged.
func flattenInto(out map[string]any, prefix string, v any) {
	obj, isObject := v.(map[string]any)
	if !isObject || len(obj) == 0 {
		// Leaf: scalars, arrays, nil, and empty objects are kept verbatim so
		// the field still shows up in the flattened result.
		out[prefix] = v
		return
	}
	for name, child := range obj {
		flattenInto(out, prefix+"_"+sanitize(name), child)
	}
}
// sanitize normalizes a raw key into the [a-z0-9_] alphabet.
//
// ASCII letters are lowercased; ASCII digits and "_" are kept; every other
// rune (punctuation, whitespace, non-ASCII letters and digits, invalid
// UTF-8) becomes a single "_". Leading underscores are kept; trailing
// underscores are trimmed, so a key made up only of replaced characters
// sanitizes to "".
func sanitize(k string) string {
	if k == "" {
		return k
	}
	var b strings.Builder
	// Each input rune produces exactly one output byte, so len(k) is an
	// upper bound on the result size.
	b.Grow(len(k))
	for _, r := range k {
		// Explicit ASCII ranges are used rather than unicode.IsLetter and
		// unicode.IsDigit: those also accept non-ASCII letters and digits,
		// which would leak characters outside [a-z0-9_] into the result.
		switch {
		case 'a' <= r && r <= 'z', '0' <= r && r <= '9', r == '_':
			b.WriteByte(byte(r))
		case 'A' <= r && r <= 'Z':
			b.WriteByte(byte(unicode.ToLower(r)))
		default:
			b.WriteByte('_')
		}
	}
	return strings.TrimRight(b.String(), "_")
}
// Classify maps a Go value (from json.Unmarshal) to a DataType.
//
// nil is TypeNull, bool is TypeBoolean, string is TypeString, []any is
// TypeArray and map[string]any is TypeObject. json.Unmarshal stores JSON
// numbers as float64 when decoding into an interface value; the other
// built-in float and integer types are accepted too, so values built in Go
// code also classify as TypeNumber. Any type not listed falls back to
// TypeString.
func Classify(v any) DataType {
	switch v.(type) {
	case nil:
		return TypeNull
	case bool:
		return TypeBoolean
	case float64, float32,
		int, int8, int16, int32, int64,
		uint, uint8, uint16, uint32, uint64:
		return TypeNumber
	case string:
		return TypeString
	case []any:
		return TypeArray
	case map[string]any:
		return TypeObject
	default:
		// Unknown types are treated as strings rather than rejected.
		return TypeString
	}
}