init ingestion
This commit is contained in:
92
ingestion/ingest/internal/schema/flatten.go
Normal file
92
ingestion/ingest/internal/schema/flatten.go
Normal file
@@ -0,0 +1,92 @@
|
||||
// Package schema turns nested JSON objects into flat key/value maps and
|
||||
// classifies field types for conflict detection.
|
||||
//
|
||||
// Rules:
|
||||
// - keys are joined with "_" : {"a": {"b": 1}} -> {"a_b": 1}
|
||||
// - arrays are preserved as-is and not descended into
|
||||
// - keys are sanitized: lowercase, non-[a-z0-9_] replaced with "_"
|
||||
package schema
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// DataType is the string label assigned to a field's value kind. The labels
// are what Classify returns and are compared to detect type conflicts.
type DataType string

// Known DataType labels.
const (
	// Scalar kinds.
	TypeNull    DataType = "null"
	TypeBoolean DataType = "boolean"
	TypeNumber  DataType = "number"
	TypeString  DataType = "string"

	// TypeTimestamp is declared here but is not returned by Classify.
	// NOTE(review): presumably assigned by another stage — confirm.
	TypeTimestamp DataType = "timestamp"

	// Container kinds.
	TypeArray  DataType = "array"
	TypeObject DataType = "object"
)
|
||||
|
||||
// Flatten flattens nested objects under a snake_case prefix.
|
||||
// Returns a new map, never mutates input.
|
||||
func Flatten(in map[string]any) map[string]any {
|
||||
out := make(map[string]any, len(in))
|
||||
for k, v := range in {
|
||||
flattenInto(out, sanitize(k), v)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func flattenInto(out map[string]any, prefix string, v any) {
|
||||
switch x := v.(type) {
|
||||
case map[string]any:
|
||||
if len(x) == 0 {
|
||||
out[prefix] = x
|
||||
return
|
||||
}
|
||||
for k, child := range x {
|
||||
flattenInto(out, prefix+"_"+sanitize(k), child)
|
||||
}
|
||||
default:
|
||||
out[prefix] = v
|
||||
}
|
||||
}
|
||||
|
||||
// sanitize normalizes a key so that it only contains characters in
// [a-z0-9_].
//
// ASCII letters are lowercased, ASCII digits and "_" are kept, and every other
// rune — punctuation, whitespace, and all non-ASCII runes, including non-ASCII
// letters and digits and invalid UTF-8 — is replaced by a single "_".
// Leading underscores are kept; trailing underscores are trimmed, so a key
// made up entirely of replaced characters yields "".
func sanitize(k string) string {
	if k == "" {
		return k
	}
	var b strings.Builder
	// Every input rune produces exactly one output byte, so len(k) is an
	// upper bound on the output size.
	b.Grow(len(k))
	for _, r := range k {
		switch {
		case r > unicode.MaxASCII:
			// unicode.IsLetter/IsDigit accept non-ASCII runes; reject them
			// up front so the output stays within [a-z0-9_].
			b.WriteByte('_')
		case unicode.IsLetter(r):
			b.WriteRune(unicode.ToLower(r))
		case unicode.IsDigit(r) || r == '_':
			b.WriteRune(r)
		default:
			b.WriteByte('_')
		}
	}
	return strings.TrimRight(b.String(), "_")
}
|
||||
|
||||
// Classify maps a Go value (from json.Unmarshal) to a DataType.
|
||||
func Classify(v any) DataType {
|
||||
switch x := v.(type) {
|
||||
case nil:
|
||||
return TypeNull
|
||||
case bool:
|
||||
return TypeBoolean
|
||||
case float64, float32, int, int32, int64, uint, uint32, uint64:
|
||||
return TypeNumber
|
||||
case string:
|
||||
_ = x
|
||||
return TypeString
|
||||
case []any:
|
||||
return TypeArray
|
||||
case map[string]any:
|
||||
return TypeObject
|
||||
default:
|
||||
return TypeString
|
||||
}
|
||||
}
|
||||
53
ingestion/ingest/internal/schema/flatten_test.go
Normal file
53
ingestion/ingest/internal/schema/flatten_test.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package schema
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFlatten_NestedObject(t *testing.T) {
|
||||
in := map[string]any{
|
||||
"user": map[string]any{
|
||||
"id": "u_1",
|
||||
"profile": map[string]any{"age": 30, "name": "Phuoc"},
|
||||
},
|
||||
"plan": "pro",
|
||||
}
|
||||
got := Flatten(in)
|
||||
assert.Equal(t, "u_1", got["user_id"])
|
||||
assert.Equal(t, 30, got["user_profile_age"])
|
||||
assert.Equal(t, "Phuoc", got["user_profile_name"])
|
||||
assert.Equal(t, "pro", got["plan"])
|
||||
}
|
||||
|
||||
func TestFlatten_SanitizesKeys(t *testing.T) {
|
||||
in := map[string]any{
|
||||
"User Email": "x@y",
|
||||
"price.usd": 9.99,
|
||||
"meta!": map[string]any{"X-Y": 1},
|
||||
}
|
||||
got := Flatten(in)
|
||||
assert.Equal(t, "x@y", got["user_email"])
|
||||
assert.Equal(t, 9.99, got["price_usd"])
|
||||
assert.Equal(t, 1, got["meta_x_y"])
|
||||
}
|
||||
|
||||
func TestFlatten_PreservesArrays(t *testing.T) {
|
||||
in := map[string]any{
|
||||
"tags": []any{"a", "b"},
|
||||
}
|
||||
got := Flatten(in)
|
||||
arr, ok := got["tags"].([]any)
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, 2, len(arr))
|
||||
}
|
||||
|
||||
func TestClassify(t *testing.T) {
|
||||
assert.Equal(t, TypeString, Classify("hi"))
|
||||
assert.Equal(t, TypeNumber, Classify(float64(1.5)))
|
||||
assert.Equal(t, TypeBoolean, Classify(true))
|
||||
assert.Equal(t, TypeNull, Classify(nil))
|
||||
assert.Equal(t, TypeArray, Classify([]any{1, 2}))
|
||||
assert.Equal(t, TypeObject, Classify(map[string]any{}))
|
||||
}
|
||||
Reference in New Issue
Block a user