// Package schema turns nested JSON objects into flat key/value maps and // classifies field types for conflict detection. // // Rules: // - keys are joined with "_" : {"a": {"b": 1}} -> {"a_b": 1} // - arrays are preserved as-is and not descended into // - keys are sanitized: lowercase, non-[a-z0-9_] replaced with "_" package schema import ( "strings" "unicode" ) type DataType string const ( TypeString DataType = "string" TypeNumber DataType = "number" TypeBoolean DataType = "boolean" TypeObject DataType = "object" TypeArray DataType = "array" TypeTimestamp DataType = "timestamp" TypeNull DataType = "null" ) // Flatten flattens nested objects under a snake_case prefix. // Returns a new map, never mutates input. func Flatten(in map[string]any) map[string]any { out := make(map[string]any, len(in)) for k, v := range in { flattenInto(out, sanitize(k), v) } return out } func flattenInto(out map[string]any, prefix string, v any) { switch x := v.(type) { case map[string]any: if len(x) == 0 { out[prefix] = x return } for k, child := range x { flattenInto(out, prefix+"_"+sanitize(k), child) } default: out[prefix] = v } } // sanitize replaces characters outside [a-z0-9_] with "_" and lowercases. // Leading underscores are kept; trailing underscores are trimmed. func sanitize(k string) string { if k == "" { return k } var b strings.Builder b.Grow(len(k)) for _, r := range k { switch { case unicode.IsLetter(r): b.WriteRune(unicode.ToLower(r)) case unicode.IsDigit(r) || r == '_': b.WriteRune(r) default: b.WriteRune('_') } } return strings.TrimRight(b.String(), "_") } // Classify maps a Go value (from json.Unmarshal) to a DataType. func Classify(v any) DataType { switch x := v.(type) { case nil: return TypeNull case bool: return TypeBoolean case float64, float32, int, int32, int64, uint, uint32, uint64: return TypeNumber case string: _ = x return TypeString case []any: return TypeArray case map[string]any: return TypeObject default: return TypeString } }