Work in Progress: This page is under development. Use the feedback button on the bottom right to help us improve it.

JSON Functions

Functions for parsing and querying JSON data.

Extracting Values

JSON_GET

Extract a value by path, preserving its type:

SELECT
  JSON_GET(payload, 'user.id') as user_id,
  JSON_GET(payload, 'items[0].name') as first_item,
  JSON_GET(payload, 'metadata.tags[1]') as second_tag
FROM events

Typed Extraction

Get values as specific types (returns default if not found):

FunctionReturnsDefault
JSON_GET_STR(json, path)String''
JSON_GET_INT(json, path)Integer0
JSON_GET_FLOAT(json, path)Float0.0
JSON_GET_BOOL(json, path)Booleanfalse
JSON_GET_JSON(json, path)JSON stringnull
SELECT
  JSON_GET_STR(data, 'name') as name,
  JSON_GET_INT(data, 'count') as count,
  JSON_GET_FLOAT(data, 'price') as price,
  JSON_GET_BOOL(data, 'active') as is_active,
  JSON_GET_JSON(data, 'nested.object') as nested_json
FROM messages

JSON_AS_TEXT

Get any value as a string:

SELECT
  JSON_AS_TEXT(data, 'value') as value_string  -- Works for any type
FROM events

JSON Operators

PostgreSQL-style operators:

OperatorDescriptionExample
->Get JSON elementdata->'user'
->>Get element as textdata->>'name'
?Check key existsdata ? 'email'
SELECT
  payload->'user'->>'name' as user_name,
  payload->'items'->0->>'sku' as first_sku
FROM orders
WHERE payload ? 'user'

Checking Contents

JSON_CONTAINS

Check if a key exists:

SELECT * FROM events
WHERE JSON_CONTAINS(payload, 'error')
 
-- Check for nested key
SELECT * FROM events
WHERE JSON_CONTAINS(payload, 'user.email')

JSON_LENGTH

Get the length of arrays or objects:

SELECT
  JSON_LENGTH(data, 'items') as item_count,
  JSON_LENGTH(data, 'tags') as tag_count
FROM orders
WHERE JSON_LENGTH(data, 'items') > 0

JSONPath Functions

For more complex queries using JSONPath syntax:

EXTRACT_JSON

Extract matching elements as an array:

SELECT
  EXTRACT_JSON(data, '$.items[*].price') as all_prices,
  EXTRACT_JSON(data, '$.users[?(@.active==true)].name') as active_users
FROM events

EXTRACT_JSON_STRING

Extract and return as strings:

SELECT
  EXTRACT_JSON_STRING(data, '$.items[*].name') as item_names
FROM orders

Practical Examples

Parse Event Data

SELECT
  JSON_GET_STR(event, 'type') as event_type,
  JSON_GET_STR(event, 'user.id') as user_id,
  JSON_GET_STR(event, 'user.email') as email,
  JSON_GET_INT(event, 'metadata.version') as version,
  JSON_GET_JSON(event, 'properties') as properties
FROM raw_events

Filter by JSON Fields

-- Filter by nested value
SELECT * FROM orders
WHERE JSON_GET_STR(data, 'status') = 'pending'
 
-- Filter by existence
SELECT * FROM users
WHERE JSON_CONTAINS(profile, 'preferences.notifications')
 
-- Filter by array length
SELECT * FROM orders
WHERE JSON_LENGTH(data, 'items') >= 3

Flatten Nested JSON

SELECT
  JSON_GET_STR(order_data, 'order_id') as order_id,
  JSON_GET_STR(order_data, 'customer.name') as customer_name,
  JSON_GET_STR(order_data, 'customer.address.city') as city,
  JSON_GET_FLOAT(order_data, 'totals.subtotal') as subtotal,
  JSON_GET_FLOAT(order_data, 'totals.tax') as tax,
  JSON_GET_FLOAT(order_data, 'totals.total') as total
FROM raw_orders

Handle Missing Fields

SELECT
  JSON_GET_STR(data, 'id') as id,
  COALESCE(
    NULLIF(JSON_GET_STR(data, 'email'), ''),
    JSON_GET_STR(data, 'contact.email'),
    'unknown'
  ) as email
FROM users

Aggregate JSON Arrays

-- Count items per order
SELECT
  JSON_GET_STR(order_data, 'order_id') as order_id,
  JSON_LENGTH(order_data, 'items') as item_count,
  TUMBLE(interval '1 hour') as window
FROM orders
GROUP BY order_id, window

Process Arrays in JSON

-- Extract from JSON array (when items is an array in JSON)
SELECT
  order_id,
  JSON_GET_STR(item_json, 'sku') as sku,
  JSON_GET_INT(item_json, 'quantity') as quantity,
  JSON_GET_FLOAT(item_json, 'price') as price
FROM orders,
UNNEST(CAST(JSON_GET_JSON(order_data, 'items') as VARCHAR[])) as item_json

Build JSON Output

When your sink expects JSON, construct it:

SELECT
  CONCAT(
    '{"user_id":"', user_id,
    '","event_count":', CAST(COUNT(*) as VARCHAR),
    ',"window_start":"', CAST(window.start as VARCHAR),
    '"}'
  ) as json_output
FROM events
GROUP BY user_id, TUMBLE(interval '1 minute') as window