Working with Nested JSONs

APIs
JSON
Author

Allison Day

Published

November 6, 2022

Imports
# this is code-fold
import pandas as pd
import json

json_raw = "code/json_example.json"
with open(json_raw) as json_data:
    json_example = json.load(json_data)

Lets say pulled an API and it gave you this json:

{
    "Total": 1,
    "Page": 1,
    "Products": [
        {
            "ID": 12345,
            "SKU": "Clownfish",
            "Name": "Nemo",
            "Suppliers": [
                {
                    "SupplierID": "67891",
                    "SupplierName": "Little Fish Inc",
                    "Cost": 5.99
                },
                {
                    "SupplierID": "24601",
                    "SupplierName": "Large Pond Co",
                    "Cost": 5.59
                }
            ],
            "PriceTiers": {
                "Public Retail Price": 24.99,
                "Employee Price": 6.0
            }
        }
    ]
}

Basic

df = pd.json_normalize(
    json_example,
    record_path=["Products"],
    errors="ignore",
)

print(df.to_markdown())
|    |    ID | SKU       | Name   | Suppliers                                                                                                                                          |   PriceTiers.Public Retail Price |   PriceTiers.Employee Price |
|---:|------:|:----------|:-------|:---------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------:|----------------------------:|
|  0 | 12345 | Clownfish | Nemo   | [{'SupplierID': '67891', 'SupplierName': 'Little Fish Inc', 'Cost': 5.99}, {'SupplierID': '24601', 'SupplierName': 'Large Pond Co', 'Cost': 5.59}] |                            24.99 |                           6 |

By Suppliers

df = pd.json_normalize(
    json_example,
    record_path=["Products", "Suppliers"],
    meta=[
        ["Products"],
    ],
    errors="ignore",
)

print(df.to_markdown())
|    |   SupplierID | SupplierName    |   Cost | Products                                                                                                                                                                                                                                                                                |
|---:|-------------:|:----------------|-------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|  0 |        67891 | Little Fish Inc |   5.99 | {'ID': 12345, 'SKU': 'Clownfish', 'Name': 'Nemo', 'Suppliers': [{'SupplierID': '67891', 'SupplierName': 'Little Fish Inc', 'Cost': 5.99}, {'SupplierID': '24601', 'SupplierName': 'Large Pond Co', 'Cost': 5.59}], 'PriceTiers': {'Public Retail Price': 24.99, 'Employee Price': 6.0}} |
|  1 |        24601 | Large Pond Co   |   5.59 | {'ID': 12345, 'SKU': 'Clownfish', 'Name': 'Nemo', 'Suppliers': [{'SupplierID': '67891', 'SupplierName': 'Little Fish Inc', 'Cost': 5.99}, {'SupplierID': '24601', 'SupplierName': 'Large Pond Co', 'Cost': 5.59}], 'PriceTiers': {'Public Retail Price': 24.99, 'Employee Price': 6.0}} |

Get Product Columns

df = pd.json_normalize(
    json_example,
    record_path=["Products", "Suppliers"],
    meta=[
        ["Products", "ID"],
        ["Products", "SKU"],
        ["Products", "Name"],
        ["Products", "PriceTiers"],
    ],
    errors="ignore",
)

print(df.to_markdown())
|    |   SupplierID | SupplierName    |   Cost |   Products.ID | Products.SKU   | Products.Name   | Products.PriceTiers                                   |
|---:|-------------:|:----------------|-------:|--------------:|:---------------|:----------------|:------------------------------------------------------|
|  0 |        67891 | Little Fish Inc |   5.99 |         12345 | Clownfish      | Nemo            | {'Public Retail Price': 24.99, 'Employee Price': 6.0} |
|  1 |        24601 | Large Pond Co   |   5.59 |         12345 | Clownfish      | Nemo            | {'Public Retail Price': 24.99, 'Employee Price': 6.0} |

Include all of the sub dictionary columns too

df = pd.json_normalize(
    json_example,
    record_path=["Products", "Suppliers"],
    meta=[
        ["Products", "ID"],
        ["Products", "SKU"],
        ["Products", "Name"],
        ["Products", "PriceTiers", "Public Retail Price"],
        ["Products", "PriceTiers", "Employee Price"],
    ],
    errors="ignore",
)

print(df.to_markdown())
|    |   SupplierID | SupplierName    |   Cost |   Products.ID | Products.SKU   | Products.Name   |   Products.PriceTiers.Public Retail Price |   Products.PriceTiers.Employee Price |
|---:|-------------:|:----------------|-------:|--------------:|:---------------|:----------------|------------------------------------------:|-------------------------------------:|
|  0 |        67891 | Little Fish Inc |   5.99 |         12345 | Clownfish      | Nemo            |                                     24.99 |                                    6 |
|  1 |        24601 | Large Pond Co   |   5.59 |         12345 | Clownfish      | Nemo            |                                     24.99 |                                    6 |