Flatten JSON

The following function, originally written by Amir Ziai, is designed to flatten complex, hierarchical JSON into something that can be more readily imported into something like a Pandas DataFrame.

For other documentation of this technique, see this medium article, or this one, or the GitHub repo.

def flatten_json(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '_')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '_')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    
    return out
states = [{'state': 'Florida',
           'shortname': 'FL',
           'info': {'governor': 'Rick Scott'},
           'counties': [{'name': 'Dade', 'population': 12345},
                        {'name': 'Broward', 'population': 40000},
                        {'name': 'Palm Beach', 'population': 60000}]},
          {'state': 'Ohio',
           'shortname': 'OH',
           'info': {'governor': 'John Kasich'},
           'counties': [{'name': 'Summit', 'population': 1234},
                        {'name': 'Cuyahoga', 'population': 1337}]}]
states
[{'state': 'Florida',
  'shortname': 'FL',
  'info': {'governor': 'Rick Scott'},
  'counties': [{'name': 'Dade', 'population': 12345},
   {'name': 'Broward', 'population': 40000},
   {'name': 'Palm Beach', 'population': 60000}]},
 {'state': 'Ohio',
  'shortname': 'OH',
  'info': {'governor': 'John Kasich'},
  'counties': [{'name': 'Summit', 'population': 1234},
   {'name': 'Cuyahoga', 'population': 1337}]}]
states_flat = [flatten_json(state) for state in states]
states_flat
[{'state': 'Florida',
  'shortname': 'FL',
  'info_governor': 'Rick Scott',
  'counties_0_name': 'Dade',
  'counties_0_population': 12345,
  'counties_1_name': 'Broward',
  'counties_1_population': 40000,
  'counties_2_name': 'Palm Beach',
  'counties_2_population': 60000},
 {'state': 'Ohio',
  'shortname': 'OH',
  'info_governor': 'John Kasich',
  'counties_0_name': 'Summit',
  'counties_0_population': 1234,
  'counties_1_name': 'Cuyahoga',
  'counties_1_population': 1337}]

This flattened JSON can be readily converted into a DataFrame.

import pandas as pd

pd.DataFrame(states_flat)

state shortname info_governor counties_0_name counties_0_population counties_1_name counties_1_population counties_2_name counties_2_population
0 Florida FL Rick Scott Dade 12345 Broward 40000 Palm Beach 60000.0
1 Ohio OH John Kasich Summit 1234 Cuyahoga 1337 NaN NaN