Python - Extract values from a heavily nested list of dictionaries with duplicate key-value pairs
I am trying to extract the total cash and cash equivalents values from a complex and messy list of dictionaries. A shortened version of the structure follows below.
I've tried: maps, `DataFrame.from_dict` and `.from_records`. I am trying to avoid using `re`.
I'm stumped.
[{u'fields': [], u'reportdate': u'2 june 2016', u'reportid': u'balancesheet', u'reportname': u'balance sheet', u'reporttitles': [u'balance sheet', u'test company', u'as @ 30 june 2016'], u'reporttype': u'balancesheet', u'rows': [{u'cells': [{u'value': u''}, {u'value': u'30 jun 2016'}, {u'value': u'30 jun 2015'}], u'rowtype': u'header'}, {u'rowtype': u'section', u'rows': [], u'title': u'assets'}, {u'rowtype': u'section', u'rows': [{u'cells': [{u'attributes': [{u'id': u'account', u'value': u'c0bxx922-cc31-4d53-b060-cbf23511`2533'}], u'value': u'test bank 1'}, {u'attributes': [{u'id': u'account', u'value': u'c1b4xx22-cc31-4d53-b060-cb45282533'}], u'value': u'5555.20'}, {u'attributes': [{u'id': u'account', u'value': u'c2b44922-cc31-4d53-b060-cbf4532582533'}], u'value': u'5555.20'}], u'rowtype': u'row'}, {u'cells': [{u'attributes': [{u'id': u'account', u'value': u'290c7c3c-a712-4ads6f-9a2f-3d5258aad5a9e'}], u'value': u'test bank 2'}, {u'attributes': [{u'id': u'account', u'value': u'490c7c32-axxxdf6f-9a2f-3db682a3ad5a9e'}], u'value': u'55555.20'}, {u'attributes': [{u'id': u'account', u'value': u'490xxc3c-a71-adsf6f-9a2f-3d3aad5a9e'}], u'value': u'55555.20'}], u'rowtype': u'row'}, {u'cells': [{u'attributes': [{u'id': u'account', u'value': u'c6d4da40-f0df1b0-8f7d-xx45b1405'}], u'value': u'test bank 3'}, {u'attributes': [{u'id': u'account', u'value': u'c6d4da4fg-df-41b0-8f7d-54xx345b1405'}], u'value': u'5555.20'}, {u'attributes': [{u'id': u'account', u'value': u'c6d4dafgss-9-41b0-8f7d-60xx5b1405'}], u'value': u'5555.20'}], u'rowtype': u'row'}, {u'cells': [{u'value': u'total cash , cash equivalents'}, {u'value': u'5555555.20'}, {u'value': u'5555555.20'}], u'rowtype': u'summaryrow'}], u'title': u'cash , cash equivalents'}, {u'rowtype': u'section',
If you know that the data will always have the format above, and you only need these 2 values, you can access them directly (assuming `data` is
the structure above):
# Index path through the structure shown above:
#   data[0]      -> the first (only) report dict
#   ['rows'][2]  -> third top-level row: the 'cash , cash equivalents' section
#   ['rows'][3]  -> fourth row of that section: the 'summaryrow'
#   ['cells'][1] and ['cells'][2] -> the two value columns
#                                    ('30 jun 2016' and '30 jun 2015' in the header)
summary_cells = data[0]['rows'][2]['rows'][3]['cells']
print(summary_cells[1]['value'])
print(summary_cells[2]['value'])
However, this is error-prone, both in writing down the correct expression and with respect to changes in the order of the lists (which might not be defined in the format). Since there is a semantic structure behind the data, you could translate the raw data into a more accessible object. You might want to change the details, but this is a starting point:
try:
    from collections.abc import Mapping
except ImportError:  # Python 2: the ABCs still live directly in `collections`
    from collections import Mapping
from collections import OrderedDict

import pandas as pd


class Report(Mapping):
    """Read-only mapping from section title to a DataFrame of that section's rows.

    The constructor takes one report dict (an element of the list shown
    above). Its ``'rows'`` entry is consumed to build ``self.sections`` (and
    ``self.header`` when a header row is present); every other key of the
    dict (``reportname``, ``reportdate``, ...) is exposed as an instance
    attribute.

    Raises:
        KeyError: if ``data`` has no ``'rows'`` key, or a row lacks a key the
            matching ``make_*`` method reads.
        AttributeError: if a top-level row has a ``'rowtype'`` for which no
            ``make_<rowtype>`` method exists.
        ValueError: if a non-label cell of a section row cannot be converted
            to ``float``.
    """

    def __init__(self, data):
        self.sections = OrderedDict()
        # Shallow copy so that popping 'rows' does not mutate the caller's dict.
        data = dict(data)
        for row in data.pop('rows'):
            # Dispatch on the row type: 'header' -> make_header,
            # 'section' -> make_section.
            getattr(self, 'make_%s' % row['rowtype'])(row)
        # Expose the remaining report metadata as plain attributes.
        self.__dict__.update(data)

    def make_header(self, row):
        """Store the header row's cell values as ``self.header``."""
        self.header = [c['value'] for c in row['cells']]

    def make_section(self, sec):
        """Build a DataFrame for one section and store it under its title.

        Each row of the section becomes one DataFrame row: the first cell is
        used as the row label, the remaining cells are converted to floats.
        """
        def make_row(row):
            cells = [c['value'] for c in row['cells']]
            return pd.Series([float(v) for v in cells[1:]], name=cells[0])

        self.sections[sec['title']] = pd.DataFrame(
            [make_row(r) for r in sec['rows']]
        )

    def __getitem__(self, item):
        return self.sections[item]

    def __len__(self):
        return len(self.sections)

    def __iter__(self):
        return iter(self.sections)


if __name__ == '__main__':
    # `data` is the list of report dicts shown in the question.
    report = Report(data[0])
    print(report.reportname)
    print(report['cash , cash equivalents'])