Mercurial > hg > Lemuriformes
comparison lemuriformes/json2csv.py @ 15:0d1b8bb1d97b
SQL + data related functionality
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Sun, 10 Dec 2017 17:16:52 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 14:756dbd3e391e | 15:0d1b8bb1d97b |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 convert JSON list of hashes to CSV | |
| 5 """ | |
| 6 # Note: we could use https://docs.python.org/2/library/csv.html#csv.DictWriter | |
| 7 # but we're being careful here since we actually want this data structure in code, | |
| 8 # not just for de/serialization | |
| 9 | |
| 10 | |
| 11 import argparse | |
| 12 import csv | |
| 13 import json | |
| 14 import sys | |
| 15 from .cast import unify | |
| 16 from .cli import ConfigurationParser | |
| 17 | |
| 18 | |
def flatten_list_of_dicts(list_of_dicts, header=None):
    """
    flattens a list of dicts into a list of lists.

    Every dict in `list_of_dicts` must have exactly the same set of keys
    as the first one.  `header`, if given, selects (and orders) the keys
    emitted for each row and must be a subset of those keys; otherwise
    the keys of the first dict are used, in that dict's iteration order.

    Returns (header, list_of_lists); for empty input this is
    (header or [], []), so the result can always be unpacked.

    Raises AssertionError if `header` names a key that the first dict
    does not have, or if any dict's keys differ from the first dict's.
    """

    if not list_of_dicts:
        # keep the documented 2-tuple shape even when there is no data
        return (header if header else [], [])

    # sanity
    keys = list(list_of_dicts[0].keys())
    key_set = set(keys)
    if header:
        unknown = set(header).difference(key_set)
        if unknown:
            # str() + sorted() gives a deterministic, always-joinable message
            raise AssertionError(
                "header contains elements not seen in the set: {}".format(
                    ', '.join(sorted(str(key) for key in unknown))))
    for item in list_of_dicts:
        # ensure each item has the same keys
        if key_set != set(item.keys()):
            raise AssertionError(
                "Keys not consistent! {} != {}".format(sorted(keys),
                                                       sorted(item.keys())))

    if not header:
        # a real list, not a view tied to the first dict
        header = keys

    # flatten it!
    retval = [[item[key] for key in header] for item in list_of_dicts]

    return (header, retval)
| 49 | |
| 50 | |
def main(args=None):
    """
    CLI: read a JSON list of hashes and write it out as CSV.

    `args` is the list of command-line arguments; when None,
    `sys.argv[1:]` is read at call time (not at import time).
    """

    if args is None:
        args = sys.argv[1:]

    # parse command line
    # NOTE(review): ConfigurationParser comes from .cli and is used here
    # through the argparse-style add_argument/parse_args interface.
    parser = ConfigurationParser(description=__doc__)
    parser.add_argument('json', type=argparse.FileType('r'),
                        help="JSON file of list of hashes")
    parser.add_argument('-H', '--header', dest='header', nargs='+',
                        help="use these fields for header")
    parser.add_argument('-o', '--output', dest='output',
                        type=argparse.FileType('w'), default=sys.stdout,
                        help="path to output, or stdout by default")
    options = parser.parse_args(args)

    # read
    try:
        data = json.load(options.json)
    finally:
        # FileType opened this handle for us; never close the process's stdin
        if options.json is not sys.stdin:
            options.json.close()

    # flatten
    if data:
        header, flattened = flatten_list_of_dicts(data, header=options.header)
    else:
        # no rows: emit just the requested header, if any
        header, flattened = (options.header or [], [])

    # write
    try:
        writer = csv.writer(options.output)
        if header:
            writer.writerow(header)
        for row in flattened:
            # unify is imported from .cast; each cell is passed through it
            # before being written
            writer.writerow([unify(v) for v in row])
    finally:
        # flush/close a real output file, but leave stdout open
        if options.output is not sys.stdout:
            options.output.close()
| 76 | |
# run the CLI only when executed as a script, not when imported
if __name__ == "__main__":
    main()
