Mercurial > hg > config
annotate python/html2flux.py @ 379:61a3c07e9f7a
kill reject files
| author | Jeff Hammel <jhammel@mozilla.com> | 
|---|---|
| date | Thu, 25 Jul 2013 03:36:24 -0700 | 
| parents | ee3c1b65d6d1 | 
| children | a43d0205f80b | 
| rev | line source | 
|---|---|
| 
45
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
2 | 
| 292 | 3 """ | 
| 4 transform an HTML <dl> file into a fluxbox menu | |
| 5 if no file is given, read from stdin | |
| 6 | |
| 7 <dl><a>submenu name</a> | |
| 8 <dt>program label</dt><dd>command</dd> | |
| 9 <dt>another program label</dt><dd>command2</dd> | |
| 10 </dl> | |
| 11 | |
| 12 x-form -> internal format: | |
| 13 | |
| 14 ('submenu name': [('program label', 'command'), | |
| 15 ('another program label', 'command2')]) | |
| 16 """ | |
| 17 | |
| 18 import optparse | |
| 294 | 19 import os | 
| 
45
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
20 import sys | 
| 
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
21 from lxml import etree | 
| 292 | 22 from lsex import lsex # local import | 
| 
45
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
23 | 
| 292 | 24 # available executables | 
| 
45
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
# basenames of the paths returned by lsex(); readmenu() checks commands against this set
executables = set(path.rsplit('/', 1)[-1] for path in lsex())
| 
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
26 | 
def readmenu(dl, output, top=True):
    """
    read a menu from a <dl> element

    - dl: element providing the lxml-style iterchildren() API
    - output: not used here; kept so existing callers keep working
    - top: True for the outermost <dl>, which has no name

    Returns (name, menu_items):

    - name: text of the <a> that is the first child of a nested <dl>,
      otherwise None
    - menu_items: list of (label, command) pairs for <dt>/<dd> programs
      and (name, menu_items) pairs for nested <dl> submenus

    A command is kept only if its first word is an absolute path or
    is found in the module-level `executables` set.
    """
    # TODO: probably don't really need lxml

    menu_items = []
    name = None # menu name
    firstchild = True
    label = None
    for child in dl.iterchildren():

        tag = child.tag
        if callable(tag):
            # lxml gives comments and processing instructions a
            # callable tag; they are not menu content and must not
            # count as the first child
            continue

        if tag == 'a':
            # TODO: better way of labeling this!
            if firstchild and not top:
                # itertext() instead of .text: .text is None when the
                # label is wrapped in another element
                name = _element_text(child)

        elif tag == 'dt':
            # item label
            label = _element_text(child)

        elif tag == 'dd':
            # command
            command = _element_text(child)
            # TODO: classes
            words = command.split()
            if words:  # an empty <dd> has no command to run
                executable = words[0]
                if os.path.isabs(executable) or executable in executables:
                    menu_items.append((label, command))

        elif tag == 'dl':
            # submenu
            menu_items.append(readmenu(child, output, top=False))

        # only the very first element may name the menu
        firstchild = False

    return (name, menu_items)

def _element_text(element):
    """whitespace-normalized text of an element and its descendants"""
    return ' '.join([i.strip() for i in element.itertext() if i.strip()])
| 292 | 57 | 
def printflux(name, menu, output, top=True):
    """
    write a menu in fluxbox menu syntax

    - name: submenu title; not printed for the top-level menu
    - menu: sequence of (label, item) pairs where item is either a
      command string or a nested sequence of such pairs
    - output: file-like object for writing
    - top: when False, the items are wrapped in [submenu] ... [end]
    """

    # string check that works on python 2 (str and unicode) and python 3
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # print [submenu] tag for this menu
    if not top:
        output.write('[submenu] (%s)\n' % (name or ''))

    # print menu items
    for label, item in menu:
        if isinstance(item, string_types):
            # command
            output.write('[exec] (%s) {%s}\n' % (label, item))
        else:
            # submenu
            printflux(label, item, output, top=False)

    # print end of this submenu
    if not top:
        output.write('[end]\n')
| 
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
80 | 
def printmenu(dl, output):
    """
    read the menu from a <dl> element and write it as a fluxbox menu

    - dl: element handed to readmenu()
    - output: file-like object for writing
    """
    parsed = readmenu(dl, output)
    title, entries = parsed
    printflux(title, entries, output)
| 293 | 84 | 
def main(args=sys.argv[1:]):
    """
    command line interface

    - args: command line arguments, without the program name

    Reads HTML from the file named by the first positional argument
    (stdin if none) and writes the fluxbox menu to the file given by
    -o/--output (stdout if not given).  Exits with a usage error if
    the input has no <dl> element.
    """

    # parse command line option
    usage = '%prog [options] [menu.html]'
    parser = optparse.OptionParser(usage=usage,
                                   description=__doc__)
    parser.add_option('--collapse', dest='collapse',
                      action='store_true', default=False,
                      help="collapse menus with a single item to that item")
    parser.add_option('-o', '--output', dest='output',
                      help="output file [Default: <stdout>]")
    options, args = parser.parse_args(args)
    # TODO: --collapse is accepted but not implemented yet

    # read input; the file is closed as soon as it has been read
    if args:
        with open(args[0], 'rb') as htmlfile:
            html = htmlfile.read()
    else:
        html = sys.stdin.read()

    # get first <dl> element; find('.//dl') only looks at descendants,
    # so a document whose root is the <dl> is handled explicitly
    dom = etree.fromstring(html)
    dl = dom if dom.tag == 'dl' else dom.find('.//dl')
    if dl is None:
        parser.error("no <dl> element found in input")

    # write the menu; the output file is opened only after the input
    # has been parsed so a bad input does not truncate an existing file
    if options.output:
        with open(options.output, 'w') as fluxout:
            printmenu(dl, fluxout)
    else:
        printmenu(dl, sys.stdout)
| 
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
113 | 
| 
 
069a739d88ad
get fluxbox menu from a webpage, i.e. http://k0s.org/programs.html
 
Jeff Hammel <k0scist@gmail.com> 
parents:  
diff
changeset
 | 
if __name__ == '__main__':
    # run the command line interface only when executed as a script
    main()
