changeset 39:986f8a20c234

STUB: textshaper/url2txt.py
author Jeff Hammel <k0scist@gmail.com>
date Thu, 03 Jul 2014 13:46:30 -0700
parents 56fa70e2e239
children e1832eeae084
files textshaper/url2txt.py
diffstat 1 files changed, 24 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/textshaper/url2txt.py	Thu Jul 03 13:23:19 2014 -0700
+++ b/textshaper/url2txt.py	Thu Jul 03 13:46:30 2014 -0700
@@ -8,16 +8,30 @@
 
 import argparse
 import sys
+import urlparse
 
-def url2txt(url):
+def url2txt(url, strip_extension=True, replacements=(('_', ' '),)):
     """gets the text equivalent of a URL"""
-    url = url.rstrip('/')
-    if '/' in url:
-        url = url.rsplit('/')[-1]
-    if '.' in url:
-        url = url.split('.', 1)[0]
-    url = url.replace('_', ' ')
-    return url
+
+    # parse the url
+    parsed = urlparse.urlparse(url)
+
+    # process the path, if available
+    path = parsed.path.rstrip('/')
+    if path:
+        text = path.split('/')[-1]
+        if strip_extension:
+            # strip the extension, if desired
+            text = text.split('.', 1)[0]
+    else:
+        # otherwise go with the hostname
+        text = parsed.hostname
+
+    # replace desired items
+    for item, replacement in replacements:
+        text = text.replace(item, replacement)
+
+    return text
 
 
 def main(args=sys.argv[1:]):
@@ -25,8 +39,8 @@
 
     # parse command line
     parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_option('urls', metavar='url', nargs='+',
-                      help="URLs to convert")
+    parser.add_argument('urls', metavar='url', nargs='+',
+                        help="URLs to convert")
     options = parser.parse_args(args)
 
     # convert urls