Mercurial > hg > TextShaper
comparison textshaper/split.py @ 45:ccbdc00d4f0a
stub
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Tue, 12 May 2015 21:21:04 -0700 |
| parents | |
| children | 7e63ca061b6c |
comparison
equal
deleted
inserted
replaced
| 44:8addd6e12b29 | 45:ccbdc00d4f0a |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 """ | |
| 4 split paragraphs, sentences, etc | |
| 5 """ | |
| 6 | |
| 7 # imports | |
| 8 import argparse | |
| 9 import sys | |
| 10 | |
def split_paragraphs(text):
    """Split *text* into paragraphs separated by blank lines.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping act as paragraph separators.  The non-blank lines of
    a paragraph are joined with single spaces.

    Returns a list of paragraph strings.  Runs of consecutive blank lines
    never produce empty paragraphs, and empty (or whitespace-only) input
    yields an empty list.
    """

    paragraphs = []
    current = []  # stripped, non-blank lines of the paragraph being built
    for line in text.strip().splitlines():
        line = line.strip()
        if line:
            current.append(line)
        elif current:
            # a blank line closes the paragraph collected so far
            paragraphs.append(' '.join(current))
            current = []
    if current:
        # the final paragraph has no trailing blank line to close it
        paragraphs.append(' '.join(current))
    return paragraphs
| 19 | |
def main(args=sys.argv[1:]):
    """Command line entry point.

    Parses *args*, reads the whole input (the optional ``file`` positional
    argument, standard input when omitted) and collapses every run of
    whitespace into a single space.  Nothing is printed yet: paragraph
    splitting is still disabled below.
    """

    # command line interface: a single optional input file
    argument_parser = argparse.ArgumentParser(description=__doc__)
    argument_parser.add_argument(
        'file',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
    )
    parsed = argument_parser.parse_args(args)

    # read everything and normalize whitespace to single spaces
    words = parsed.file.read().strip().split()
    text = ' '.join(words)
    # paragraphs = split_paragraphs(text)

    # characters treated as sentence terminators (not used yet)
    punctuation = ('.',)

    # for paragraph in paragraphs:
    #     print (paragraph)
| 36 | |
| 37 if __name__ == '__main__': | |
| 38 main() |
