Mercurial > hg > TextShaper
comparison textshaper/split.py @ 51:c3b69728f291
finding indices now works
| author | Jeff Hammel <k0scist@gmail.com> |
|---|---|
| date | Sun, 17 May 2015 08:33:23 -0700 |
| parents | 1284c99a94fa |
| children | 8d8c1ac0e8e1 |
comparison
equal
deleted
inserted
replaced
| 50:1284c99a94fa | 51:c3b69728f291 |
|---|---|
| 22 retval.append(index) | 22 retval.append(index) |
| 23 index += len(sub) | 23 index += len(sub) |
| 24 except ValueError: | 24 except ValueError: |
| 25 return retval | 25 return retval |
| 26 | 26 |
| 27 def findindices(_string, values): | 27 def indices(text, values): |
| 28 """ | 28 """ |
| 29 returns ordered list of 2-tuples: | 29 returns ordered list of 2-tuples: |
| 30 (index, value) | 30 (index, value) |
| 31 """ | 31 """ |
| | 32 locations = {value: findall(text, value) for value in values} |
| | 33 indices = [] |
| | 34 for key, values in locations.items(): |
| | 35 indices.extend([(value, key) for value in values]) |
| | 36 return sorted(indices, key=lambda x: x[0]) |
| 32 | 37 |
| 33 def split_sentences(text, ends='.?!'): | 38 def split_sentences(text, ends='.?!'): |
| 34 """split a text into sentences""" | 39 """split a text into sentences""" |
| 35 | 40 |
| 36 def split_paragraphs(text): | 41 def split_paragraphs(text): |
| 56 # paragraphs = split_paragraphs(text) | 61 # paragraphs = split_paragraphs(text) |
| 57 | 62 |
| 58 ends = '.?!' | 63 ends = '.?!' |
| 59 | 64 |
| 60 # find all ending punctuation | 65 # find all ending punctuation |
| 61 indices = {end: findall(text, end) for end in ends} | 66 |
| 62 | 67 |
| 63 | 68 |
| 64 if __name__ == '__main__': | 69 if __name__ == '__main__': |
| 65 main() | 70 main() |
