Mercurial > cgi-bin > hgweb.cgi > curlyq
annotate curlyq @ 27:70e75dd07e03
Add --ligatures mode.
author | David Barts <n5jrn@me.com> |
---|---|
date | Sat, 17 Oct 2020 14:19:27 -0700 |
parents | 3264788aa0c8 |
children | d5bf9985b5c4 |
rev | line source |
---|---|
3 | 1 #!/usr/bin/env python3 |
2 # -*- coding: utf-8 -*- | |
3 | |
22
a771878f6cf4
Remove deadwood, update runes.py.
David Barts <n5jrn@me.com>
parents:
10
diff
changeset
|
4 # I m p o r t s |
a771878f6cf4
Remove deadwood, update runes.py.
David Barts <n5jrn@me.com>
parents:
10
diff
changeset
|
5 |
3 | 6 import os, sys |
7 import argparse | |
8 import codecs | |
9 | |
4 | 10 from curlers import TextCurler, HtmlCurler, uncurl |
10 | 11 from runes import Workspace |
3 | 12 |
# V a r i a b l e s

# Name invoked by
MYNAME = os.path.basename(sys.argv[0])

# Streams (opened by the main program, read/written by the mode functions)
input_fp = None
output_fp = None

# Codecs we support: maps each CodecInfo returned by codecs.lookup() to
# the canonical name we use for that encoding.  Built with a comprehension
# so no loop variable is left behind in the module namespace.
CODECS_TO_NAME = {
    codecs.lookup(name): name
    for name in (
        "UTF-8",
        "UTF-16", "UTF-16LE", "UTF-16BE",
        "UTF-32", "UTF-32LE", "UTF-32BE",
    )
}

# For feet/inches/min/sec
BACKT = "`"
FTMIN = "'"
INSEC = '"'

# For --tex option.  Pairs are applied in list order by gsuba(), so the
# longer "---" has to come before its prefix "--".
TEX_SUBST = [ ("---", "—"), ("--", "–"), ("...", "…") ]

# For --ligatures option.  Pairs are applied in list order by gsuba(), so
# the three-letter sequences have to come before the two-letter ones they
# contain.
LIG_SUBST = [ ("ffi", "\ufb03"), ("ffl", "\ufb04"), ("ff", "\ufb00"),
    ("fi", "\ufb01"), ("fl", "\ufb02") ]
39 | |
22
a771878f6cf4
Remove deadwood, update runes.py.
David Barts <n5jrn@me.com>
parents:
10
diff
changeset
|
40 # C l a s s e s |
a771878f6cf4
Remove deadwood, update runes.py.
David Barts <n5jrn@me.com>
parents:
10
diff
changeset
|
41 |
a771878f6cf4
Remove deadwood, update runes.py.
David Barts <n5jrn@me.com>
parents:
10
diff
changeset
|
class SafeWorkspace(Workspace):
    """A Workspace whose item lookup never raises IndexError.

    Any lookup that would raise IndexError in the parent class yields an
    empty string instead, so callers can peek at neighbouring positions
    (e.g. buf[pos+1]) without bounds checks.
    """

    def __getitem__(self, key):
        try:
            item = super().__getitem__(key)
        except IndexError:
            # Out of range: report "nothing there" rather than failing.
            return ""
        else:
            return item
a771878f6cf4
Remove deadwood, update runes.py.
David Barts <n5jrn@me.com>
parents:
10
diff
changeset
|
48 |
3 | 49 # F u n c t i o n s |
50 | |
def normal():
    """Process plain text one paragraph at a time.

    Lines are read from input_fp and accumulated in a workspace.  Each
    time a blank line ("\\n") or end of input is reached, the accumulated
    text is processed (optional uncurl, curling, then the optional
    --backtick, --tex and --ligatures substitutions), written to
    output_fp, and the workspace is emptied.
    """
    para = SafeWorkspace()
    curler = TextCurler(para)
    at_eof = False
    while not at_eof:
        line = input_fp.readline()
        at_eof = (line == "")
        para.append(line)
        if not at_eof and line != "\n":
            # Still inside a paragraph; keep accumulating.
            continue
        if args.force:
            uncurl(para)
        curler.feed()
        if args.backtick:
            fims(para)
        if args.tex:
            gsuba(para, TEX_SUBST)
        if args.ligatures:
            gsuba(para, LIG_SUBST)
        output_fp.write(str(para))
        para.clear()
3 | 68 |
def flowed():
    """Process flowed text one line at a time.

    Every line read from input_fp is processed on its own (optional
    uncurl, curling, then the optional --backtick, --tex and --ligatures
    substitutions) and written to output_fp before the next line is read.
    """
    chunk = SafeWorkspace()
    curler = TextCurler(chunk)
    # readline() returns "" only at end of input, which ends the loop.
    for line in iter(input_fp.readline, ""):
        chunk.append(line)
        if args.force:
            uncurl(chunk)
        curler.feed()
        if args.backtick:
            fims(chunk)
        if args.tex:
            gsuba(chunk, TEX_SUBST)
        if args.ligatures:
            gsuba(chunk, LIG_SUBST)
        output_fp.write(str(chunk))
        chunk.clear()
3 | 85 |
def html():
    """Process the whole input as a single HTML document.

    The entire contents of input_fp are loaded into one workspace, quotes
    are optionally uncurled first (--force), an HtmlCurler is run over the
    workspace, and the result is written to output_fp.
    """
    document = SafeWorkspace(input_fp.read())
    curler = HtmlCurler(document)
    if args.force:
        uncurl(document)
    curler.feed()
    output_fp.write(str(document))
3 | 93 |
def do_uncurl():
    """Uncurl the whole input in one pass.

    Reads all of input_fp into a workspace, applies uncurl() to it, and
    writes the result to output_fp.
    """
    text = input_fp.read()
    document = SafeWorkspace(text)
    uncurl(document)
    output_fp.write(str(document))
5 | 99 |
def fims(buf):
    """Turn backticks into feet/inches (minutes/seconds) marks, in place.

    Each pair of adjacent backticks in buf becomes a single INSEC
    character and each lone backtick becomes FTMIN.

    buf must support find(), item lookup and item/slice assignment; the
    lookup of buf[cursor + 1] may fall past the end, so buf is expected
    to tolerate that (as SafeWorkspace does).
    """
    cursor = buf.find(BACKT, 0)
    while cursor >= 0:
        doubled = buf[cursor + 1] == BACKT
        if doubled:
            buf[cursor:cursor + 2] = INSEC
        else:
            buf[cursor] = FTMIN
        # Either way exactly one character now sits at cursor; move past it.
        cursor = buf.find(BACKT, cursor + 1)
111 | |
def gsub(buf, old, repl):
    """Replace every occurrence of old in buf with repl, in place.

    old and repl are converted to Workspace objects before use.  After
    each replacement the search resumes just past the inserted text, so
    replacement text is never rescanned.
    """
    needle = Workspace(old)
    replacement = Workspace(repl)
    needle_len = len(needle)
    step = len(replacement)
    at = buf.find(needle, 0)
    while at >= 0:
        buf[at:at + needle_len] = replacement
        at = buf.find(needle, at + step)
124 | |
def gsuba(buf, subs):
    """Apply a sequence of (old, replacement) substitutions to buf.

    The pairs in subs are applied one after another, in order, each via
    gsub(); earlier substitutions therefore take precedence.
    """
    for entry in subs:
        old, repl = entry[0], entry[1]
        gsub(buf, old, repl)
128 | |
3 | 129 # M a i n P r o g r a m |
130 | |
# Parse arguments
parser = argparse.ArgumentParser(
    description='Make straight quotes curly.', prog=MYNAME)
# The processing modes (--flowed, --html, --uncurl) exclude one another.
group = parser.add_mutually_exclusive_group()
parser.add_argument("--backtick", action="store_true", help="Use backticks on input for ft/in/min/sec.")
group.add_argument("--flowed", action="store_true", help="Input is flowed text.")
group.add_argument("--html", action="store_true", help="Input is HTML.")
# --ligatures is a modifier, not a mode: flowed() honours it, so it must
# not be part of the mutually exclusive group (which would make
# "--flowed --ligatures" an error).
parser.add_argument("--ligatures", action="store_true", help="Use ff, fi, fl, ffi, ffl ligatures.")
group.add_argument("--uncurl", action="store_true", help="Uncurl quotes instead of curling them.")
parser.add_argument("--force", action="store_true", help="Force all quotes to straight ones first.")
parser.add_argument("--icoding", default="UTF-8", help="Input encoding (default UTF-8).")
parser.add_argument("--inplace", action="store_true", help="Edit file in-place.")
parser.add_argument("--ocoding", default="UTF-8", help="Output encoding (default UTF-8).")
parser.add_argument("--tex", action="store_true", help="TeX/LaTeX style dash and ellipsis substitution.")
parser.add_argument("input", nargs="?", help="Input file.")
parser.add_argument("output", nargs="?", help="Output file.")
try:
    args = parser.parse_args()
except SystemExit:
    # Every exit requested by argparse is reported as status 2.
    # NOTE(review): this also covers --help, which argparse would
    # otherwise end with status 0 -- confirm that is intended.
    sys.exit(2)

# Sanity checks: the text-only substitutions are not implemented by html().
if args.html:
    for flag in ("backtick", "tex", "ligatures"):
        if getattr(args, flag):
            sys.stderr.write(MYNAME + ": --" + flag + " not supported in --html mode\n")
            sys.exit(2)
# do_uncurl() does not apply ligatures either; keep rejecting the
# combination, as the mutually exclusive group used to.
if args.uncurl and args.ligatures:
    sys.stderr.write(MYNAME + ": --ligatures not supported in --uncurl mode\n")
    sys.exit(2)

# Sanity-check codings
try:
    codecs.lookup(args.icoding)  # only checked for existence
    codec = codecs.lookup(args.ocoding)
except LookupError as e:
    sys.stderr.write("{0}: {1!s}\n".format(MYNAME, e))
    sys.exit(2)
# Only the output coding has to be one of the supported Unicode codecs.
if codec not in CODECS_TO_NAME:
    sys.stderr.write("{0}: {1!r} output coding does not support Unicode\n".format(MYNAME, args.ocoding))
    sys.exit(1)
del codec

# Get streams
try:
    if args.input and (not args.output) and args.inplace:
        # In-place edit: move the original aside to "<name>~", read from
        # that backup, and write the result under the original name.
        args.output = args.input
        args.input += "~"
        os.rename(args.output, args.input)
    if args.input:
        input_fp = open(args.input, "r", encoding=args.icoding)
    else:
        # File descriptor 0: standard input, decoded with --icoding.
        input_fp = open(0, "r", encoding=args.icoding)
    if args.output:
        output_fp = open(args.output, "w", encoding=args.ocoding)
    else:
        # File descriptor 1: standard output, encoded with --ocoding.
        output_fp = open(1, "w", encoding=args.ocoding)
except (OSError, LookupError) as e:
    sys.stderr.write("{0}: {1!s}\n".format(MYNAME, e))
    sys.exit(1)

# Choose our mode
try:
    if args.flowed:
        flowed()
    elif args.html:
        html()
    elif args.uncurl:
        do_uncurl()
    else:
        normal()
finally:
    # Close explicitly so buffered output is flushed (and any write error
    # surfaces) before the interpreter shuts down.
    output_fp.close()
    input_fp.close()