Mercurial > cgi-bin > hgweb.cgi > curlyq
view curlyq @ 29:d5bf9985b5c4 default tip
Add degree symbol, fix bug in HTML curler.
author | David Barts <n5jrn@me.com> |
---|---|
date | Thu, 07 Oct 2021 11:55:46 -0700 |
parents | 70e75dd07e03 |
children |
line wrap: on
line source
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Command-line filter that makes straight quotes curly.

Text is read from a file (or standard input), run through one of four
modes -- normal (paragraph at a time), --flowed (line at a time), --html
or --uncurl -- and written to a file (or standard output).  The quote
handling itself lives in the ``curlers`` module; the editable text buffer
comes from ``runes.Workspace``.
"""

# I m p o r t s

import os
import sys
import argparse
import codecs
from curlers import TextCurler, HtmlCurler, uncurl
from runes import Workspace

# V a r i a b l e s

# Name invoked by
MYNAME = os.path.basename(sys.argv[0])

# Streams
input_fp = None
output_fp = None

# Codecs we support
CODECS_TO_NAME = {
    codecs.lookup(name): name
    for name in (
        "UTF-8",
        "UTF-16", "UTF-16LE", "UTF-16BE",
        "UTF-32", "UTF-32LE", "UTF-32BE",
    )
}

# For feet/inches/min/sec
BACKT = "`"
FTMIN = "'"
INSEC = '"'

# For --tex option.  Order matters: longer patterns must be tried before
# their prefixes ("---" before "--").
TEX_SUBST = [
    ("---", "—"),
    ("--", "–"),
    ("...", "…"),
    ("()", "°"),
]

# For --ligatures option.  Order matters here too: the three-letter
# ligatures must be substituted before "ff", "fi" and "fl".
LIG_SUBST = [
    ("ffi", "\ufb03"),
    ("ffl", "\ufb04"),
    ("ff", "\ufb00"),
    ("fi", "\ufb01"),
    ("fl", "\ufb02"),
]

# C l a s s e s

class SafeWorkspace(Workspace):
    """Workspace whose indexing yields "" instead of raising IndexError.

    fims() relies on this when it peeks one position past a backtick that
    may be the last character in the buffer.
    """

    def __getitem__(self, key):
        try:
            return super().__getitem__(key)
        except IndexError:
            return ""

# F u n c t i o n s

def _process_chunk(ws, curler):
    """Curl the text currently in *ws*, write it out, and empty *ws*.

    Shared by normal() and flowed().  The steps run in a fixed order:
    optional uncurl (--force), the curler itself, then the optional
    --backtick, --tex and --ligatures substitutions.
    """
    if args.force:
        uncurl(ws)
    curler.feed()
    if args.backtick:
        fims(ws)
    if args.tex:
        gsuba(ws, TEX_SUBST)
    if args.ligatures:
        gsuba(ws, LIG_SUBST)
    output_fp.write(str(ws))
    ws.clear()

def normal():
    """Default mode: process the input one paragraph at a time.

    Lines are accumulated in the workspace until a line consisting of
    only a newline, or end of input, is read; the accumulated text is then
    processed and written as a unit.
    """
    ws = SafeWorkspace()
    curler = TextCurler(ws)
    while True:
        line = input_fp.readline()
        ws.append(line)
        # readline() returns "" only at end of input; "\n" is a blank line.
        if line == "" or line == "\n":
            _process_chunk(ws, curler)
        if line == "":
            break

def flowed():
    """--flowed mode: process and write every input line on its own."""
    ws = SafeWorkspace()
    curler = TextCurler(ws)
    while True:
        line = input_fp.readline()
        if line == "":
            break
        ws.append(line)
        _process_chunk(ws, curler)

def html():
    """--html mode: read the whole input and run it through HtmlCurler."""
    ws = SafeWorkspace(input_fp.read())
    curler = HtmlCurler(ws)
    if args.force:
        uncurl(ws)
    curler.feed()
    output_fp.write(str(ws))

def do_uncurl():
    """--uncurl mode: read the whole input, uncurl it, and write it out."""
    ws = SafeWorkspace(input_fp.read())
    uncurl(ws)
    output_fp.write(str(ws))

def fims(buf):
    """Rewrite backticks in *buf*, in place, as feet/inches/min/sec marks.

    A pair of adjacent backticks becomes one double quote (INSEC); a lone
    backtick becomes one single quote (FTMIN).
    """
    pos = 0
    while True:
        pos = buf.find(BACKT, pos)
        if pos < 0:
            break
        # At the end of the buffer buf[pos+1] is "" (see SafeWorkspace),
        # so a trailing lone backtick takes the else branch.
        if buf[pos+1] == BACKT:
            buf[pos:pos+2] = INSEC
        else:
            buf[pos] = FTMIN
        pos += 1

def gsub(buf, old, repl):
    """Replace every occurrence of *old* in *buf* with *repl*, in place.

    Searching resumes just past each inserted replacement, so replacement
    text is never itself rescanned.
    """
    old = Workspace(old)
    repl = Workspace(repl)
    olen = len(old)
    delta = len(repl)
    pos = 0
    while True:
        pos = buf.find(old, pos)
        if pos < 0:
            break
        buf[pos:pos+olen] = repl
        pos += delta

def gsuba(buf, subs):
    """Apply each (old, replacement) pair in *subs* to *buf*, in order."""
    for old, repl in subs:
        gsub(buf, old, repl)

# M a i n   P r o g r a m

# Parse arguments
parser = argparse.ArgumentParser(
    description='Make straight quotes curly.', prog=MYNAME)
group = parser.add_mutually_exclusive_group()
parser.add_argument("--backtick", action="store_true",
    help="Use backticks on input for ft/in/min/sec.")
group.add_argument("--flowed", action="store_true",
    help="Input is flowed text.")
group.add_argument("--html", action="store_true",
    help="Input is HTML.")
# --ligatures is a modifier, not a mode: both normal() and flowed() honor
# it, so it must not sit in the mutually exclusive mode group (that made
# "--flowed --ligatures" a usage error).  The combinations that really are
# unsupported are rejected in the sanity checks below.
parser.add_argument("--ligatures", action="store_true",
    help="Use ff, fi, fl, ffi, ffl ligatures.")
group.add_argument("--uncurl", action="store_true",
    help="Uncurl quotes instead of curling them.")
parser.add_argument("--force", action="store_true",
    help="Force all quotes to straight ones first.")
parser.add_argument("--icoding", default="UTF-8",
    help="Input encoding (default UTF-8).")
parser.add_argument("--inplace", action="store_true",
    help="Edit file in-place.")
parser.add_argument("--ocoding", default="UTF-8",
    help="Output encoding (default UTF-8).")
parser.add_argument("--tex", action="store_true",
    help="TeX/LaTeX style dash and ellipsis substitution.")
parser.add_argument("input", nargs="?", help="Input file.")
parser.add_argument("output", nargs="?", help="Output file.")
try:
    args = parser.parse_args()
except SystemExit:
    # Every argparse exit ends with status 2.  Note that this includes
    # the exit argparse performs after printing --help.
    sys.exit(2)

# Sanity checks: (mode, option) pairs that cannot be combined.
UNSUPPORTED = [
    ("html", "backtick"),
    ("html", "tex"),
    ("html", "ligatures"),
    ("uncurl", "ligatures"),
]
for mode, option in UNSUPPORTED:
    if getattr(args, mode) and getattr(args, option):
        sys.stderr.write(
            MYNAME + ": --" + option + " not supported in --" + mode + " mode\n")
        sys.exit(2)

# Sanity-check codings
try:
    codecs.lookup(args.icoding)  # only checked for existence
    codec = codecs.lookup(args.ocoding)
except LookupError as e:
    sys.stderr.write("{0}: {1!s}\n".format(MYNAME, e))
    sys.exit(2)
# Only the output coding is required to be one of the Unicode codecs
# listed in CODECS_TO_NAME.
if codec not in CODECS_TO_NAME:
    sys.stderr.write("{0}: {1!r} output coding does not support Unicode\n".format(MYNAME, args.ocoding))
    sys.exit(1)
del codec

# Get streams
try:
    if args.input and (not args.output) and args.inplace:
        # In-place edit: rename the original to "<name>~" and read from
        # that, writing the result under the original name.
        args.output = args.input
        args.input += "~"
        os.rename(args.output, args.input)
    if args.input:
        input_fp = open(args.input, "r", encoding=args.icoding)
    else:
        input_fp = open(0, "r", encoding=args.icoding)
    if args.output:
        output_fp = open(args.output, "w", encoding=args.ocoding)
    else:
        output_fp = open(1, "w", encoding=args.ocoding)
except (OSError, LookupError) as e:
    sys.stderr.write("{0}: {1!s}\n".format(MYNAME, e))
    sys.exit(1)

# Choose our mode.  The with block closes both streams when the mode
# function finishes, so the output is flushed explicitly and a failure
# of that final flush is not silently lost.
with input_fp, output_fp:
    if args.flowed:
        flowed()
    elif args.html:
        html()
    elif args.uncurl:
        do_uncurl()
    else:
        normal()