view curlyq @ 27:70e75dd07e03

Add --ligatures mode.
author David Barts <n5jrn@me.com>
date Sat, 17 Oct 2020 14:19:27 -0700
parents 3264788aa0c8
children d5bf9985b5c4
line wrap: on
line source

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# I m p o r t s

import os, sys
import argparse
import codecs

from curlers import TextCurler, HtmlCurler, uncurl
from runes import Workspace

# V a r i a b l e s

# Basename of the path this script was started through; it prefixes the
# diagnostics written to stderr and is handed to argparse as the program name.
MYNAME = os.path.basename(sys.argv[0])

# Input and output text streams. Both stay None until the main program
# opens them.
input_fp = None
output_fp = None

# Codecs accepted for output, mapping each codec-info object (as returned by
# codecs.lookup) to the canonical spelling of its name.
CODECS_TO_NAME = {
    codecs.lookup(name): name
    for name in (
        "UTF-8",
        "UTF-16",
        "UTF-16LE",
        "UTF-16BE",
        "UTF-32",
        "UTF-32LE",
        "UTF-32BE",
    )
}

# For --backtick: the input marker, and the straight quotes that stand for
# feet/minutes (single) and inches/seconds (double).
BACKT = "`"
FTMIN = "'"
INSEC = '"'

# For --tex: (pattern, replacement) pairs, applied in this order so that the
# three-hyphen form is consumed before the two-hyphen form can match it.
TEX_SUBST = [
    ("---", "—"),
    ("--", "–"),
    ("...", "…"),
]

# For --ligatures: (pattern, replacement) pairs; the three-letter sequences
# come first so they win over the two-letter ones they contain.
LIG_SUBST = [
    ("ffi", "\ufb03"),
    ("ffl", "\ufb04"),
    ("ff", "\ufb00"),
    ("fi", "\ufb01"),
    ("fl", "\ufb02"),
]

# C l a s s e s

class SafeWorkspace(Workspace):
    """A Workspace whose item lookup never raises IndexError.

    Whenever the base class's lookup raises IndexError, the empty string is
    returned instead, so code can peek one position past the end of the
    buffer without a bounds check.
    """

    def __getitem__(self, key):
        """Return the base class's item for key, or "" on IndexError."""
        try:
            item = super().__getitem__(key)
        except IndexError:
            item = ""
        return item

# F u n c t i o n s

def normal():
    """Process plain text one blank-line-delimited chunk at a time.

    Lines read from input_fp accumulate in a workspace. Each time a line
    consisting of a lone newline, or the empty string that readline returns
    at end of input, has been appended, the accumulated text is run through
    the curler and the optional passes selected in args, written to
    output_fp, and the workspace is emptied. The function returns after the
    end-of-input chunk has been flushed.
    """
    ws = SafeWorkspace()
    # The curler is bound to the workspace once; feed() is called without
    # arguments below, so it presumably works on the workspace's current
    # contents in place.
    curler = TextCurler(ws)
    at_eof = False
    while not at_eof:
        line = input_fp.readline()
        at_eof = line == ""
        ws.append(line)
        if not (at_eof or line == "\n"):
            # Still inside a chunk: keep accumulating.
            continue
        if args.force:
            uncurl(ws)
        curler.feed()
        if args.backtick:
            fims(ws)
        if args.tex:
            gsuba(ws, TEX_SUBST)
        if args.ligatures:
            gsuba(ws, LIG_SUBST)
        output_fp.write(str(ws))
        ws.clear()

def flowed():
    """Process flowed text, treating every input line as its own chunk.

    Each line read from input_fp is placed in the workspace, run through
    the curler and the optional passes selected in args, written to
    output_fp, and then cleared before the next line is read. Reading stops
    when readline returns the empty string.
    """
    ws = SafeWorkspace()
    curler = TextCurler(ws)
    # Two-argument iter() keeps calling readline until it returns "".
    for line in iter(input_fp.readline, ""):
        ws.append(line)
        if args.force:
            uncurl(ws)
        curler.feed()
        if args.backtick:
            fims(ws)
        if args.tex:
            gsuba(ws, TEX_SUBST)
        if args.ligatures:
            gsuba(ws, LIG_SUBST)
        output_fp.write(str(ws))
        ws.clear()

def html():
    """Process an HTML document as a single unit.

    All of input_fp is read into one workspace, optionally uncurled first
    when args.force is set, passed through an HtmlCurler, and the result is
    written to output_fp. The --backtick, --tex and --ligatures passes are
    not applied in this mode.
    """
    document = input_fp.read()
    ws = SafeWorkspace(document)
    curler = HtmlCurler(ws)
    if args.force:
        uncurl(ws)
    curler.feed()
    result = str(ws)
    output_fp.write(result)

def do_uncurl():
    """Read all of input_fp, apply uncurl() to it, and write it to output_fp.

    No curler and none of the optional substitution passes are involved.
    """
    text = input_fp.read()
    ws = SafeWorkspace(text)
    uncurl(ws)
    output_fp.write(str(ws))

def fims(buf):
    """Rewrite backtick markers in buf, in place.

    Scanning left to right, two adjacent backticks are replaced by one
    INSEC character and a lone backtick by one FTMIN character.

    buf must offer find(needle, start) returning a negative value when
    nothing is found, plus item and slice assignment. Looking at
    buf[idx + 1] can reach one past the end of the buffer; the callers in
    this file pass a SafeWorkspace, which yields "" there instead of
    raising.
    """
    idx = buf.find(BACKT, 0)
    while idx >= 0:
        doubled = buf[idx + 1] == BACKT
        if doubled:
            buf[idx:idx + 2] = INSEC
        else:
            buf[idx] = FTMIN
        # Either way exactly one character now sits at idx; resume after it.
        idx = buf.find(BACKT, idx + 1)

def gsub(buf, old, repl):
    """Replace every occurrence of old in buf with repl, in place.

    Both old and repl are converted to Workspace objects before use. The
    search resumes immediately after each inserted replacement, so text
    produced by a replacement is never rescanned.

    buf must offer find(needle, start) returning a negative value when
    nothing is found, plus slice assignment.
    """
    needle = Workspace(old)
    replacement = Workspace(repl)
    span = len(needle)
    step = len(replacement)
    at = buf.find(needle, 0)
    while at >= 0:
        buf[at:at + span] = replacement
        at = buf.find(needle, at + step)

def gsuba(buf, subs):
    """Apply a sequence of substitutions to buf, in place and in order.

    subs is an iterable of indexable entries whose element 0 is the text to
    find and element 1 its replacement; each entry is handed to gsub().
    Order matters because a later pattern sees the result of earlier ones.
    """
    for entry in subs:
        pattern = entry[0]
        replacement = entry[1]
        gsub(buf, pattern, replacement)

# M a i n   P r o g r a m

# Parse arguments
parser = argparse.ArgumentParser(
    description='Make straight quotes curly.', prog=MYNAME)
# The processing modes proper; at most one of these may be given. With none
# of them, the default paragraph-oriented text mode is used.
group = parser.add_mutually_exclusive_group()
parser.add_argument("--backtick", action="store_true", help="Use backticks on input for ft/in/min/sec.")
group.add_argument("--flowed", action="store_true", help="Input is flowed text.")
group.add_argument("--html", action="store_true", help="Input is HTML.")
# --ligatures is a modifier applied by both text modes, not a mode of its
# own, so it must not sit in the mutually exclusive group: there it could
# never be combined with --flowed, although flowed() implements it.
parser.add_argument("--ligatures", action="store_true", help="Use ff, fi, fl, ffi, ffl ligatures.")
group.add_argument("--uncurl", action="store_true", help="Uncurl quotes instead of curling them.")
parser.add_argument("--force", action="store_true", help="Force all quotes to straight ones first.")
parser.add_argument("--icoding", default="UTF-8", help="Input encoding (default UTF-8).")
parser.add_argument("--inplace", action="store_true", help="Edit file in-place.")
parser.add_argument("--ocoding", default="UTF-8", help="Output encoding (default UTF-8).")
parser.add_argument("--tex", action="store_true", help="TeX/LaTeX style dash and ellipsis substitution.")
parser.add_argument("input", nargs="?", help="Input file.")
parser.add_argument("output", nargs="?", help="Output file.")
# argparse itself exits with status 2 on a usage error and with status 0
# after --help. SystemExit is deliberately not intercepted, so that a
# successful --help is not reported to the caller as a failure.
args = parser.parse_args()

# Sanity checks: reject option combinations that the selected mode does not
# implement, rather than silently ignoring the option.
if args.html and args.backtick:
    sys.stderr.write(MYNAME + ": --backtick not supported in --html mode\n")
    sys.exit(2)
if args.html and args.tex:
    sys.stderr.write(MYNAME + ": --tex not supported in --html mode\n")
    sys.exit(2)
if args.html and args.ligatures:
    sys.stderr.write(MYNAME + ": --ligatures not supported in --html mode\n")
    sys.exit(2)
if args.uncurl and args.ligatures:
    sys.stderr.write(MYNAME + ": --ligatures not supported in --uncurl mode\n")
    sys.exit(2)

# Sanity-check codings. Both names must resolve to a codec known to Python.
# The input coding is only checked for existence; the output codec must in
# addition be one of the codecs listed in CODECS_TO_NAME.
try:
    codecs.lookup(args.icoding)
    codec = codecs.lookup(args.ocoding)
except LookupError as e:
    sys.stderr.write("{0}: {1!s}\n".format(MYNAME, e))
    sys.exit(2)
else:
    if codec not in CODECS_TO_NAME:
        sys.stderr.write("{0}: {1!r} output coding does not support Unicode\n".format(MYNAME, args.ocoding))
        sys.exit(1)
    # The codec object was needed only for the membership test above.
    del codec

# Get streams
try:
    # In-place editing only takes effect when an input file is named and no
    # separate output file is. The original is then moved aside to
    # "<name>~" and read from there, while output goes to the original name.
    if args.inplace and args.input and not args.output:
        args.input, args.output = args.input + "~", args.input
        os.rename(args.output, args.input)
    # Without a file name, fall back to file descriptor 0 (standard input)
    # or 1 (standard output), wrapped with the requested encoding.
    input_fp = open(args.input or 0, "r", encoding=args.icoding)
    output_fp = open(args.output or 1, "w", encoding=args.ocoding)
except (OSError, LookupError) as e:
    sys.stderr.write("{0}: {1!s}\n".format(MYNAME, e))
    sys.exit(1)

# Choose our mode: pick the handler for the first mode flag that is set, in
# the order --flowed, --html, --uncurl, falling back to the default
# paragraph-oriented text mode, and run it.
(
    flowed if args.flowed
    else html if args.html
    else do_uncurl if args.uncurl
    else normal
)()