repr

#!/usr/bin/env python3

import itertools as it, operator as op, functools as ft
import os, sys, io, re, signal


def _parse_convert_spec(spec):
	'''Split -c/--convert value of "((enc-src:)enc-dst)(+opts)" form into parts.

	Returns (enc_src, enc_dst, enc_opts) tuple of strings, where omitted
		encodings default to utf-8-sig / utf-8, and enc_opts can be empty.
	Raises ValueError if enc_opts has any letters other than r, i, t, j.'''
	enc_src, enc_dst, enc_opts = 'utf-8-sig', spec, ''
	if ':' in enc_dst: enc_src, enc_dst = enc_dst.split(':', 1)
	if '+' in enc_dst: enc_dst, enc_opts = enc_dst.split('+', 1)
	if set(enc_opts).difference('ritj'):
		raise ValueError(f'Not all encoding options recognized: {enc_opts}')
	return enc_src or 'utf-8-sig', enc_dst or 'utf-8', enc_opts


def _convert_line(line, enc_src, enc_dst, enc_opts, dec_errors):
	'''Return one input line (bytes) re-encoded from enc_src to enc_dst.

	line is processed according to enc_opts letters from -c/--convert spec:
		t - trailing whitespace is trimmed and line is always newline-terminated,
		r - \\n newlines are replaced by \\r\\n,
		j - line is json-decoded and pretty-printed, or repr-escaped if that fails.
	dec_errors is the error handler name for bytes.decode(), e.g. strict or replace.
	Newline processing is done on encoded bytes, so it is only
		reliable for ascii-compatible enc_dst encodings like utf-8 or cp1251.'''
	line = line.decode(enc_src, dec_errors).encode(enc_dst)
	# Loop instead of single replace, to also collapse sequences like \r\r\n
	while b'\r\n' in line: line = line.replace(b'\r\n', b'\n')
	# rstrip() also removes the newline, so it has to be put back explicitly,
	#  which also provides a final newline if last line of the input lacks one
	if 't' in enc_opts: line = line.rstrip() + b'\n'
	if 'r' in enc_opts: line = line.replace(b'\n', b'\r\n')
	if 'j' in enc_opts:
		import json, pprint
		try:
			line = (pprint.pformat( json.loads(line),
				indent=1, width=120, compact=True ) + '\n').encode()
		# Only decoding failures should fall back to repr output - bare
		#  except here would also swallow SystemExit raised by signal handlers
		except (ValueError, RecursionError):
			line = (repr(line)[2:-1] + '\n').encode()
	return line


def main(args=None):
	'''Command-line entry point - print repr() of input lines or convert encoding.

	args is a list of command-line arguments, with sys.argv[1:] used if it is None.
	Each line of every input file (or stdin for "-") is either printed
		to stdout in repr()/ascii()/bytes-repr form, or - with -c/--convert -
		gets re-encoded and written to stdout or back to the same file (+i option).
	Installs SIGINT/SIGTERM handlers that exit the process with a success code.
	Exits via parser.error() (SystemExit with code 2) on invalid -c/--convert options.'''
	import argparse, textwrap
	dd = lambda text: re.sub( r' \t+', ' ',
		textwrap.dedent(text).strip('\n') + '\n' ).replace('\t', '  ')
	parser = argparse.ArgumentParser(
		formatter_class=argparse.RawTextHelpFormatter,
		description='Run python repr() to stdout on every (text) line of an input file.')
	parser.add_argument('file', nargs='*',
		help='Input file path(s). Stdin is used if none specified or in place of "-".')
	parser.add_argument('-a', '--ascii', action='store_true',
		help='Use ascii() representation instead of repr(), i.e. utf-8 chars as \\uXXXX.')
	parser.add_argument('-b', '--binary', action='store_true',
		help=dd('''
			Process file lines in binary mode, instead of decoding it with utf-8.
			Should allow to see BOM and random
				binary goop escaped as individual bytes, not codepoints.
			Anything outside of printable ascii will be encoded
				either as a special escape sequence like \\n or C-style \\xCC.'''))
	parser.add_argument('-n', '--hide-newlines', action='store_true',
		help=dd('''
			Enables "universal newlines" translation and strips last \\n from each line.
			Whether this is sufficient for mismatched line endings
				depends on how aggressive that mode is in converting newlines.'''))
	parser.add_argument('-x', '--error-replace', action='store_true',
		help='When decoding stuff (non-binary mode, convert), replace non-decodable chars.')
	parser.add_argument('-c', '--convert',
		const='utf-8', nargs='?', metavar='((enc-src:)enc-dst)(+opts)',
		help=dd('''
			Instead of repr-output, convert encoding
				from enc-src (default=utf-8-sig) to enc-dst (utf-8) and output the result.
			[[enc-src:]enc-dst] spec can be followed by + and any of the following options:
				+r - output windows \\r\\n newlines,
				+i - overwrite file in-place, same as sed -i would do
				+t - trim whitespace from line endings, and ensure final newline
				+j - json-decode and pretty-print lines, or print as -b/--binary on fail
			Examples: -c utf-8-sig+ri, -c +t, -c cp1251:utf-8+it
			Allows to use stuff like utf-8-sig or any other codec supported by python.'''))
	opts = parser.parse_args(sys.argv[1:] if args is None else args)

	file_list = opts.file or ['-']
	# Binary layer of the existing stdout object is used instead of opening
	#  fd 1 again, as such second file object would close the fd when discarded
	dst_stdout = sys.stdout.buffer
	dec_errors = 'replace' if opts.error_replace else 'strict'
	enc_src = enc_dst = None
	enc_opts = ''
	if opts.convert:
		# Explicit check instead of assert, which is skipped with python -O
		try: enc_src, enc_dst, enc_opts = _parse_convert_spec(opts.convert)
		except ValueError as err: parser.error(str(err))

	for sig in 'SIGINT', 'SIGTERM':
		signal.signal(getattr(signal, sig), lambda sig,frm: sys.exit(os.EX_OK))

	for p in file_list:
		p_file = p != '-'
		# In-place output is buffered in memory and only written after
		#  all input is processed, so that errors do not leave truncated file
		dst_buff = io.BytesIO() if opts.convert and 'i' in enc_opts and p_file else None
		dst = dst_stdout if dst_buff is None else dst_buff
		# closefd=False keeps stdin fd usable if "-" is specified more than once
		with open( p if p_file else sys.stdin.fileno(),
				mode='rb', closefd=p_file ) as src:
			for line in src:
				if opts.convert:
					line = _convert_line(line, enc_src, enc_dst, enc_opts, dec_errors)
				else:
					if opts.hide_newlines: line = line.rstrip(b'\r\n')
					# [2:-1] and [1:-1] cut b'...' and '...' quoting from repr strings
					if opts.binary: line = repr(line)[2:-1]
					else:
						line = (ascii if opts.ascii else repr)(
							line.decode('utf-8-sig', dec_errors) )[1:-1]
					line = (line + '\n').encode()
				dst.write(line)
				dst.flush() # per-line flush, so that output is not delayed with slow inputs
		if dst_buff is not None:
			with open(p, 'wb') as dst_file: dst_file.write(dst_buff.getvalue())

if __name__ == '__main__':
	# Run as a script - process exit status is whatever main() returns
	sys.exit(main())