#!/usr/bin/env python3
'''A simple tool to test ICU/CLDR style collation rules.

Reads collation rules from a file, builds an ICU rule based collator
from them and uses it to sort the lines of the input file (or of
standard input).  The sorted lines are written to the output file (or
to standard output).  Optionally the rules are also written to a second
file with the code point appended to every non-ASCII character.
'''

import argparse
import sys

try:
    import icu
except ImportError:
    # PyICU is only needed for the actual sorting.  Deferring the failure
    # keeps “--help” and the doctests usable without it; main() reports
    # the problem before doing any work.
    icu = None

# Parsed command line arguments, filled in by main().
_ARGS = None


def parse_args(argv=None) -> argparse.Namespace:
    '''Parse the command line arguments.

    :param argv: The arguments to parse. If “None” (the default),
                 the arguments are taken from “sys.argv[1:]”.
    :type argv: list of strings or None
    :return: Namespace with the attributes “rules”, “unicode_rules”,
             “input” and “output”; each one is “None” if the
             option was not given.
    :rtype: argparse.Namespace
    '''
    parser = argparse.ArgumentParser(
        description='A simple tool to test ICU/CLDR style collation rules.')
    parser.add_argument(
        '-r', '--rules',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('File to read the collation rules from. '
              'default: %(default)s.'))
    parser.add_argument(
        '-u', '--unicode_rules',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('File to write the rules to using Unicode code points '
              'for everything which is not ASCII. This is useful if one '
              'wants to write equivalent rules for a glibc locale. '
              'default: %(default)s.'))
    parser.add_argument(
        '-i', '--input',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('Test file containing the lines which should be sorted. '
              'default: %(default)s.'))
    parser.add_argument(
        '-o', '--output',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('Test file containing the sorted lines. '
              'default: %(default)s.'))
    return parser.parse_args(argv)


def is_ascii(text: str) -> bool:
    '''Check whether all characters in text are ASCII characters.

    :param text: The text to check
    :type text: string
    :return: “True” if the text is all ASCII (this includes the
             empty string), “False” if not.
    :rtype: bool

    Examples:

    >>> is_ascii('Abc')
    True

    >>> is_ascii('Naïve')
    False
    '''
    try:
        text.encode('ascii')
    except UnicodeEncodeError:
        return False
    return True


def _rules_to_code_points(rules: str) -> str:
    '''Return rules with “[XXXX]” appended to every non-ASCII character.

    “XXXX” is the code point of the character as an upper case
    hexadecimal number with at least 4 digits.  ASCII characters are
    copied unchanged.
    '''
    return ''.join(
        char if is_ascii(char) else '{}[{:04X}]'.format(char, ord(char))
        for char in rules)


def _read_lines(path):
    '''Return the lines of the file path without their trailing newlines.

    If path is empty or “None”, the lines are read from standard input.
    '''
    if path:
        with open(path, 'r', encoding='utf-8') as inputfile:
            raw_lines = inputfile.readlines()
    else:
        raw_lines = sys.stdin.readlines()
    return [line.rstrip('\n') for line in raw_lines]


def _write_lines(lines, path):
    '''Write each line followed by a newline to the file path.

    If path is empty or “None”, the lines are written to standard output.
    '''
    text = ''.join('%s\n' % line for line in lines)
    if path:
        with open(path, 'w', encoding='utf-8') as outputfile:
            outputfile.write(text)
    else:
        sys.stdout.write(text)


def main(argv=None) -> int:
    '''Sort the input lines using the collation rules from the rules file.

    Files named on the command line are read and written as UTF-8;
    standard input and standard output keep the encoding Python chose
    for them.

    :param argv: The command line arguments, see parse_args().
    :type argv: list of strings or None
    :return: The exit status, always 0.
    :rtype: int
    :raises SystemExit: If the “icu” module (PyICU) cannot be imported.
    '''
    global _ARGS
    _ARGS = parse_args(argv)
    if icu is None:
        # Fail before any file is read or written.
        sys.exit('error: the Python module “icu” (PyICU) is required '
                 'but could not be imported.')

    rules = ''
    if _ARGS.rules:
        with open(_ARGS.rules, 'r', encoding='utf-8') as rulesfile:
            rules = rulesfile.read()

    # Nothing is written when there are no rules to convert.
    if _ARGS.unicode_rules and rules:
        with open(_ARGS.unicode_rules, 'w',
                  encoding='utf-8') as unicode_rules_file:
            unicode_rules_file.write(_rules_to_code_points(rules))

    icucollator = icu.RuleBasedCollator(rules)
    input_list = _read_lines(_ARGS.input)
    output_list = sorted(input_list, key=icucollator.getSortKey)
    _write_lines(output_list, _ARGS.output)
    return 0


if __name__ == '__main__':
    sys.exit(main())