Sourceware Bugzilla – Attachment 10749 Details for
Bug 21547
Tibetan script collation broken (Dzongkha and Tibetan)
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
icu-collation-test.py
icu-collation-test.py (text/plain), 3.01 KB, created by
Mike FABIAN
on 2018-01-15 14:36:24 UTC
(
hide
)
Description:
icu-collation-test.py
Filename:
MIME Type:
Creator:
Mike FABIAN
Created:
2018-01-15 14:36:24 UTC
Size:
3.01 KB
patch
obsolete
#!/usr/bin/env python3
'''A simple tool to test ICU/CLDR style collation rules.

Reads optional collation rules from a file, builds an
``icu.RuleBasedCollator`` from them, sorts the input lines (from a file
or from stdin) by the collator's sort keys and writes the sorted lines
to a file or to stdout.
'''

import sys
import icu
import argparse

# Parsed command line arguments; filled in when run as a script.
_ARGS = None


def parse_args():
    '''
    Parse the command line arguments.

    :return: The namespace with the attributes ``rules``,
             ``unicode_rules``, ``input`` and ``output``; each is a
             file name or None when the option was not given.
    :rtype: argparse.Namespace
    '''
    parser = argparse.ArgumentParser(
        description='A simple tool to test ICU/CLDR style collation rules.')
    parser.add_argument(
        '-r', '--rules',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('File to read the collation rules from. '
              + 'default: %(default)s.'))
    parser.add_argument(
        '-u', '--unicode_rules',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('File to write the rules to using <U....> Unicode code points '
              + 'for everything which is not ASCII. This is useful if one '
              + 'wants to write equivalent rules for a glibc locale. '
              + 'default: %(default)s.'))
    parser.add_argument(
        '-i', '--input',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('Test file containing the lines which should be sorted. '
              + 'default: %(default)s.'))
    parser.add_argument(
        '-o', '--output',
        nargs='?',
        type=str,
        action='store',
        default=None,
        help=('Test file containing the sorted lines. '
              + 'default: %(default)s.'))
    return parser.parse_args()


def is_ascii(text):
    '''Checks whether all characters in text are ASCII characters

    Returns True if the text is all ASCII, False if not.

    :param text: The text to check
    :type text: string
    :rtype: bool

    Examples:

    >>> is_ascii('Abc')
    True

    >>> is_ascii('Naïve')
    False
    '''
    try:
        text.encode('ascii')
    except UnicodeEncodeError:
        return False
    else:
        return True


if __name__ == '__main__':
    _ARGS = parse_args()

    # All files are read and written as UTF-8 explicitly: the data is
    # non-ASCII collation test material, and relying on the locale's
    # default encoding breaks when the tool runs under a non-UTF-8 locale.
    rules = ''
    if _ARGS.rules:
        with open(_ARGS.rules, 'r', encoding='utf-8') as rulesfile:
            rules = rulesfile.read()

    if _ARGS.unicode_rules and rules:
        # Every non-ASCII character is followed by its code point in
        # brackets; ASCII characters are copied unchanged.
        # NOTE(review): the --unicode_rules help text promises the
        # <U....> notation, but the format below emits the character
        # itself followed by [XXXX] -- confirm which one is intended.
        code_point_rules = ''.join(
            char if is_ascii(char)
            else '{}[{:04X}]'.format(char, ord(char))
            for char in rules)
        with open(_ARGS.unicode_rules, 'w',
                  encoding='utf-8') as unicode_rules_file:
            unicode_rules_file.write(code_point_rules)

    icucollator = icu.RuleBasedCollator(rules)

    if _ARGS.input:
        with open(_ARGS.input, 'r', encoding='utf-8') as inputfile:
            input_list = inputfile.readlines()
    else:
        input_list = sys.stdin.readlines()

    # Strip only the line terminator so that other trailing whitespace
    # still takes part in the comparison.
    input_list = [line.rstrip('\n') for line in input_list]

    output_list = sorted(input_list, key=icucollator.getSortKey)

    if _ARGS.output:
        with open(_ARGS.output, 'w', encoding='utf-8') as outputfile:
            outputfile.writelines('%s\n' % line for line in output_list)
    else:
        sys.stdout.writelines('%s\n' % line for line in output_list)

    # sys.exit() instead of the bare exit() helper, which is injected by
    # the site module and is missing under "python -S" or in frozen builds.
    sys.exit(0)
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 21547
:
10696
| 10749 |
10755
|
10756