]>
Commit | Line | Data |
---|---|---|
01859b1c UD |
1 | /* punycode.h Declarations for punycode functions. |
2 | * Copyright (C) 2002, 2003 Simon Josefsson | |
3 | * | |
4 | * This file is part of GNU Libidn. | |
5 | * | |
6 | * GNU Libidn is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * GNU Libidn is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
59ba27a6 | 17 | * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. |
01859b1c UD |
18 | */ |
19 | ||
20 | /* | |
21 | * This file is derived from RFC 3492bis written by Adam M. Costello. | |
22 | * | |
23 | * Disclaimer and license: Regarding this entire document or any | |
24 | * portion of it (including the pseudocode and C code), the author | |
25 | * makes no guarantees and is not responsible for any damage resulting | |
26 | * from its use. The author grants irrevocable permission to anyone | |
27 | * to use, modify, and distribute it in any way that does not diminish | |
28 | * the rights of anyone else to use, modify, and distribute it, | |
29 | * provided that redistributed derivative works do not contain | |
30 | * misleading author or version information. Derivative works need | |
31 | * not be licensed under similar terms. | |
32 | * | |
33 | * Copyright (C) The Internet Society (2003). All Rights Reserved. | |
34 | * | |
35 | * This document and translations of it may be copied and furnished to | |
36 | * others, and derivative works that comment on or otherwise explain it | |
37 | * or assist in its implementation may be prepared, copied, published | |
38 | * and distributed, in whole or in part, without restriction of any | |
39 | * kind, provided that the above copyright notice and this paragraph are | |
40 | * included on all such copies and derivative works. However, this | |
41 | * document itself may not be modified in any way, such as by removing | |
42 | * the copyright notice or references to the Internet Society or other | |
43 | * Internet organizations, except as needed for the purpose of | |
44 | * developing Internet standards in which case the procedures for | |
45 | * copyrights defined in the Internet Standards process must be | |
46 | * followed, or as required to translate it into languages other than | |
47 | * English. | |
48 | * | |
49 | * The limited permissions granted above are perpetual and will not be | |
50 | * revoked by the Internet Society or its successors or assigns. | |
51 | * | |
52 | * This document and the information contained herein is provided on an | |
53 | * "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING | |
54 | * TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING | |
55 | * BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION | |
56 | * HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF | |
57 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. | |
58 | */ | |
59 | ||
60 | #ifndef _PUNYCODE_H | |
61 | #define _PUNYCODE_H | |
62 | ||
63 | #ifdef __cplusplus | |
64 | extern "C" | |
65 | { | |
66 | #endif | |
67 | ||
68 | #include <stddef.h> /* size_t */ | |
69 | #include <stdint.h> /* uint32_t */ | |
70 | ||
71 | enum punycode_status /* Status codes returned by punycode_encode() and punycode_decode(). */ | |
72 | { | |
73 | punycode_success = 0, /* Conversion completed; outputs are valid. */ |
74 | punycode_bad_input = 1, /* Input is invalid. */ | |
75 | punycode_big_output = 2, /* Output would exceed the space provided. */ | |
76 | punycode_overflow = 3 /* Wider integers needed to process input. */ | |
77 | }; | |
78 | ||
79 | typedef enum /* Uppercase aliases carrying the same values as enum punycode_status. */ | |
80 | { | |
81 | PUNYCODE_SUCCESS = punycode_success, | |
82 | PUNYCODE_BAD_INPUT = punycode_bad_input, | |
83 | PUNYCODE_BIG_OUTPUT = punycode_big_output, | |
84 | PUNYCODE_OVERFLOW = punycode_overflow | |
85 | } Punycode_status; | |
86 | ||
87 | /* punycode_uint needs to be unsigned and needs to be */ | |
88 | /* at least 26 bits wide; uint32_t meets both requirements. */ | |
89 | ||
90 | typedef uint32_t punycode_uint; | |
91 | ||
92 | extern int punycode_encode (size_t input_length, /* in: number of code points (and of case flags) */ |
93 | const punycode_uint input[], /* in: code points to encode */ |
94 | const unsigned char case_flags[], /* in: case flags parallel to input, or a null pointer */ |
95 | size_t * output_length, char output[]); /* in/out: capacity, then count written; out: ASCII code points, not null-terminated */ |
96 | ||
97 | /* | |
98 | punycode_encode() converts a sequence of code points (presumed to be | |
99 | Unicode code points) to Punycode. | |
100 | ||
101 | Input arguments (to be supplied by the caller): | |
102 | ||
103 | input_length | |
104 | The number of code points in the input array and the number | |
105 | of flags in the case_flags array. | |
106 | ||
107 | input | |
108 | An array of code points. They are presumed to be Unicode | |
109 | code points, but that is not strictly REQUIRED. The | |
110 | array contains code points, not code units. UTF-16 uses | |
111 | code units D800 through DFFF to refer to code points | |
112 | 10000..10FFFF. The code points D800..DFFF do not occur in | |
113 | any valid Unicode string. The code points that can occur in | |
114 | Unicode strings (0..D7FF and E000..10FFFF) are also called | |
115 | Unicode scalar values. | |
116 | ||
117 | case_flags | |
118 | A null pointer or an array of boolean values parallel to | |
119 | the input array. Nonzero (true, flagged) suggests that the | |
120 | corresponding Unicode character be forced to uppercase after | |
121 | being decoded (if possible), and zero (false, unflagged) | |
122 | suggests that it be forced to lowercase (if possible). | |
123 | ASCII code points (0..7F) are encoded literally, except that | |
124 | ASCII letters are forced to uppercase or lowercase according | |
125 | to the corresponding case flags. If case_flags is a null | |
126 | pointer then ASCII letters are left as they are, and other | |
127 | code points are treated as unflagged. | |
128 | ||
129 | Output arguments (to be filled in by the function): | |
130 | ||
131 | output | |
132 | An array of ASCII code points. It is *not* null-terminated; | |
133 | it will contain zeros if and only if the input contains | |
134 | zeros. (Of course the caller can leave room for a | |
135 | terminator and add one if needed.) | |
136 | ||
137 | Input/output arguments (to be supplied by the caller and overwritten | |
138 | by the function): | |
139 | ||
140 | output_length | |
141 | The caller passes in the maximum number of ASCII code points | |
142 | that it can receive. On successful return it will contain | |
143 | the number of ASCII code points actually output. | |
144 | ||
145 | Return value: | |
146 | ||
147 | Can be any of the punycode_status values defined above except | |
148 | punycode_bad_input. If not punycode_success, then output_length | |
149 | and output might contain garbage. | |
150 | */ | |
151 | ||
152 | extern int punycode_decode (size_t input_length, /* in: number of ASCII code points in input */ |
153 | const char input[], /* in: ASCII code points (0..7F) to decode */ |
154 | size_t * output_length, /* in/out: capacity in code points, then count actually output */ |
155 | punycode_uint output[], /* out: decoded code points */ |
156 | unsigned char case_flags[]); /* out: case flags parallel to output; may be a null pointer */ |
157 | ||
158 | /* | |
159 | punycode_decode() converts Punycode to a sequence of code points | |
160 | (presumed to be Unicode code points). | |
161 | ||
162 | Input arguments (to be supplied by the caller): | |
163 | ||
164 | input_length | |
165 | The number of ASCII code points in the input array. | |
166 | ||
167 | input | |
168 | An array of ASCII code points (0..7F). | |
169 | ||
170 | Output arguments (to be filled in by the function): | |
171 | ||
172 | output | |
173 | An array of code points like the input argument of | |
174 | punycode_encode() (see above). | |
175 | ||
176 | case_flags | |
177 | A null pointer (if the flags are not needed by the caller) | |
178 | or an array of boolean values parallel to the output array. | |
179 | Nonzero (true, flagged) suggests that the corresponding | |
180 | Unicode character be forced to uppercase by the caller (if | |
181 | possible), and zero (false, unflagged) suggests that it | |
182 | be forced to lowercase (if possible). ASCII code points | |
183 | (0..7F) are output already in the proper case, but their | |
184 | flags will be set appropriately so that applying the flags | |
185 | would be harmless. | |
186 | ||
187 | Input/output arguments (to be supplied by the caller and overwritten | |
188 | by the function): | |
189 | ||
190 | output_length | |
191 | The caller passes in the maximum number of code points | |
192 | that it can receive into the output array (which is also | |
193 | the maximum number of flags that it can receive into the | |
194 | case_flags array, if case_flags is not a null pointer). On | |
195 | successful return it will contain the number of code points | |
196 | actually output (which is also the number of flags actually | |
197 | output, if case_flags is not a null pointer). The decoder | |
198 | will never need to output more code points than the number | |
199 | of ASCII code points in the input, because of the way the | |
200 | encoding is defined. The number of code points output | |
201 | cannot exceed the maximum possible value of a punycode_uint, | |
202 | even if the supplied output_length is greater than that. | |
203 | ||
204 | Return value: | |
205 | ||
206 | Can be any of the punycode_status values defined above. If not | |
207 | punycode_success, then output_length, output, and case_flags | |
208 | might contain garbage. | |
209 | */ | |
210 | ||
211 | #ifdef __cplusplus | |
212 | } | |
213 | #endif | |
214 | #endif /* _PUNYCODE_H */ |