]> sourceware.org Git - glibc.git/blob - stdlib/chacha20.c
2745a813151b2df3d33911ac5c2be7049b1d0532
[glibc.git] / stdlib / chacha20.c
1 /* Generic ChaCha20 implementation (used on arc4random).
2 Copyright (C) 2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <array_length.h>
20 #include <endian.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <string.h>
24
/* Size in bytes of the IV: a 32-bit stream position followed by a
   96-bit nonce.  */
#define CHACHA20_IV_SIZE 16

/* Size in bytes of the key (eight 32-bit words).  */
#define CHACHA20_KEY_SIZE 32

/* Number of 32-bit words in the cipher state.  */
#define CHACHA20_STATE_LEN 16
30
/* The ChaCha20 implementation is based on RFC 8439
   (https://datatracker.ietf.org/doc/html/rfc8439).  By default the final
   XOR of the keystream with the plaintext is omitted, because the
   plaintext is a stream of zeros; defining CHACHA20_XOR_FINAL enables
   that XOR.  */
34
/* The four constant words placed in state[0..3].  Read as little-endian
   bytes they spell the ASCII string "expand 32-byte k".  */
enum chacha20_constants
{
  CHACHA20_CONSTANT_EXPA = 0x61707865U,   /* "expa" */
  CHACHA20_CONSTANT_ND_3 = 0x3320646eU,   /* "nd 3" */
  CHACHA20_CONSTANT_2_BY = 0x79622d32U,   /* "2-by" */
  CHACHA20_CONSTANT_TE_K = 0x6b206574U    /* "te k" */
};
42
/* Load a 32-bit word in host byte order from P, which need not be
   aligned.  Going through memcpy avoids a misaligned or aliasing
   access.  */
static inline uint32_t
read_unaligned_32 (const uint8_t *p)
{
  uint32_t word;

  memcpy (&word, p, sizeof word);
  return word;
}
50
/* Store the 32-bit word V in host byte order at P, which need not be
   aligned.  */
static inline void
write_unaligned_32 (uint8_t *p, uint32_t v)
{
  memcpy (p, &v, sizeof (uint32_t));
}
56
/* read_unaligned_le32 (P) loads a little-endian 32-bit word from P and
   returns it in host order.  set_state (V) byte-swaps V on big-endian
   hosts so that storing it in host order yields little-endian bytes;
   on other hosts it is the identity.  */
#if __BYTE_ORDER == __BIG_ENDIAN
# define read_unaligned_le32(p) __builtin_bswap32 (read_unaligned_32 (p))
# define set_state(v) __builtin_bswap32 (v)
#else
# define read_unaligned_le32(p) read_unaligned_32 (p)
# define set_state(v) (v)
#endif
64
65 static inline void
66 chacha20_init (uint32_t *state, const uint8_t *key, const uint8_t *iv)
67 {
68 state[0] = CHACHA20_CONSTANT_EXPA;
69 state[1] = CHACHA20_CONSTANT_ND_3;
70 state[2] = CHACHA20_CONSTANT_2_BY;
71 state[3] = CHACHA20_CONSTANT_TE_K;
72
73 state[4] = read_unaligned_le32 (key + 0 * sizeof (uint32_t));
74 state[5] = read_unaligned_le32 (key + 1 * sizeof (uint32_t));
75 state[6] = read_unaligned_le32 (key + 2 * sizeof (uint32_t));
76 state[7] = read_unaligned_le32 (key + 3 * sizeof (uint32_t));
77 state[8] = read_unaligned_le32 (key + 4 * sizeof (uint32_t));
78 state[9] = read_unaligned_le32 (key + 5 * sizeof (uint32_t));
79 state[10] = read_unaligned_le32 (key + 6 * sizeof (uint32_t));
80 state[11] = read_unaligned_le32 (key + 7 * sizeof (uint32_t));
81
82 state[12] = read_unaligned_le32 (iv + 0 * sizeof (uint32_t));
83 state[13] = read_unaligned_le32 (iv + 1 * sizeof (uint32_t));
84 state[14] = read_unaligned_le32 (iv + 2 * sizeof (uint32_t));
85 state[15] = read_unaligned_le32 (iv + 3 * sizeof (uint32_t));
86 }
87
/* Rotate WORD left by SHIFT bits.  Both shift counts are reduced
   modulo 32, so a SHIFT of 0 or 32 returns WORD unchanged and no
   shift is ever as wide as the type.  */
static inline uint32_t
rotl32 (unsigned int shift, uint32_t word)
{
  const unsigned int left = shift & 31;
  const unsigned int right = (-shift) & 31;

  return (word << left) | (word >> right);
}
93
/* Store the output word V at DST (unaligned, host byte order).  When
   CHACHA20_XOR_FINAL is defined, V is first XORed with the 32-bit word
   read from SRC; otherwise SRC is not accessed.  */
static void
state_final (const uint8_t *src, uint8_t *dst, uint32_t v)
{
  uint32_t out = v;

#ifdef CHACHA20_XOR_FINAL
  out ^= read_unaligned_32 (src);
#endif
  write_unaligned_32 (dst, out);
}
102
/* Compute one ChaCha20 block from STATE and store its 64 bytes at DST.
   The working copy of the state goes through 20 rounds (10 iterations
   of a column round followed by a diagonal round), the original state
   words are added back, and each resulting word is passed to
   state_final together with the matching 4-byte position in SRC and
   DST.  Finally the word state[12] is incremented so the next call
   produces the following block.  */
static inline void
chacha20_block (uint32_t *state, uint8_t *dst, const uint8_t *src)
{
  /* Number of 32-bit words in the state.  */
  enum { n_words = 16 };
  uint32_t x[n_words];

  for (int w = 0; w < n_words; w++)
    x[w] = state[w];

#define QROUND(_x0, _x1, _x2, _x3) \
  do { \
    _x0 = _x0 + _x1; _x3 = rotl32 (16, (_x0 ^ _x3)); \
    _x2 = _x2 + _x3; _x1 = rotl32 (12, (_x1 ^ _x2)); \
    _x0 = _x0 + _x1; _x3 = rotl32 (8, (_x0 ^ _x3)); \
    _x2 = _x2 + _x3; _x1 = rotl32 (7, (_x1 ^ _x2)); \
  } while(0)

  for (int round = 0; round < 20; round += 2)
    {
      /* Column round.  */
      QROUND (x[0], x[4], x[8], x[12]);
      QROUND (x[1], x[5], x[9], x[13]);
      QROUND (x[2], x[6], x[10], x[14]);
      QROUND (x[3], x[7], x[11], x[15]);

      /* Diagonal round.  */
      QROUND (x[0], x[5], x[10], x[15]);
      QROUND (x[1], x[6], x[11], x[12]);
      QROUND (x[2], x[7], x[8], x[13]);
      QROUND (x[3], x[4], x[9], x[14]);
    }

  /* Feed-forward: add the input state and emit the words in order.  */
  for (int w = 0; w < n_words; w++)
    state_final (&src[4 * w], &dst[4 * w], set_state (x[w] + state[w]));

  state[12]++;
}
166
167 static void
168 __attribute_maybe_unused__
169 chacha20_crypt_generic (uint32_t *state, uint8_t *dst, const uint8_t *src,
170 size_t bytes)
171 {
172 while (bytes >= CHACHA20_BLOCK_SIZE)
173 {
174 chacha20_block (state, dst, src);
175
176 bytes -= CHACHA20_BLOCK_SIZE;
177 dst += CHACHA20_BLOCK_SIZE;
178 src += CHACHA20_BLOCK_SIZE;
179 }
180
181 if (__glibc_unlikely (bytes != 0))
182 {
183 uint8_t stream[CHACHA20_BLOCK_SIZE];
184 chacha20_block (state, stream, src);
185 memcpy (dst, stream, bytes);
186 explicit_bzero (stream, sizeof stream);
187 }
188 }
189
190 /* Get the architecture optimized version. */
191 #include <chacha20_arch.h>
This page took 0.046008 seconds and 4 git commands to generate.