2 * Copyright (c) 1995 - 2000 Kungliga Tekniska Högskolan
3 * (Royal Institute of Technology, Stockholm, Sweden).
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the Institute nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <afsconfig.h>
34 #include <afs/param.h>
41 #if !defined(AFS_LINUX20_ENV) && !defined(AFS_OBSD_ENV)
42 #include "netinet/in.h"
45 #include "afs/sysincludes.h"
47 #ifdef AFS_LINUX22_ENV
48 #include <asm/byteorder.h>
56 #include <rx/rx_packet.h>
62 #include "private_data.h"
66 * Unrolling of the inner loops helps the most on pentium chips
67 * (ca 18%). On risc machines only expect a modest improvement (ca 5%).
68 * The cost for this is roughly 4k bytes.
70 #define UNROLL_LOOPS 1
72 * Inline assembler gives a boost only to fc_keysched.
73 * On the pentium expect ca 28%.
75 /*#define GNU_ASM 1 (now autoconfed) */
77 #if !defined(inline) && !defined(__GNUC__)
82 * There is usually no memcpy in kernels but gcc will inline all
83 * calls to memcpy in this code anyway.
85 #if defined(KERNEL) && !defined(__GNUC__)
86 #define memcpy(to, from, n) bcopy((from), (to), (n))
/*
 * Byte-order helper macros.
 *   ROT32L(x, n)   - rotate x left by n bits after casting it to afs_uint32.
 *                    n must stay in 1..31: n == 0 would shift right by 32,
 *                    which is undefined for a 32-bit operand.
 *   octetswap32(x) - reverse the four octets of x by combining two masked
 *                    copies of ROT32L(x, 16).
 *   NTOH(x)        - expands to octetswap32(x) in the definition shown here.
 *   EFF_NTOHL(x)   - expands to ntohl(x) in the definition shown here.
 * NOTE(review): ROT32L casts its argument without parenthesising it, so for
 * an expression argument the cast binds to the first operand only, and x is
 * expanded twice. Keep that in mind before passing anything with side
 * effects.
 */
89 /* Rotate 32 bit word left */
90 #define ROT32L(x, n) ((((afs_uint32) x) << (n)) | (((afs_uint32) x) >> (32-(n))))
91 #define octetswap32(x) (((ROT32L(x, 16) & 0x00ff00ff)<<8) | ((ROT32L(x, 16)>>8) & 0x00ff00ff))
96 #define NTOH(x) octetswap32(x)
100 * Try to use a good function for ntohl-ing.
102 * The choice is done by autoconf setting EFF_NTOHL to one of:
110 #define EFF_NTOHL(x) ntohl(x)
113 * Sboxes for Feistel network derived from
114 * /afs/transarc.com/public/afsps/afs.rel31b.export-src/rxkad/sboxes.h
/*
 * sbox0: 256-entry lookup table consulted by F_ENCRYPT. The local Z() macro
 * shifts each literal left by 3 bits and passes the result through NTOH(),
 * so the stored words are already positioned for XOR-ing into a round.
 */
118 #define Z(x) NTOH(x << 3)
119 static const afs_uint32 sbox0[256] = {
120 Z(0xea), Z(0x7f), Z(0xb2), Z(0x64), Z(0x9d), Z(0xb0), Z(0xd9), Z(0x11),
121 Z(0xcd), Z(0x86), Z(0x86),
122 Z(0x91), Z(0x0a), Z(0xb2), Z(0x93), Z(0x06), Z(0x0e), Z(0x06), Z(0xd2),
123 Z(0x65), Z(0x73), Z(0xc5),
124 Z(0x28), Z(0x60), Z(0xf2), Z(0x20), Z(0xb5), Z(0x38), Z(0x7e), Z(0xda),
125 Z(0x9f), Z(0xe3), Z(0xd2),
126 Z(0xcf), Z(0xc4), Z(0x3c), Z(0x61), Z(0xff), Z(0x4a), Z(0x4a), Z(0x35),
127 Z(0xac), Z(0xaa), Z(0x5f),
128 Z(0x2b), Z(0xbb), Z(0xbc), Z(0x53), Z(0x4e), Z(0x9d), Z(0x78), Z(0xa3),
129 Z(0xdc), Z(0x09), Z(0x32),
130 Z(0x10), Z(0xc6), Z(0x6f), Z(0x66), Z(0xd6), Z(0xab), Z(0xa9), Z(0xaf),
131 Z(0xfd), Z(0x3b), Z(0x95),
132 Z(0xe8), Z(0x34), Z(0x9a), Z(0x81), Z(0x72), Z(0x80), Z(0x9c), Z(0xf3),
133 Z(0xec), Z(0xda), Z(0x9f),
134 Z(0x26), Z(0x76), Z(0x15), Z(0x3e), Z(0x55), Z(0x4d), Z(0xde), Z(0x84),
135 Z(0xee), Z(0xad), Z(0xc7),
136 Z(0xf1), Z(0x6b), Z(0x3d), Z(0xd3), Z(0x04), Z(0x49), Z(0xaa), Z(0x24),
137 Z(0x0b), Z(0x8a), Z(0x83),
138 Z(0xba), Z(0xfa), Z(0x85), Z(0xa0), Z(0xa8), Z(0xb1), Z(0xd4), Z(0x01),
139 Z(0xd8), Z(0x70), Z(0x64),
140 Z(0xf0), Z(0x51), Z(0xd2), Z(0xc3), Z(0xa7), Z(0x75), Z(0x8c), Z(0xa5),
141 Z(0x64), Z(0xef), Z(0x10),
142 Z(0x4e), Z(0xb7), Z(0xc6), Z(0x61), Z(0x03), Z(0xeb), Z(0x44), Z(0x3d),
143 Z(0xe5), Z(0xb3), Z(0x5b),
144 Z(0xae), Z(0xd5), Z(0xad), Z(0x1d), Z(0xfa), Z(0x5a), Z(0x1e), Z(0x33),
145 Z(0xab), Z(0x93), Z(0xa2),
146 Z(0xb7), Z(0xe7), Z(0xa8), Z(0x45), Z(0xa4), Z(0xcd), Z(0x29), Z(0x63),
147 Z(0x44), Z(0xb6), Z(0x69),
148 Z(0x7e), Z(0x2e), Z(0x62), Z(0x03), Z(0xc8), Z(0xe0), Z(0x17), Z(0xbb),
149 Z(0xc7), Z(0xf3), Z(0x3f),
150 Z(0x36), Z(0xba), Z(0x71), Z(0x8e), Z(0x97), Z(0x65), Z(0x60), Z(0x69),
151 Z(0xb6), Z(0xf6), Z(0xe6),
152 Z(0x6e), Z(0xe0), Z(0x81), Z(0x59), Z(0xe8), Z(0xaf), Z(0xdd), Z(0x95),
153 Z(0x22), Z(0x99), Z(0xfd),
154 Z(0x63), Z(0x19), Z(0x74), Z(0x61), Z(0xb1), Z(0xb6), Z(0x5b), Z(0xae),
155 Z(0x54), Z(0xb3), Z(0x70),
156 Z(0xff), Z(0xc6), Z(0x3b), Z(0x3e), Z(0xc1), Z(0xd7), Z(0xe1), Z(0x0e),
157 Z(0x76), Z(0xe5), Z(0x36),
158 Z(0x4f), Z(0x59), Z(0xc7), Z(0x08), Z(0x6e), Z(0x82), Z(0xa6), Z(0x93),
159 Z(0xc4), Z(0xaa), Z(0x26),
160 Z(0x49), Z(0xe0), Z(0x21), Z(0x64), Z(0x07), Z(0x9f), Z(0x64), Z(0x81),
161 Z(0x9c), Z(0xbf), Z(0xf9),
162 Z(0xd1), Z(0x43), Z(0xf8), Z(0xb6), Z(0xb9), Z(0xf1), Z(0x24), Z(0x75),
163 Z(0x03), Z(0xe4), Z(0xb0),
164 Z(0x99), Z(0x46), Z(0x3d), Z(0xf5), Z(0xd1), Z(0x39), Z(0x72), Z(0x12),
165 Z(0xf6), Z(0xba), Z(0x0c),
166 Z(0x0d), Z(0x42), Z(0x2e)
/*
 * sbox1: 256-entry lookup table consulted by F_ENCRYPT. The local Z() macro
 * places the low 5 bits of each literal at the top of the word (x << 27),
 * ORs in the remaining high bits (x >> 5), and passes the result through
 * NTOH().
 * NOTE(review): x is a plain int literal here, so with a 32-bit int the
 * expression x << 27 overflows signed int for every entry >= 0x10, which is
 * undefined behaviour in ISO C. Consider casting x to afs_uint32 before
 * shifting.
 */
170 #define Z(x) NTOH((x << 27) | (x >> 5))
171 static const afs_uint32 sbox1[256] = {
172 Z(0x77), Z(0x14), Z(0xa6), Z(0xfe), Z(0xb2), Z(0x5e), Z(0x8c), Z(0x3e),
173 Z(0x67), Z(0x6c), Z(0xa1),
174 Z(0x0d), Z(0xc2), Z(0xa2), Z(0xc1), Z(0x85), Z(0x6c), Z(0x7b), Z(0x67),
175 Z(0xc6), Z(0x23), Z(0xe3),
176 Z(0xf2), Z(0x89), Z(0x50), Z(0x9c), Z(0x03), Z(0xb7), Z(0x73), Z(0xe6),
177 Z(0xe1), Z(0x39), Z(0x31),
178 Z(0x2c), Z(0x27), Z(0x9f), Z(0xa5), Z(0x69), Z(0x44), Z(0xd6), Z(0x23),
179 Z(0x83), Z(0x98), Z(0x7d),
180 Z(0x3c), Z(0xb4), Z(0x2d), Z(0x99), Z(0x1c), Z(0x1f), Z(0x8c), Z(0x20),
181 Z(0x03), Z(0x7c), Z(0x5f),
182 Z(0xad), Z(0xf4), Z(0xfa), Z(0x95), Z(0xca), Z(0x76), Z(0x44), Z(0xcd),
183 Z(0xb6), Z(0xb8), Z(0xa1),
184 Z(0xa1), Z(0xbe), Z(0x9e), Z(0x54), Z(0x8f), Z(0x0b), Z(0x16), Z(0x74),
185 Z(0x31), Z(0x8a), Z(0x23),
186 Z(0x17), Z(0x04), Z(0xfa), Z(0x79), Z(0x84), Z(0xb1), Z(0xf5), Z(0x13),
187 Z(0xab), Z(0xb5), Z(0x2e),
188 Z(0xaa), Z(0x0c), Z(0x60), Z(0x6b), Z(0x5b), Z(0xc4), Z(0x4b), Z(0xbc),
189 Z(0xe2), Z(0xaf), Z(0x45),
190 Z(0x73), Z(0xfa), Z(0xc9), Z(0x49), Z(0xcd), Z(0x00), Z(0x92), Z(0x7d),
191 Z(0x97), Z(0x7a), Z(0x18),
192 Z(0x60), Z(0x3d), Z(0xcf), Z(0x5b), Z(0xde), Z(0xc6), Z(0xe2), Z(0xe6),
193 Z(0xbb), Z(0x8b), Z(0x06),
194 Z(0xda), Z(0x08), Z(0x15), Z(0x1b), Z(0x88), Z(0x6a), Z(0x17), Z(0x89),
195 Z(0xd0), Z(0xa9), Z(0xc1),
196 Z(0xc9), Z(0x70), Z(0x6b), Z(0xe5), Z(0x43), Z(0xf4), Z(0x68), Z(0xc8),
197 Z(0xd3), Z(0x84), Z(0x28),
198 Z(0x0a), Z(0x52), Z(0x66), Z(0xa3), Z(0xca), Z(0xf2), Z(0xe3), Z(0x7f),
199 Z(0x7a), Z(0x31), Z(0xf7),
200 Z(0x88), Z(0x94), Z(0x5e), Z(0x9c), Z(0x63), Z(0xd5), Z(0x24), Z(0x66),
201 Z(0xfc), Z(0xb3), Z(0x57),
202 Z(0x25), Z(0xbe), Z(0x89), Z(0x44), Z(0xc4), Z(0xe0), Z(0x8f), Z(0x23),
203 Z(0x3c), Z(0x12), Z(0x52),
204 Z(0xf5), Z(0x1e), Z(0xf4), Z(0xcb), Z(0x18), Z(0x33), Z(0x1f), Z(0xf8),
205 Z(0x69), Z(0x10), Z(0x9d),
206 Z(0xd3), Z(0xf7), Z(0x28), Z(0xf8), Z(0x30), Z(0x05), Z(0x5e), Z(0x32),
207 Z(0xc0), Z(0xd5), Z(0x19),
208 Z(0xbd), Z(0x45), Z(0x8b), Z(0x5b), Z(0xfd), Z(0xbc), Z(0xe2), Z(0x5c),
209 Z(0xa9), Z(0x96), Z(0xef),
210 Z(0x70), Z(0xcf), Z(0xc2), Z(0x2a), Z(0xb3), Z(0x61), Z(0xad), Z(0x80),
211 Z(0x48), Z(0x81), Z(0xb7),
212 Z(0x1d), Z(0x43), Z(0xd9), Z(0xd7), Z(0x45), Z(0xf0), Z(0xd8), Z(0x8a),
213 Z(0x59), Z(0x7c), Z(0x57),
214 Z(0xc1), Z(0x79), Z(0xc7), Z(0x34), Z(0xd6), Z(0x43), Z(0xdf), Z(0xe4),
215 Z(0x78), Z(0x16), Z(0x06),
216 Z(0xda), Z(0x92), Z(0x76), Z(0x51), Z(0xe1), Z(0xd4), Z(0x70), Z(0x03),
217 Z(0xe0), Z(0x2f), Z(0x96),
218 Z(0x91), Z(0x82), Z(0x80)
/*
 * sbox2: 256-entry lookup table consulted by F_ENCRYPT. The local Z() macro
 * shifts each literal left by 11 bits and passes the result through NTOH().
 */
222 #define Z(x) NTOH(x << 11)
223 static const afs_uint32 sbox2[256] = {
224 Z(0xf0), Z(0x37), Z(0x24), Z(0x53), Z(0x2a), Z(0x03), Z(0x83), Z(0x86),
225 Z(0xd1), Z(0xec), Z(0x50),
226 Z(0xf0), Z(0x42), Z(0x78), Z(0x2f), Z(0x6d), Z(0xbf), Z(0x80), Z(0x87),
227 Z(0x27), Z(0x95), Z(0xe2),
228 Z(0xc5), Z(0x5d), Z(0xf9), Z(0x6f), Z(0xdb), Z(0xb4), Z(0x65), Z(0x6e),
229 Z(0xe7), Z(0x24), Z(0xc8),
230 Z(0x1a), Z(0xbb), Z(0x49), Z(0xb5), Z(0x0a), Z(0x7d), Z(0xb9), Z(0xe8),
231 Z(0xdc), Z(0xb7), Z(0xd9),
232 Z(0x45), Z(0x20), Z(0x1b), Z(0xce), Z(0x59), Z(0x9d), Z(0x6b), Z(0xbd),
233 Z(0x0e), Z(0x8f), Z(0xa3),
234 Z(0xa9), Z(0xbc), Z(0x74), Z(0xa6), Z(0xf6), Z(0x7f), Z(0x5f), Z(0xb1),
235 Z(0x68), Z(0x84), Z(0xbc),
236 Z(0xa9), Z(0xfd), Z(0x55), Z(0x50), Z(0xe9), Z(0xb6), Z(0x13), Z(0x5e),
237 Z(0x07), Z(0xb8), Z(0x95),
238 Z(0x02), Z(0xc0), Z(0xd0), Z(0x6a), Z(0x1a), Z(0x85), Z(0xbd), Z(0xb6),
239 Z(0xfd), Z(0xfe), Z(0x17),
240 Z(0x3f), Z(0x09), Z(0xa3), Z(0x8d), Z(0xfb), Z(0xed), Z(0xda), Z(0x1d),
241 Z(0x6d), Z(0x1c), Z(0x6c),
242 Z(0x01), Z(0x5a), Z(0xe5), Z(0x71), Z(0x3e), Z(0x8b), Z(0x6b), Z(0xbe),
243 Z(0x29), Z(0xeb), Z(0x12),
244 Z(0x19), Z(0x34), Z(0xcd), Z(0xb3), Z(0xbd), Z(0x35), Z(0xea), Z(0x4b),
245 Z(0xd5), Z(0xae), Z(0x2a),
246 Z(0x79), Z(0x5a), Z(0xa5), Z(0x32), Z(0x12), Z(0x7b), Z(0xdc), Z(0x2c),
247 Z(0xd0), Z(0x22), Z(0x4b),
248 Z(0xb1), Z(0x85), Z(0x59), Z(0x80), Z(0xc0), Z(0x30), Z(0x9f), Z(0x73),
249 Z(0xd3), Z(0x14), Z(0x48),
250 Z(0x40), Z(0x07), Z(0x2d), Z(0x8f), Z(0x80), Z(0x0f), Z(0xce), Z(0x0b),
251 Z(0x5e), Z(0xb7), Z(0x5e),
252 Z(0xac), Z(0x24), Z(0x94), Z(0x4a), Z(0x18), Z(0x15), Z(0x05), Z(0xe8),
253 Z(0x02), Z(0x77), Z(0xa9),
254 Z(0xc7), Z(0x40), Z(0x45), Z(0x89), Z(0xd1), Z(0xea), Z(0xde), Z(0x0c),
255 Z(0x79), Z(0x2a), Z(0x99),
256 Z(0x6c), Z(0x3e), Z(0x95), Z(0xdd), Z(0x8c), Z(0x7d), Z(0xad), Z(0x6f),
257 Z(0xdc), Z(0xff), Z(0xfd),
258 Z(0x62), Z(0x47), Z(0xb3), Z(0x21), Z(0x8a), Z(0xec), Z(0x8e), Z(0x19),
259 Z(0x18), Z(0xb4), Z(0x6e),
260 Z(0x3d), Z(0xfd), Z(0x74), Z(0x54), Z(0x1e), Z(0x04), Z(0x85), Z(0xd8),
261 Z(0xbc), Z(0x1f), Z(0x56),
262 Z(0xe7), Z(0x3a), Z(0x56), Z(0x67), Z(0xd6), Z(0xc8), Z(0xa5), Z(0xf3),
263 Z(0x8e), Z(0xde), Z(0xae),
264 Z(0x37), Z(0x49), Z(0xb7), Z(0xfa), Z(0xc8), Z(0xf4), Z(0x1f), Z(0xe0),
265 Z(0x2a), Z(0x9b), Z(0x15),
266 Z(0xd1), Z(0x34), Z(0x0e), Z(0xb5), Z(0xe0), Z(0x44), Z(0x78), Z(0x84),
267 Z(0x59), Z(0x56), Z(0x68),
268 Z(0x77), Z(0xa5), Z(0x14), Z(0x06), Z(0xf5), Z(0x2f), Z(0x8c), Z(0x8a),
269 Z(0x73), Z(0x80), Z(0x76),
270 Z(0xb4), Z(0x10), Z(0x86)
/*
 * sbox3: 256-entry lookup table consulted by F_ENCRYPT. The local Z() macro
 * shifts each literal left by 19 bits and passes the result through NTOH().
 */
274 #define Z(x) NTOH(x << 19)
275 static const afs_uint32 sbox3[256] = {
276 Z(0xa9), Z(0x2a), Z(0x48), Z(0x51), Z(0x84), Z(0x7e), Z(0x49), Z(0xe2),
277 Z(0xb5), Z(0xb7), Z(0x42),
278 Z(0x33), Z(0x7d), Z(0x5d), Z(0xa6), Z(0x12), Z(0x44), Z(0x48), Z(0x6d),
279 Z(0x28), Z(0xaa), Z(0x20),
280 Z(0x6d), Z(0x57), Z(0xd6), Z(0x6b), Z(0x5d), Z(0x72), Z(0xf0), Z(0x92),
281 Z(0x5a), Z(0x1b), Z(0x53),
282 Z(0x80), Z(0x24), Z(0x70), Z(0x9a), Z(0xcc), Z(0xa7), Z(0x66), Z(0xa1),
283 Z(0x01), Z(0xa5), Z(0x41),
284 Z(0x97), Z(0x41), Z(0x31), Z(0x82), Z(0xf1), Z(0x14), Z(0xcf), Z(0x53),
285 Z(0x0d), Z(0xa0), Z(0x10),
286 Z(0xcc), Z(0x2a), Z(0x7d), Z(0xd2), Z(0xbf), Z(0x4b), Z(0x1a), Z(0xdb),
287 Z(0x16), Z(0x47), Z(0xf6),
288 Z(0x51), Z(0x36), Z(0xed), Z(0xf3), Z(0xb9), Z(0x1a), Z(0xa7), Z(0xdf),
289 Z(0x29), Z(0x43), Z(0x01),
290 Z(0x54), Z(0x70), Z(0xa4), Z(0xbf), Z(0xd4), Z(0x0b), Z(0x53), Z(0x44),
291 Z(0x60), Z(0x9e), Z(0x23),
292 Z(0xa1), Z(0x18), Z(0x68), Z(0x4f), Z(0xf0), Z(0x2f), Z(0x82), Z(0xc2),
293 Z(0x2a), Z(0x41), Z(0xb2),
294 Z(0x42), Z(0x0c), Z(0xed), Z(0x0c), Z(0x1d), Z(0x13), Z(0x3a), Z(0x3c),
295 Z(0x6e), Z(0x35), Z(0xdc),
296 Z(0x60), Z(0x65), Z(0x85), Z(0xe9), Z(0x64), Z(0x02), Z(0x9a), Z(0x3f),
297 Z(0x9f), Z(0x87), Z(0x96),
298 Z(0xdf), Z(0xbe), Z(0xf2), Z(0xcb), Z(0xe5), Z(0x6c), Z(0xd4), Z(0x5a),
299 Z(0x83), Z(0xbf), Z(0x92),
300 Z(0x1b), Z(0x94), Z(0x00), Z(0x42), Z(0xcf), Z(0x4b), Z(0x00), Z(0x75),
301 Z(0xba), Z(0x8f), Z(0x76),
302 Z(0x5f), Z(0x5d), Z(0x3a), Z(0x4d), Z(0x09), Z(0x12), Z(0x08), Z(0x38),
303 Z(0x95), Z(0x17), Z(0xe4),
304 Z(0x01), Z(0x1d), Z(0x4c), Z(0xa9), Z(0xcc), Z(0x85), Z(0x82), Z(0x4c),
305 Z(0x9d), Z(0x2f), Z(0x3b),
306 Z(0x66), Z(0xa1), Z(0x34), Z(0x10), Z(0xcd), Z(0x59), Z(0x89), Z(0xa5),
307 Z(0x31), Z(0xcf), Z(0x05),
308 Z(0xc8), Z(0x84), Z(0xfa), Z(0xc7), Z(0xba), Z(0x4e), Z(0x8b), Z(0x1a),
309 Z(0x19), Z(0xf1), Z(0xa1),
310 Z(0x3b), Z(0x18), Z(0x12), Z(0x17), Z(0xb0), Z(0x98), Z(0x8d), Z(0x0b),
311 Z(0x23), Z(0xc3), Z(0x3a),
312 Z(0x2d), Z(0x20), Z(0xdf), Z(0x13), Z(0xa0), Z(0xa8), Z(0x4c), Z(0x0d),
313 Z(0x6c), Z(0x2f), Z(0x47),
314 Z(0x13), Z(0x13), Z(0x52), Z(0x1f), Z(0x2d), Z(0xf5), Z(0x79), Z(0x3d),
315 Z(0xa2), Z(0x54), Z(0xbd),
316 Z(0x69), Z(0xc8), Z(0x6b), Z(0xf3), Z(0x05), Z(0x28), Z(0xf1), Z(0x16),
317 Z(0x46), Z(0x40), Z(0xb0),
318 Z(0x11), Z(0xd3), Z(0xb7), Z(0x95), Z(0x49), Z(0xcf), Z(0xc3), Z(0x1d),
319 Z(0x8f), Z(0xd8), Z(0xe1),
320 Z(0x73), Z(0xdb), Z(0xad), Z(0xc8), Z(0xc9), Z(0xa9), Z(0xa1), Z(0xc2),
321 Z(0xc5), Z(0xe3), Z(0xba),
322 Z(0xfc), Z(0x0e), Z(0x25)
326 * This is a 16 round Feistel network with permutation F_ENCRYPT
/*
 * F_ENCRYPT(R, L, sched): one Feistel round. Both variants below XOR L with
 * one entry from each of sbox0..sbox3, indexed by the four bytes of a
 * temporary named un. The first variant reads those bytes through a union
 * (un.c[0..3]); the variant under #ifndef WORDS_BIGENDIAN extracts them with
 * the FF() shift-and-mask helper at bit offsets 0, 8, 16 and 24.
 * The macro expands to a bare brace block rather than do/while(0), so it
 * must not be used as the unbraced body of an if that has an else.
 */
329 #define F_ENCRYPT(R, L, sched) { \
330 union lc4 { afs_uint32 l; unsigned char c[4]; } un; \
332 L ^= sbox0[un.c[0]] ^ sbox1[un.c[1]] ^ sbox2[un.c[2]] ^ sbox3[un.c[3]]; }
334 #ifndef WORDS_BIGENDIAN
335 /* BEWARE: this code is endian dependent.
336 * This should really be inline assembler on the x86.
339 #define FF(y, shiftN) (((y) >> shiftN) & 0xFF)
340 #define F_ENCRYPT(R, L, sched) { \
343 L ^= sbox0[FF(un, 0)] ^ sbox1[FF(un, 8)] ^ sbox2[FF(un, 16)] ^ sbox3[FF(un, 24)];}
/*
 * fc_ecb_enc: run the F_ENCRYPT rounds over the two block halves l and r,
 * alternating F_ENCRYPT(r, l, ...) with F_ENCRYPT(l, r, ...) and walking
 * sched[] forwards via *sched++.
 * The !UNROLL_LOOPS branch performs the rounds as a loop of MAXROUNDS / 4
 * iterations of four rounds each; the sixteen explicit F_ENCRYPT lines are
 * the unrolled form.
 * Callers read the result back from out[0] and out[1].
 * NOTE(review): *sched++ is passed as a macro argument, so F_ENCRYPT must
 * expand its sched parameter exactly once per round.
 */
347 fc_ecb_enc(afs_uint32 l, afs_uint32 r, afs_uint32 out[2],
348 const afs_int32 sched[MAXROUNDS])
350 #if !defined(UNROLL_LOOPS)
353 for (i = 0; i < (MAXROUNDS / 4); i++) {
354 F_ENCRYPT(r, l, *sched++);
355 F_ENCRYPT(l, r, *sched++);
356 F_ENCRYPT(r, l, *sched++);
357 F_ENCRYPT(l, r, *sched++);
361 F_ENCRYPT(r, l, *sched++);
362 F_ENCRYPT(l, r, *sched++);
363 F_ENCRYPT(r, l, *sched++);
364 F_ENCRYPT(l, r, *sched++);
365 F_ENCRYPT(r, l, *sched++);
366 F_ENCRYPT(l, r, *sched++);
367 F_ENCRYPT(r, l, *sched++);
368 F_ENCRYPT(l, r, *sched++);
369 F_ENCRYPT(r, l, *sched++);
370 F_ENCRYPT(l, r, *sched++);
371 F_ENCRYPT(r, l, *sched++);
372 F_ENCRYPT(l, r, *sched++);
373 F_ENCRYPT(r, l, *sched++);
374 F_ENCRYPT(l, r, *sched++);
375 F_ENCRYPT(r, l, *sched++);
376 F_ENCRYPT(l, r, *sched++);
377 #endif /* UNROLL_LOOPS */
/*
 * fc_ecb_dec: inverse of fc_ecb_enc. sched is first repositioned to its last
 * element (&sched[MAXROUNDS - 1]) and then walked backwards via *sched--,
 * and the l/r roles are swapped relative to fc_ecb_enc: the first round is
 * F_ENCRYPT(l, r, ...).
 * The !UNROLL_LOOPS branch performs the rounds as a loop of MAXROUNDS / 4
 * iterations of four rounds each; the sixteen explicit F_ENCRYPT lines are
 * the unrolled form.
 * Callers read the result back from out[0] and out[1].
 */
384 fc_ecb_dec(afs_uint32 l, afs_uint32 r, afs_uint32 out[2],
385 const afs_int32 sched[MAXROUNDS])
387 sched = &sched[MAXROUNDS - 1];
389 #if !defined(UNROLL_LOOPS)
392 for (i = 0; i < (MAXROUNDS / 4); i++) {
393 F_ENCRYPT(l, r, *sched--);
394 F_ENCRYPT(r, l, *sched--);
395 F_ENCRYPT(l, r, *sched--);
396 F_ENCRYPT(r, l, *sched--);
400 F_ENCRYPT(l, r, *sched--);
401 F_ENCRYPT(r, l, *sched--);
402 F_ENCRYPT(l, r, *sched--);
403 F_ENCRYPT(r, l, *sched--);
404 F_ENCRYPT(l, r, *sched--);
405 F_ENCRYPT(r, l, *sched--);
406 F_ENCRYPT(l, r, *sched--);
407 F_ENCRYPT(r, l, *sched--);
408 F_ENCRYPT(l, r, *sched--);
409 F_ENCRYPT(r, l, *sched--);
410 F_ENCRYPT(l, r, *sched--);
411 F_ENCRYPT(r, l, *sched--);
412 F_ENCRYPT(l, r, *sched--);
413 F_ENCRYPT(r, l, *sched--);
414 F_ENCRYPT(l, r, *sched--);
415 F_ENCRYPT(r, l, *sched--);
416 #endif /* UNROLL_LOOPS */
/*
 * fc_cbc_enc: chained encryption of length bytes from in to out, consumed in
 * 8-byte steps (the loop runs while length > 0 and subtracts 8 each pass).
 * The chaining words xor0/xor1 start from iv[0]/iv[1]. After each
 * fc_ecb_enc() call they are reloaded with the input word XOR the freshly
 * produced cipher word (in[i] ^ b8[i]), so the chain depends on both the
 * plaintext and the ciphertext of the previous block.
 * NOTE(review): xor0/xor1 are declared afs_int32 although they carry
 * afs_uint32 values and are passed to afs_uint32 parameters.
 */
423 fc_cbc_enc(const afs_uint32 * in, afs_uint32 * out, afs_int32 length,
424 const afs_int32 sched[MAXROUNDS], afs_uint32 * iv)
426 afs_int32 xor0 = iv[0], xor1 = iv[1];
428 for (; length > 0; length -= 8) {
430 /* If length < 8 we read too much, usually ok */
433 fc_ecb_enc(xor0, xor1, b8, sched);
434 xor0 = in[0] ^ b8[0];
435 xor1 = in[1] ^ b8[1];
437 /* Out is always a multiple of 8 */
/*
 * fc_cbc_dec: chained decryption of length bytes from in to out, consumed in
 * 8-byte steps (the loop runs while length > 0 and subtracts 8 each pass).
 * Each block in[0], in[1] is run through fc_ecb_dec() into b8. The chaining
 * words xor0/xor1 start from iv[0]/iv[1] and are reloaded with in[i] ^ b8[i]
 * for the next block.
 * NOTE(review): xor0/xor1 are declared afs_int32 although they carry
 * afs_uint32 values.
 */
447 fc_cbc_dec(const afs_uint32 * in, afs_uint32 * out, afs_int32 length,
448 const afs_int32 sched[MAXROUNDS], afs_uint32 * iv)
450 afs_int32 xor0 = iv[0], xor1 = iv[1];
452 for (; length > 0; length -= 8) {
454 /* In is always a multiple of 8 */
455 fc_ecb_dec(in[0], in[1], b8, sched);
458 xor0 = in[0] ^ b8[0];
459 xor1 = in[1] ^ b8[1];
461 /* If length < 8 we write too much, this is not always ok */
/*
 * fc_ecb_encrypt: public single-block entry point. Bumps the
 * fc_encrypts[encrypt] statistic, then processes the block in[0], in[1] into
 * out with either fc_ecb_enc() or fc_ecb_dec(), presumably selected by the
 * encrypt flag that also indexes the statistic.
 */
471 fc_ecb_encrypt(afs_uint32 * in, afs_uint32 * out, fc_KeySchedule sched,
474 INC_RXKAD_STATS(fc_encrypts[encrypt]);
476 fc_ecb_enc(in[0], in[1], out, sched);
478 fc_ecb_dec(in[0], in[1], out, sched);
/*
 * fc_cbc_encrypt: public chained-mode entry point. Forwards in, out, length,
 * sched and iv unchanged to fc_cbc_enc() or fc_cbc_dec(), presumably
 * selected by the encrypt flag. Both callees read their starting chaining
 * words from iv.
 */
483 fc_cbc_encrypt(afs_uint32 * in, afs_uint32 * out, afs_int32 length,
484 fc_KeySchedule sched, afs_uint32 * iv, int encrypt)
487 fc_cbc_enc(in, out, length, sched, iv);
489 fc_cbc_dec(in, out, length, sched, iv);
/*
 * 56-bit rotate-right helpers used for key scheduling.
 *   ROT56R(hi, lo, n) - the 56-bit value is split across hi (upper 24 bits)
 *                       and lo (lower 32 bits). The low n bits of lo wrap
 *                       round to the top of hi (bit position 24 - n) and the
 *                       low n bits of hi drop into the top of lo.
 *   ROT56R64(k, n)    - same rotation on a single variable: the low n bits
 *                       of k are moved up to bit position 56 - n.
 * Both macros modify their arguments in place, expand n several times and
 * do not parenthesise it, so pass a plain constant. n == 0 is not usable
 * with ROT56R because it would shift by 32.
 */
493 /* Rotate two 32 bit numbers as a 56 bit number */
494 #define ROT56R(hi, lo, n) { \
495 afs_uint32 t = lo & ((1<<n)-1); \
496 lo = (lo >> n) | ((hi & ((1<<n)-1)) << (32-n)); \
497 hi = (hi >> n) | (t << (24-n)); }
499 /* Rotate one 64 bit number as a 56 bit number */
500 #define ROT56R64(k, n) { \
501 k = (k >> n) | ((k & ((1<<n) - 1)) << (56-n)); }
504 * Generate a key schedule from key, the least significant bit in each
505 * key byte is parity and shall be ignored. This leaves 56 significant
506 * bits in the key to scatter over the 16 key schedules. For each
507 * schedule extract the low order 32 bits and use as schedule, then
508 * rotate right by 11 bits.
510 * Note that this fc_keysched() generates a schedule in natural byte
511 * order, the Transarc function does not. Therefore it's *not*
512 * possible to mix fc_keysched, fc_ecb_encrypt and fc_cbc_encrypt
513 * from different implementations. Keep them in the same module!
/*
 * fc_keysched: expand the key bytes at key_ into the schedule sched.
 * k is a 64-bit accumulator that holds the 56 non-parity key bits. Sixteen
 * schedule words are stored in order, each being EFF_NTOHL() of the low 32
 * bits of k at that point, and the fc_key_scheds statistic is bumped once
 * the schedule has been written.
 */
516 fc_keysched(void *key_, fc_KeySchedule sched)
518 const unsigned char *key = key_;
520 /* Do we have 56 bit longs or even longer longs? */
521 afs_uint64 k; /* k holds all 56 non parity bits */
523 /* Compress out parity bits */
540 /* Use lower 32 bits for schedule, rotate by 11 each round (16 times) */
541 *sched++ = EFF_NTOHL((afs_uint32) k);
543 *sched++ = EFF_NTOHL((afs_uint32) k);
545 *sched++ = EFF_NTOHL((afs_uint32) k);
547 *sched++ = EFF_NTOHL((afs_uint32) k);
550 *sched++ = EFF_NTOHL((afs_uint32) k);
552 *sched++ = EFF_NTOHL((afs_uint32) k);
554 *sched++ = EFF_NTOHL((afs_uint32) k);
556 *sched++ = EFF_NTOHL((afs_uint32) k);
559 *sched++ = EFF_NTOHL((afs_uint32) k);
561 *sched++ = EFF_NTOHL((afs_uint32) k);
563 *sched++ = EFF_NTOHL((afs_uint32) k);
565 *sched++ = EFF_NTOHL((afs_uint32) k);
568 *sched++ = EFF_NTOHL((afs_uint32) k);
570 *sched++ = EFF_NTOHL((afs_uint32) k);
572 *sched++ = EFF_NTOHL((afs_uint32) k);
574 *sched++ = EFF_NTOHL((afs_uint32) k);
576 INC_RXKAD_STATS(fc_key_scheds);
581 * Encryption/decryption of Rx packets is pretty straightforward. Run
582 * fc_cbc_encrypt over the packet fragments until len bytes have been
583 * processed. Skip the Rx packet header but not the security header.
/*
 * rxkad_EncryptPacket: encrypt the first len bytes of packet in place,
 * starting at wirevec[1] and moving on to the following fragments.
 * The bytesEncrypted statistic, indexed by the type stored in the security
 * object's private data, is increased by len. iv is copied into the local
 * ivec so that the caller's iv is never written. Each fragment is handed to
 * fc_cbc_enc() with the same buffer as input and output, after its length
 * has been clamped to the remaining len.
 * Returns RXKADDATALEN on the path marked "Length mismatch".
 * NOTE(review): despite its name, rx_connection_not_used is passed to
 * rx_SecurityObjectOf(). The loop condition here is plain `len`, whereas
 * rxkad_DecryptPacket uses `len > 0`; confirm whether the difference is
 * intended.
 */
586 rxkad_EncryptPacket(const struct rx_connection * rx_connection_not_used,
587 const fc_KeySchedule * sched, const afs_uint32 * iv,
588 int len, struct rx_packet * packet)
592 struct rx_securityClass *obj;
593 struct rxkad_cprivate *tp; /* s & c have type at same offset */
595 obj = rx_SecurityObjectOf(rx_connection_not_used);
596 tp = (struct rxkad_cprivate *)obj->privateData;
597 ADD_RXKAD_STATS(bytesEncrypted[rxkad_TypeIndex(tp->type)],len);
599 /* What is this good for?
600 * It turns out that the security header for auth_enc is of
601 * size 8 bytes and the last 4 bytes are defined to be 0!
603 afs_uint32 *t = (afs_uint32 *) packet->wirevec[1].iov_base;
607 memcpy(ivec, iv, sizeof(ivec)); /* Must use copy of iv */
608 for (frag = &packet->wirevec[1]; len; frag++) {
609 int ilen = frag->iov_len;
610 afs_uint32 *ibas = (afs_uint32 *) frag->iov_base;
612 return RXKADDATALEN; /* Length mismatch */
614 ilen = len; /* Don't process too much data */
615 fc_cbc_enc(ibas, ibas, ilen, sched, ivec);
/*
 * rxkad_DecryptPacket: decrypt the first len bytes of packet in place,
 * starting at wirevec[1] and moving on to the following fragments while
 * len > 0.
 * The bytesDecrypted statistic, indexed by the type stored in the security
 * object's private data, is increased by len. iv is copied into the local
 * ivec so that the caller's iv is never written. Each fragment is handed to
 * fc_cbc_dec() with the same buffer as input and output, after its length
 * has been clamped to the remaining len.
 * Returns RXKADDATALEN on the path marked "Length mismatch".
 * NOTE(review): despite its name, rx_connection_not_used is passed to
 * rx_SecurityObjectOf().
 */
622 rxkad_DecryptPacket(const struct rx_connection * rx_connection_not_used,
623 const fc_KeySchedule * sched, const afs_uint32 * iv,
624 int len, struct rx_packet * packet)
628 struct rx_securityClass *obj;
629 struct rxkad_cprivate *tp; /* s & c have type at same offset */
631 obj = rx_SecurityObjectOf(rx_connection_not_used);
632 tp = (struct rxkad_cprivate *)obj->privateData;
633 ADD_RXKAD_STATS(bytesDecrypted[rxkad_TypeIndex(tp->type)],len);
634 memcpy(ivec, iv, sizeof(ivec)); /* Must use copy of iv */
635 for (frag = &packet->wirevec[1]; len > 0; frag++) {
636 int ilen = frag->iov_len;
637 afs_uint32 *ibas = (afs_uint32 *) frag->iov_base;
639 return RXKADDATALEN; /* Length mismatch */
641 ilen = len; /* Don't process too much data */
642 fc_cbc_dec(ibas, ibas, ilen, sched, ivec);
648 #if defined(TEST) || defined(TEST_KERNEL)
650 * It is possible to link with the client kernel libafs.a to verify
651 * the test case. Use TEST_KERNEL to get the mangled names.
654 const char the_quick[] = "The quick brown fox jumps over the lazy dogs.\0\0";
656 const unsigned char key1[8] =
657 { 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87 };
658 const char ciph1[] = {
659 0x00, 0xf0, 0xe, 0x11, 0x75, 0xe6, 0x23, 0x82, 0xee, 0xac, 0x98, 0x62,
660 0x44, 0x51, 0xe4, 0x84, 0xc3, 0x59, 0xd8, 0xaa, 0x64, 0x60, 0xae, 0xf7,
661 0xd2, 0xd9, 0x13, 0x79, 0x72, 0xa3, 0x45, 0x03, 0x23, 0xb5, 0x62, 0xd7,
662 0xc, 0xf5, 0x27, 0xd1, 0xf8, 0x91, 0x3c, 0xac, 0x44, 0x22, 0x92, 0xef
665 const unsigned char key2[8] =
666 { 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10 };
667 const char ciph2[] = {
668 0xca, 0x90, 0xf5, 0x9d, 0xcb, 0xd4, 0xd2, 0x3c, 0x01, 0x88, 0x7f, 0x3e,
669 0x31, 0x6e, 0x62, 0x9d, 0xd8, 0xe0, 0x57, 0xa3, 0x06, 0x3a, 0x42, 0x58,
670 0x2a, 0x28, 0xfe, 0x72, 0x52, 0x2f, 0xdd, 0xe0, 0x19, 0x89, 0x09, 0x1c,
671 0x2a, 0x8e, 0x8c, 0x94, 0xfc, 0xc7, 0x68, 0xe4, 0x88, 0xaa, 0xde, 0x0f
675 #define fc_keysched _afs_QTKrFdpoFL
676 #define fc_ecb_encrypt _afs_sDLThwNLok
677 #define fc_cbc_encrypt _afs_fkyCWTvfRS
678 #define rxkad_DecryptPacket _afs_SRWEeqTXrS
679 #define rxkad_EncryptPacket _afs_bpwQbdoghO
691 afs_int32 sched[MAXROUNDS];
692 char ciph[100], clear[100], tmp[100];
695 struct rx_packet packet;
697 if (sizeof(afs_int32) != 4)
698 fprintf(stderr, "error: sizeof(afs_int32) != 4\n");
699 if (sizeof(afs_uint32) != 4)
700 fprintf(stderr, "error: sizeof(afs_uint32) != 4\n");
703 * Use key1 and key2 as iv */
704 fc_keysched(key1, sched);
705 memcpy(iv, key2, sizeof(iv));
706 fc_cbc_encrypt(the_quick, ciph, sizeof(the_quick), sched, iv, ENCRYPT);
707 if (memcmp(ciph1, ciph, sizeof(ciph1)) != 0)
708 fprintf(stderr, "encrypt FAILED\n");
709 memcpy(iv, key2, sizeof(iv));
710 fc_cbc_encrypt(ciph, clear, sizeof(the_quick), sched, iv, DECRYPT);
711 if (strcmp(the_quick, clear) != 0)
712 fprintf(stderr, "crypt decrypt FAILED\n");
715 * Use key2 and key1 as iv
717 fc_keysched(key2, sched);
718 memcpy(iv, key1, sizeof(iv));
719 fc_cbc_encrypt(the_quick, ciph, sizeof(the_quick), sched, iv, ENCRYPT);
720 if (memcmp(ciph2, ciph, sizeof(ciph2)) != 0)
721 fprintf(stderr, "encrypt FAILED\n");
722 memcpy(iv, key1, sizeof(iv));
723 fc_cbc_encrypt(ciph, clear, sizeof(the_quick), sched, iv, DECRYPT);
724 if (strcmp(the_quick, clear) != 0)
725 fprintf(stderr, "crypt decrypt FAILED\n");
728 * Test Encrypt- and Decrypt-Packet, use key1 and key2 as iv
730 fc_keysched(key1, sched);
731 memcpy(iv, key2, sizeof(iv));
732 strcpy(clear, the_quick);
733 packet.wirevec[1].iov_base = clear;
734 packet.wirevec[1].iov_len = sizeof(the_quick);
735 packet.wirevec[2].iov_len = 0;
737 /* For unknown reasons bytes 4-7 are zeroed in rxkad_EncryptPacket */
738 rxkad_EncryptPacket(tmp, sched, iv, sizeof(the_quick), &packet);
739 rxkad_DecryptPacket(tmp, sched, iv, sizeof(the_quick), &packet);
744 if (strcmp(the_quick, clear) != 0)
745 fprintf(stderr, "rxkad_EncryptPacket/rxkad_DecryptPacket FAILED\n");
748 struct timeval start, stop;
751 fc_keysched(key1, sched);
752 gettimeofday(&start, NULL);
753 for (i = 0; i < 1000000; i++)
754 fc_keysched(key1, sched);
755 gettimeofday(&stop, NULL);
756 printf("fc_keysched = %2.2f us\n",
757 (stop.tv_sec - start.tv_sec +
758 (stop.tv_usec - start.tv_usec) / 1e6) * 1);
760 fc_ecb_encrypt(data, data, sched, ENCRYPT);
761 gettimeofday(&start, NULL);
762 for (i = 0; i < 1000000; i++)
763 fc_ecb_encrypt(data, data, sched, ENCRYPT);
764 gettimeofday(&stop, 0);
765 printf("fc_ecb_encrypt = %2.2f us\n",
766 (stop.tv_sec - start.tv_sec +
767 (stop.tv_usec - start.tv_usec) / 1e6) * 1);
769 fc_cbc_encrypt(the_quick, ciph, sizeof(the_quick), sched, iv,
771 gettimeofday(&start, NULL);
772 for (i = 0; i < 100000; i++)
773 fc_cbc_encrypt(the_quick, ciph, sizeof(the_quick), sched, iv,
775 gettimeofday(&stop, NULL);
776 printf("fc_cbc_encrypt = %2.2f us\n",
777 (stop.tv_sec - start.tv_sec +
778 (stop.tv_usec - start.tv_usec) / 1e6) * 10);