00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #ifndef GF128MUL_H
00036 #define GF128MUL_H
00037
00038 #include <stdlib.h>
00039 #include <string.h>
00040
00041 #include "mode_hdr.h"
00042
00043
00044
00045
00046
00047
/* Table-size selection switches. A symbol is enabled by changing its guard
   from '#if 0' to '#if 1'; as written only TABLES_8K is defined.
   NOTE(review): none of these symbols is tested in this header; presumably
   the implementation file uses them to choose a table size - confirm. */
00048 #if 0
00049 # define TABLES_64K
00050 #endif
00051 #if 1
00052 # define TABLES_8K
00053 #endif
00054 #if 0
00055 # define TABLES_4K
00056 #endif
00057 #if 0
00058 # define TABLES_256
00059 #endif
00060
00061
00062
00063
00064
/* Disabled as written. When USE_INLINES is defined the helpers below are
   emitted as gf_inline functions; otherwise they are function-like macros. */
00065 #if 0
00066 # define USE_INLINES
00067 #endif
00068
00069
00070
00071
/* Disabled as written. UNROLL_LOOPS chooses between the unrolled and the
   loop-based forms of the gf_mul_* helpers defined later in this header. */
00072 #if 0
00073 # define UNROLL_LOOPS
00074 #endif
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
/* Size in bytes of one block handled by the helpers below (16 bytes =
   128 bits); used as the loop bound and to size table entries. */
00104 #define GF_BYTE_LEN 16
00105
/* gf_inline: storage/inline specifier placed in front of every helper
   function when USE_INLINES is defined. It is __inline for MSVC,
   'static inline' for GNU toolchains and plain 'static' otherwise.
   It is left undefined when USE_INLINES is not defined. */
00106 #if defined( USE_INLINES )
00107 # if defined( _MSC_VER )
00108 # define gf_inline __inline
00109 # elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
00110 # define gf_inline static inline
00111 # else
00112 # define gf_inline static
00113 # endif
00114 #endif
00115
00116 #if defined(__cplusplus)
00117 extern "C"
00118 {
00119 #endif
00120
00121
00122
00123
00124
00125
/* 256-entry table of 16-bit values, defined in another translation unit.
   The mul_x, mul_x4 and mul_x8 helpers index it with the bits shifted out
   of byte 15 of a block and XOR the entry into bytes 0 and 1.
   NOTE(review): presumably the modular reduction terms - confirm against
   the table's definition. */
00126 extern const unsigned short gf_tab[256];
00127
00128 #if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
00129
00130
00131
00132
00133
00134
00135 #if 0
00136
00137
00138
00139
00140
00141
/* Not compiled: this variant sits inside the '#if 0' block opened above.
   It byte-swaps the block into r with bswap32_block (defined elsewhere),
   shifts the four 32-bit words right by one bit with carries from word
   0 towards word 3, XORs in a byte-swapped gf_tab entry selected by the
   low bit of word 3, then swaps r back. */
00142 gf_inline void mul_x(void *r, const void *x)
00143 { uint_32t _tt;
00144 bswap32_block(r, x, 4);
00145 _tt = gf_tab[(ui32_ptr(r)[3] << 7) & 0xff];
00146 ui32_ptr(r)[3] = (ui32_ptr(r)[3] >> 1) | (ui32_ptr(r)[2] << 31);
00147 ui32_ptr(r)[2] = (ui32_ptr(r)[2] >> 1) | (ui32_ptr(r)[1] << 31);
00148 ui32_ptr(r)[1] = (ui32_ptr(r)[1] >> 1) | (ui32_ptr(r)[0] << 31);
00149 ui32_ptr(r)[0] = (ui32_ptr(r)[0] >> 1) ^ bswap_32(_tt);
00150 bswap32_block(r, r, 4);
00151 }
00152
00153 #endif
00154
/* Selects the mask-based little-endian mul_x4 variants below; when it is
   not defined the bswap-based variants are compiled instead. */
00155 #define VERSION_1
00156
/* Per-byte masks replicated across one buffer unit: MSK_80 has 0x80 in
   every byte and MSK_F0 has 0xf0 in every byte.
   NOTE(review): this assumes unit_cast(BFR_UNIT,-1), defined elsewhere,
   yields the all-ones value of the BFR_UNIT-bit unsigned type - confirm. */
00157 #define MSK_80 (0x80 * (unit_cast(BFR_UNIT,-1) / 0xff))
00158 #define MSK_F0 (0xf0 * (unit_cast(BFR_UNIT,-1) / 0xff))
00159
00160 #if defined( USE_INLINES )
00161
00162 #if BFR_UNIT == 64
00163
/* r = block x moved right by one bit; little-endian host, 64-bit units.
   Within each word '(w >> 1) & ~MSK_80' shifts every byte right by one and
   '(w << 15) & MSK_80' moves the low bit of each byte into the top bit of
   the following byte; '>> 49' carries the low bit of byte 7 into byte 8.
   The low bit of byte 15 selects gf_tab[0x80] or gf_tab[0], which is XORed
   into bytes 0-1. No word of x is read after the matching word of r has
   been written, so r may be the same block as x.
   NOTE(review): presumably multiplication by x in GF(2^128) - confirm. */
00164 gf_inline void mul_x(void *r, const void *x)
00165 { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 49) & MSK_80];
00166
00167 ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) & ~MSK_80 | ((ui64_ptr(x)[1] << 15) | (ui64_ptr(x)[0] >> 49)) & MSK_80;
00168 ui64_ptr(r)[0] = ((ui64_ptr(x)[0] >> 1) & ~MSK_80 | (ui64_ptr(x)[0] << 15) & MSK_80) ^ _tt;
00169 }
00170
00171 #if defined( VERSION_1 )
00172
/* In-place move of block x right by four bits; little-endian host, 64-bit
   units, VERSION_1 (mask-based) variant. Every byte is shifted right by a
   nibble and receives the low nibble of the preceding byte as its high
   nibble. The low nibble of byte 15, moved to the high nibble position,
   indexes gf_tab; the entry is XORed into bytes 0-1. */
00173 gf_inline void mul_x4(void *x)
00174 { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & MSK_F0];
00175
00176 ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) & ~MSK_F0 | ((ui64_ptr(x)[1] << 12) | (ui64_ptr(x)[0] >> 52)) & MSK_F0;
00177 ui64_ptr(x)[0] = ((ui64_ptr(x)[0] >> 4) & ~MSK_F0 | (ui64_ptr(x)[0] << 12) & MSK_F0) ^ _tt;
00178 }
00179
00180 #else
00181
/* Alternative 4-bit move compiled only when VERSION_1 is not defined;
   little-endian host, 64-bit units. The gf_tab index is taken from the
   low nibble of byte 15 before the block is modified. The block is then
   byte-swapped with bswap64_block (defined elsewhere) so that an ordinary
   128-bit right shift by four can be used, and each word is swapped back
   with bswap_64 before the table entry is XORed into word 0. */
00182 gf_inline void mul_x4(void *x)
00183 { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & 0xf0];
00184 bswap64_block(x, x, 2);
00185 ui64_ptr(x)[1] = bswap_64((ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60));
00186 ui64_ptr(x)[0] = bswap_64((ui64_ptr(x)[0] >> 4)) ^ _tt;
00187 }
00188
00189 #endif
00190
/* In-place move of block x by one whole byte; little-endian host, 64-bit
   units. Byte 15 (the top byte of word 1) indexes gf_tab and is dropped;
   bytes 0..14 move to positions 1..15; the 16-bit table entry is XORed
   into the now-empty byte 0 and into byte 1. */
00191 gf_inline void mul_x8(void *x)
00192 { uint_64t _tt = gf_tab[ui64_ptr(x)[1] >> 56];
00193 ui64_ptr(x)[1] = (ui64_ptr(x)[1] << 8) | (ui64_ptr(x)[0] >> 56);
00194 ui64_ptr(x)[0] = (ui64_ptr(x)[0] << 8) ^ _tt;
00195 }
00196
00197 #elif BFR_UNIT == 32
00198
/* r = block x moved right by one bit; little-endian host, 32-bit units.
   Same scheme as the 64-bit version, applied to four words: bytes shift
   right by one, '<< 15' carries a low bit into the next byte of the same
   word and '>> 17' carries the low bit of a word's last byte into the
   first byte of the next word. The low bit of byte 15 selects
   gf_tab[0x80] or gf_tab[0], XORed into bytes 0-1. Words are written from
   index 3 down and no overwritten word is read again, so r may be x. */
00199 gf_inline void mul_x(void *r, const void *x)
00200 { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 17) & MSK_80];
00201
00202 ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) & ~MSK_80 | ((ui32_ptr(x)[3] << 15) | (ui32_ptr(x)[2] >> 17)) & MSK_80;
00203 ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) & ~MSK_80 | ((ui32_ptr(x)[2] << 15) | (ui32_ptr(x)[1] >> 17)) & MSK_80;
00204 ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) & ~MSK_80 | ((ui32_ptr(x)[1] << 15) | (ui32_ptr(x)[0] >> 17)) & MSK_80;
00205 ui32_ptr(r)[0] = ((ui32_ptr(x)[0] >> 1) & ~MSK_80 | (ui32_ptr(x)[0] << 15) & MSK_80) ^ _tt;
00206 }
00207
00208 #if defined( VERSION_1 )
00209
/* In-place move of block x right by four bits; little-endian host, 32-bit
   units, VERSION_1 (mask-based) variant. Each byte is shifted right by a
   nibble and takes the low nibble of the preceding byte as its high
   nibble ('<< 12' inside a word, '>> 20' across words). The low nibble of
   byte 15 indexes gf_tab; the entry is XORed into bytes 0-1. */
00210 gf_inline void mul_x4(void *x)
00211 { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & MSK_F0];
00212
00213 ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[3] << 12) | (ui32_ptr(x)[2] >> 20)) & MSK_F0;
00214 ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[2] << 12) | (ui32_ptr(x)[1] >> 20)) & MSK_F0;
00215 ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[1] << 12) | (ui32_ptr(x)[0] >> 20)) & MSK_F0;
00216 ui32_ptr(x)[0] = ((ui32_ptr(x)[0] >> 4) & ~MSK_F0 | (ui32_ptr(x)[0] << 12) & MSK_F0) ^ _tt;
00217 }
00218
00219 #else
00220
/* Alternative 4-bit move compiled only when VERSION_1 is not defined;
   little-endian host, 32-bit units. The gf_tab index is read from the low
   nibble of byte 15 first; the block is byte-swapped with bswap32_block
   (defined elsewhere), shifted right by four bits as a plain 128-bit
   value, and each word is swapped back with bswap_32 before the table
   entry is XORed into word 0. */
00221 gf_inline void mul_x4(void *x)
00222 { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & 0xf0];
00223 bswap32_block(x, x, 4);
00224 ui32_ptr(x)[3] = bswap_32((ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28));
00225 ui32_ptr(x)[2] = bswap_32((ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28));
00226 ui32_ptr(x)[1] = bswap_32((ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28));
00227 ui32_ptr(x)[0] = bswap_32((ui32_ptr(x)[0] >> 4)) ^ _tt;
00228 }
00229
00230 #endif
00231
/* In-place move of block x by one whole byte; little-endian host, 32-bit
   units. Byte 15 (top byte of word 3) indexes gf_tab and is dropped;
   bytes 0..14 move to positions 1..15; the 16-bit table entry is XORed
   into the now-empty byte 0 and into byte 1. */
00232 gf_inline void mul_x8(void *x)
00233 { uint_32t _tt = gf_tab[ui32_ptr(x)[3] >> 24];
00234
00235 ui32_ptr(x)[3] = (ui32_ptr(x)[3] << 8) | (ui32_ptr(x)[2] >> 24);
00236 ui32_ptr(x)[2] = (ui32_ptr(x)[2] << 8) | (ui32_ptr(x)[1] >> 24);
00237 ui32_ptr(x)[1] = (ui32_ptr(x)[1] << 8) | (ui32_ptr(x)[0] >> 24);
00238 ui32_ptr(x)[0] = (ui32_ptr(x)[0] << 8) ^ _tt;
00239 }
00240
00241 #else
00242
/* r = block x moved right by one bit, one byte at a time (used when
   BFR_UNIT is neither 64 nor 32). Each byte is shifted right by one and
   takes the low bit of the preceding byte as its top bit. The low bit of
   byte 15 is saved first; when it is set, 0xe1 is XORed into byte 0.
   Bytes are written from 15 down to 0 and each statement reads only bytes
   not yet written, so r may be the same block as x. */
00243 gf_inline void mul_x(void *r, const void *x)
00244 { uint_8t _tt = ui8_ptr(x)[15] & 1;
00245 ui8_ptr(r)[15] = (ui8_ptr(x)[15] >> 1) | (ui8_ptr(x)[14] << 7);
00246 ui8_ptr(r)[14] = (ui8_ptr(x)[14] >> 1) | (ui8_ptr(x)[13] << 7);
00247 ui8_ptr(r)[13] = (ui8_ptr(x)[13] >> 1) | (ui8_ptr(x)[12] << 7);
00248 ui8_ptr(r)[12] = (ui8_ptr(x)[12] >> 1) | (ui8_ptr(x)[11] << 7);
00249 ui8_ptr(r)[11] = (ui8_ptr(x)[11] >> 1) | (ui8_ptr(x)[10] << 7);
00250 ui8_ptr(r)[10] = (ui8_ptr(x)[10] >> 1) | (ui8_ptr(x)[ 9] << 7);
00251 ui8_ptr(r)[ 9] = (ui8_ptr(x)[ 9] >> 1) | (ui8_ptr(x)[ 8] << 7);
00252 ui8_ptr(r)[ 8] = (ui8_ptr(x)[ 8] >> 1) | (ui8_ptr(x)[ 7] << 7);
00253 ui8_ptr(r)[ 7] = (ui8_ptr(x)[ 7] >> 1) | (ui8_ptr(x)[ 6] << 7);
00254 ui8_ptr(r)[ 6] = (ui8_ptr(x)[ 6] >> 1) | (ui8_ptr(x)[ 5] << 7);
00255 ui8_ptr(r)[ 5] = (ui8_ptr(x)[ 5] >> 1) | (ui8_ptr(x)[ 4] << 7);
00256 ui8_ptr(r)[ 4] = (ui8_ptr(x)[ 4] >> 1) | (ui8_ptr(x)[ 3] << 7);
00257 ui8_ptr(r)[ 3] = (ui8_ptr(x)[ 3] >> 1) | (ui8_ptr(x)[ 2] << 7);
00258 ui8_ptr(r)[ 2] = (ui8_ptr(x)[ 2] >> 1) | (ui8_ptr(x)[ 1] << 7);
00259 ui8_ptr(r)[ 1] = (ui8_ptr(x)[ 1] >> 1) | (ui8_ptr(x)[ 0] << 7);
00260 ui8_ptr(r)[ 0] = (ui8_ptr(x)[ 0] >> 1) ^ (_tt ? 0xe1 : 0x00);
00261 }
00262
/* In-place move of block x right by four bits, one byte at a time;
   little-endian layout of the gf_tab entry. The low nibble of byte 15,
   moved to the high nibble, indexes gf_tab before anything is modified.
   Each byte then takes the low nibble of the preceding byte as its high
   nibble. The entry's high byte is XORed into byte 1 and its low byte
   into byte 0. */
00263 gf_inline void mul_x4(void *x)
00264 { uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff];
00265 ui8_ptr(x)[15] = (ui8_ptr(x)[15] >> 4) | (ui8_ptr(x)[14] << 4);
00266 ui8_ptr(x)[14] = (ui8_ptr(x)[14] >> 4) | (ui8_ptr(x)[13] << 4);
00267 ui8_ptr(x)[13] = (ui8_ptr(x)[13] >> 4) | (ui8_ptr(x)[12] << 4);
00268 ui8_ptr(x)[12] = (ui8_ptr(x)[12] >> 4) | (ui8_ptr(x)[11] << 4);
00269 ui8_ptr(x)[11] = (ui8_ptr(x)[11] >> 4) | (ui8_ptr(x)[10] << 4);
00270 ui8_ptr(x)[10] = (ui8_ptr(x)[10] >> 4) | (ui8_ptr(x)[ 9] << 4);
00271 ui8_ptr(x)[ 9] = (ui8_ptr(x)[ 9] >> 4) | (ui8_ptr(x)[ 8] << 4);
00272 ui8_ptr(x)[ 8] = (ui8_ptr(x)[ 8] >> 4) | (ui8_ptr(x)[ 7] << 4);
00273 ui8_ptr(x)[ 7] = (ui8_ptr(x)[ 7] >> 4) | (ui8_ptr(x)[ 6] << 4);
00274 ui8_ptr(x)[ 6] = (ui8_ptr(x)[ 6] >> 4) | (ui8_ptr(x)[ 5] << 4);
00275 ui8_ptr(x)[ 5] = (ui8_ptr(x)[ 5] >> 4) | (ui8_ptr(x)[ 4] << 4);
00276 ui8_ptr(x)[ 4] = (ui8_ptr(x)[ 4] >> 4) | (ui8_ptr(x)[ 3] << 4);
00277 ui8_ptr(x)[ 3] = (ui8_ptr(x)[ 3] >> 4) | (ui8_ptr(x)[ 2] << 4);
00278 ui8_ptr(x)[ 2] = (ui8_ptr(x)[ 2] >> 4) | (ui8_ptr(x)[ 1] << 4);
00279 ui8_ptr(x)[ 1] = ((ui8_ptr(x)[ 1] >> 4) | (ui8_ptr(x)[ 0] << 4)) ^ (_tt >> 8);
00280 ui8_ptr(x)[ 0] = (ui8_ptr(x)[ 0] >> 4) ^ (_tt & 0xff);
00281 }
00282
/* In-place move of block x by one whole byte, byte-wise version for the
   little-endian branch. Byte 15 indexes gf_tab and is then overwritten;
   memmove shifts bytes 0..14 up to 1..15 (the regions overlap, hence
   memmove). The entry's high byte is XORed into byte 1 and its low byte
   replaces byte 0. */
00283 gf_inline void mul_x8(void *x)
00284 { uint_16t _tt = gf_tab[ui8_ptr(x)[15]];
00285 memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15);
00286 ui8_ptr(x)[1] ^= (_tt >> 8);
00287 ui8_ptr(x)[0] = (_tt & 0xff);
00288 }
00289
00290 #endif
00291
00292 #else
00293
00294 #if BFR_UNIT == 64
00295
/* Macro form of mul_x for a little-endian host with 64-bit units (used
   when USE_INLINES is not defined). r receives block x moved right by one
   bit with per-byte carries; the low bit of byte 15 selects gf_tab[0x80]
   or gf_tab[0], XORed into bytes 0-1. Both arguments are expanded several
   times, so they must be free of side effects; _tt is local to the
   do-block. */
00296 #define mul_x(r, x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 49) & MSK_80]; \
00297 ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) & ~MSK_80 \
00298 | ((ui64_ptr(x)[1] << 15) | (ui64_ptr(x)[0] >> 49)) & MSK_80; \
00299 ui64_ptr(r)[0] = ((ui64_ptr(x)[0] >> 1) & ~MSK_80 \
00300 | (ui64_ptr(x)[0] << 15) & MSK_80) ^ _tt; \
00301 } while(0)
00302
00303 #if defined( VERSION_1 )
00304
/* Macro form of the VERSION_1 mul_x4 for a little-endian host with 64-bit
   units: moves block x right by four bits in place with per-byte nibble
   carries and XORs the gf_tab entry chosen by the low nibble of byte 15
   into bytes 0-1. x is expanded several times. */
00305 #define mul_x4(x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & MSK_F0]; \
00306 ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) & ~MSK_F0 | ((ui64_ptr(x)[1] << 12) \
00307 | (ui64_ptr(x)[0] >> 52)) & MSK_F0; \
00308 ui64_ptr(x)[0] = ((ui64_ptr(x)[0] >> 4) & ~MSK_F0 \
00309 | (ui64_ptr(x)[0] << 12) & MSK_F0) ^ _tt; \
00310 } while(0)
00311
00312 #else
00313
/* Macro form of the alternative mul_x4 (VERSION_1 not defined) for a
   little-endian host with 64-bit units: reads the gf_tab index first,
   byte-swaps the block with bswap64_block (defined elsewhere), shifts by
   four bits and swaps each word back with bswap_64. */
00314 #define mul_x4(x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] >> 52) & 0xf0]; \
00315 bswap64_block(x, x, 2); \
00316 ui64_ptr(x)[1] = bswap_64((ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60)); \
00317 ui64_ptr(x)[0] = bswap_64((ui64_ptr(x)[0] >> 4)) ^ _tt; \
00318 } while(0)
00319
00320 #endif
00321
/* Macro form of mul_x8 for a little-endian host with 64-bit units: byte
   15 indexes gf_tab, bytes 0..14 move up one position, and the entry is
   XORed into bytes 0-1. x is expanded several times. */
00322 #define mul_x8(x) do { uint_64t _tt = gf_tab[ui64_ptr(x)[1] >> 56]; \
00323 ui64_ptr(x)[1] = (ui64_ptr(x)[1] << 8) | (ui64_ptr(x)[0] >> 56); \
00324 ui64_ptr(x)[0] = (ui64_ptr(x)[0] << 8) ^ _tt; \
00325 } while(0)
00326
00327 #elif BFR_UNIT == 32
00328
/* Macro form of mul_x for a little-endian host with 32-bit units: r
   receives block x moved right by one bit ('<< 15' carries inside a word,
   '>> 17' carries across words); the low bit of byte 15 selects
   gf_tab[0x80] or gf_tab[0], XORed into bytes 0-1. Arguments are expanded
   several times. */
00329 #define mul_x(r, x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 17) & MSK_80]; \
00330 ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) & ~MSK_80 | ((ui32_ptr(x)[3] << 15) \
00331 | (ui32_ptr(x)[2] >> 17)) & MSK_80; \
00332 ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) & ~MSK_80 | ((ui32_ptr(x)[2] << 15) \
00333 | (ui32_ptr(x)[1] >> 17)) & MSK_80; \
00334 ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) & ~MSK_80 | ((ui32_ptr(x)[1] << 15) \
00335 | (ui32_ptr(x)[0] >> 17)) & MSK_80; \
00336 ui32_ptr(r)[0] = ((ui32_ptr(x)[0] >> 1) & ~MSK_80 \
00337 | (ui32_ptr(x)[0] << 15) & MSK_80) ^ _tt; \
00338 } while(0)
00339
00340 #if defined( VERSION_1 )
00341
/* Macro form of the VERSION_1 mul_x4 for a little-endian host with 32-bit
   units: in-place four-bit move with per-byte nibble carries; the low
   nibble of byte 15 indexes gf_tab and the entry is XORed into bytes 0-1.
   x is expanded several times. */
00342 #define mul_x4(x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & MSK_F0]; \
00343 ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[3] << 12) \
00344 | (ui32_ptr(x)[2] >> 20)) & MSK_F0; \
00345 ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[2] << 12) \
00346 | (ui32_ptr(x)[1] >> 20)) & MSK_F0; \
00347 ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) & ~MSK_F0 | ((ui32_ptr(x)[1] << 12) \
00348 | (ui32_ptr(x)[0] >> 20)) & MSK_F0; \
00349 ui32_ptr(x)[0] = ((ui32_ptr(x)[0] >> 4) & ~MSK_F0 \
00350 | (ui32_ptr(x)[0] << 12) & MSK_F0) ^ _tt; \
00351 } while(0)
00352
00353 #else
00354
/* Macro form of the alternative mul_x4 (VERSION_1 not defined) for a
   little-endian host with 32-bit units: reads the gf_tab index first,
   byte-swaps the block with bswap32_block (defined elsewhere), shifts by
   four bits and swaps each word back with bswap_32. */
00355 #define mul_x4(x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] >> 20) & 0xf0]; \
00356 bswap32_block(x, x, 4); \
00357 ui32_ptr(x)[3] = bswap_32((ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28)); \
00358 ui32_ptr(x)[2] = bswap_32((ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28)); \
00359 ui32_ptr(x)[1] = bswap_32((ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28)); \
00360 ui32_ptr(x)[0] = bswap_32((ui32_ptr(x)[0] >> 4)) ^ _tt; \
00361 } while(0)
00362
00363 #endif
00364
/* Macro form of mul_x8 for a little-endian host with 32-bit units: byte
   15 indexes gf_tab, bytes 0..14 move up one position, and the entry is
   XORed into bytes 0-1. x is expanded several times. */
00365 #define mul_x8(x) do { uint_32t _tt = gf_tab[ui32_ptr(x)[3] >> 24]; \
00366 ui32_ptr(x)[3] = (ui32_ptr(x)[3] << 8) | (ui32_ptr(x)[2] >> 24); \
00367 ui32_ptr(x)[2] = (ui32_ptr(x)[2] << 8) | (ui32_ptr(x)[1] >> 24); \
00368 ui32_ptr(x)[1] = (ui32_ptr(x)[1] << 8) | (ui32_ptr(x)[0] >> 24); \
00369 ui32_ptr(x)[0] = (ui32_ptr(x)[0] << 8) ^ _tt; \
00370 } while(0)
00371
00372 #else
00373
/* Byte-wise macro form of mul_x (little-endian branch, BFR_UNIT neither
   64 nor 32): r receives block x moved right by one bit, each byte taking
   the low bit of the preceding byte as its top bit; 0xe1 is XORed into
   byte 0 when the low bit of byte 15 was set. Arguments are expanded many
   times, so they must be free of side effects. */
00374 #define mul_x(r, x) do { uint_8t _tt = ui8_ptr(x)[15] & 1; \
00375 ui8_ptr(r)[15] = (ui8_ptr(x)[15] >> 1) | (ui8_ptr(x)[14] << 7); \
00376 ui8_ptr(r)[14] = (ui8_ptr(x)[14] >> 1) | (ui8_ptr(x)[13] << 7); \
00377 ui8_ptr(r)[13] = (ui8_ptr(x)[13] >> 1) | (ui8_ptr(x)[12] << 7); \
00378 ui8_ptr(r)[12] = (ui8_ptr(x)[12] >> 1) | (ui8_ptr(x)[11] << 7); \
00379 ui8_ptr(r)[11] = (ui8_ptr(x)[11] >> 1) | (ui8_ptr(x)[10] << 7); \
00380 ui8_ptr(r)[10] = (ui8_ptr(x)[10] >> 1) | (ui8_ptr(x)[ 9] << 7); \
00381 ui8_ptr(r)[ 9] = (ui8_ptr(x)[ 9] >> 1) | (ui8_ptr(x)[ 8] << 7); \
00382 ui8_ptr(r)[ 8] = (ui8_ptr(x)[ 8] >> 1) | (ui8_ptr(x)[ 7] << 7); \
00383 ui8_ptr(r)[ 7] = (ui8_ptr(x)[ 7] >> 1) | (ui8_ptr(x)[ 6] << 7); \
00384 ui8_ptr(r)[ 6] = (ui8_ptr(x)[ 6] >> 1) | (ui8_ptr(x)[ 5] << 7); \
00385 ui8_ptr(r)[ 5] = (ui8_ptr(x)[ 5] >> 1) | (ui8_ptr(x)[ 4] << 7); \
00386 ui8_ptr(r)[ 4] = (ui8_ptr(x)[ 4] >> 1) | (ui8_ptr(x)[ 3] << 7); \
00387 ui8_ptr(r)[ 3] = (ui8_ptr(x)[ 3] >> 1) | (ui8_ptr(x)[ 2] << 7); \
00388 ui8_ptr(r)[ 2] = (ui8_ptr(x)[ 2] >> 1) | (ui8_ptr(x)[ 1] << 7); \
00389 ui8_ptr(r)[ 1] = (ui8_ptr(x)[ 1] >> 1) | (ui8_ptr(x)[ 0] << 7); \
00390 ui8_ptr(r)[ 0] = (ui8_ptr(x)[ 0] >> 1) ^ (_tt ? 0xe1 : 0x00); \
00391 } while(0)
00392
/* Byte-wise macro form of mul_x4 (little-endian branch): in-place
   four-bit move, each byte taking the low nibble of the preceding byte as
   its high nibble. The low nibble of byte 15 indexes gf_tab beforehand;
   the entry's high byte is XORed into byte 1 and its low byte into
   byte 0. x is expanded many times. */
00393 #define mul_x4(x) do { uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff]; \
00394 ui8_ptr(x)[15] = (ui8_ptr(x)[15] >> 4) | (ui8_ptr(x)[14] << 4); \
00395 ui8_ptr(x)[14] = (ui8_ptr(x)[14] >> 4) | (ui8_ptr(x)[13] << 4); \
00396 ui8_ptr(x)[13] = (ui8_ptr(x)[13] >> 4) | (ui8_ptr(x)[12] << 4); \
00397 ui8_ptr(x)[12] = (ui8_ptr(x)[12] >> 4) | (ui8_ptr(x)[11] << 4); \
00398 ui8_ptr(x)[11] = (ui8_ptr(x)[11] >> 4) | (ui8_ptr(x)[10] << 4); \
00399 ui8_ptr(x)[10] = (ui8_ptr(x)[10] >> 4) | (ui8_ptr(x)[ 9] << 4); \
00400 ui8_ptr(x)[ 9] = (ui8_ptr(x)[ 9] >> 4) | (ui8_ptr(x)[ 8] << 4); \
00401 ui8_ptr(x)[ 8] = (ui8_ptr(x)[ 8] >> 4) | (ui8_ptr(x)[ 7] << 4); \
00402 ui8_ptr(x)[ 7] = (ui8_ptr(x)[ 7] >> 4) | (ui8_ptr(x)[ 6] << 4); \
00403 ui8_ptr(x)[ 6] = (ui8_ptr(x)[ 6] >> 4) | (ui8_ptr(x)[ 5] << 4); \
00404 ui8_ptr(x)[ 5] = (ui8_ptr(x)[ 5] >> 4) | (ui8_ptr(x)[ 4] << 4); \
00405 ui8_ptr(x)[ 4] = (ui8_ptr(x)[ 4] >> 4) | (ui8_ptr(x)[ 3] << 4); \
00406 ui8_ptr(x)[ 3] = (ui8_ptr(x)[ 3] >> 4) | (ui8_ptr(x)[ 2] << 4); \
00407 ui8_ptr(x)[ 2] = (ui8_ptr(x)[ 2] >> 4) | (ui8_ptr(x)[ 1] << 4); \
00408 ui8_ptr(x)[ 1] = ((ui8_ptr(x)[ 1] >> 4) | (ui8_ptr(x)[ 0] << 4)) ^ (_tt >> 8); \
00409 ui8_ptr(x)[ 0] = (ui8_ptr(x)[ 0] >> 4) ^ (_tt & 0xff); \
00410 } while(0)
00411
/* Byte-wise macro form of mul_x8 (little-endian branch): byte 15 indexes
   gf_tab, memmove shifts bytes 0..14 up to 1..15, the entry's high byte
   is XORed into byte 1 and its low byte replaces byte 0. */
00412 #define mul_x8(x) do { uint_16t _tt = gf_tab[ui8_ptr(x)[15]]; \
00413 memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15); \
00414 ui8_ptr(x)[1] ^= (_tt >> 8); \
00415 ui8_ptr(x)[0] = (_tt & 0xff); \
00416 } while(0)
00417
00418 #endif
00419
00420 #endif
00421
00422 #elif PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
00423
00424 #if defined( USE_INLINES )
00425
00426 #if BFR_UNIT == 64
00427
/* r = block x moved right by one bit; big-endian host, 64-bit units. The
   two words form one 128-bit value with word 0 most significant, so a
   plain right shift with a carry from word 0 into word 1 suffices. The
   lowest bit of word 1 selects gf_tab[0x80] or gf_tab[0]; the 16-bit
   entry is shifted to the top of word 0 and XORed in. r may be x, since
   word 1 is written before word 0 and reads no overwritten word. */
00428 gf_inline void mul_x(void *r, const void *x)
00429 { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 7) & 0xff];
00430 ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) | (ui64_ptr(x)[0] << 63);
00431 ui64_ptr(r)[0] = (ui64_ptr(x)[0] >> 1) ^ (_tt << 48);
00432 }
00433
/* In-place right shift of block x by four bits; big-endian host, 64-bit
   units. The lowest nibble of word 1, moved to the high nibble of the
   index, selects the gf_tab entry, which is XORed into the top 16 bits of
   word 0. */
00434 gf_inline void mul_x4(void *x)
00435 { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 4) & 0xff];
00436 ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60);
00437 ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 4) ^ (_tt << 48);
00438 }
00439
/* In-place right shift of block x by eight bits; big-endian host, 64-bit
   units. The lowest byte of word 1 indexes gf_tab and is shifted out; the
   entry is XORed into the top 16 bits of word 0. */
00440 gf_inline void mul_x8(void *x)
00441 { uint_64t _tt = gf_tab[ui64_ptr(x)[1] & 0xff];
00442 ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 8) | (ui64_ptr(x)[0] << 56);
00443 ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 8) ^ (_tt << 48);
00444 }
00445
00446 #elif BFR_UNIT == 32
00447
/* r = block x moved right by one bit; big-endian host, 32-bit units. The
   four words are shifted as one 128-bit value (word 0 most significant)
   with carries passed down through '<< 31'. The lowest bit of word 3
   selects gf_tab[0x80] or gf_tab[0]; the entry is XORed into the top
   16 bits of word 0. Words are written from index 3 down, so r may be x. */
00448 gf_inline void mul_x(void *r, const void *x)
00449 { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 7) & 0xff];
00450 ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) | (ui32_ptr(x)[2] << 31);
00451 ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) | (ui32_ptr(x)[1] << 31);
00452 ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) | (ui32_ptr(x)[0] << 31);
00453 ui32_ptr(r)[0] = (ui32_ptr(x)[0] >> 1) ^ (_tt << 16);
00454 }
00455
/* In-place right shift of block x by four bits; big-endian host, 32-bit
   units. The lowest nibble of word 3 selects the gf_tab entry, which is
   XORed into the top 16 bits of word 0. */
00456 gf_inline void mul_x4(void *x)
00457 { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 4) & 0xff];
00458 ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28);
00459 ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28);
00460 ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28);
00461 ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 4) ^ (_tt << 16);
00462 }
00463
/* In-place right shift of block x by eight bits; big-endian host, 32-bit
   units. The lowest byte of word 3 indexes gf_tab and is shifted out; the
   entry is XORed into the top 16 bits of word 0. */
00464 gf_inline void mul_x8(void *x)
00465 { uint_32t _tt = gf_tab[ui32_ptr(x)[3] & 0xff];
00466 ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 8) | (ui32_ptr(x)[2] << 24);
00467 ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 8) | (ui32_ptr(x)[1] << 24);
00468 ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 8) | (ui32_ptr(x)[0] << 24);
00469 ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 8) ^ (_tt << 16);
00470 }
00471
00472 #else
00473
/* Byte-wise mul_x for the big-endian branch (BFR_UNIT neither 64 nor 32).
   The statements are the same as in the little-endian byte-wise version:
   r receives block x moved right by one bit, and 0xe1 is XORed into byte
   0 when the low bit of byte 15 was set. r may be the same block as x. */
00474 gf_inline void mul_x(void *r, const void *x)
00475 { uint_8t _tt = ui8_ptr(x)[15] & 1;
00476 ui8_ptr(r)[15] = (ui8_ptr(x)[15] >> 1) | (ui8_ptr(x)[14] << 7);
00477 ui8_ptr(r)[14] = (ui8_ptr(x)[14] >> 1) | (ui8_ptr(x)[13] << 7);
00478 ui8_ptr(r)[13] = (ui8_ptr(x)[13] >> 1) | (ui8_ptr(x)[12] << 7);
00479 ui8_ptr(r)[12] = (ui8_ptr(x)[12] >> 1) | (ui8_ptr(x)[11] << 7);
00480 ui8_ptr(r)[11] = (ui8_ptr(x)[11] >> 1) | (ui8_ptr(x)[10] << 7);
00481 ui8_ptr(r)[10] = (ui8_ptr(x)[10] >> 1) | (ui8_ptr(x)[ 9] << 7);
00482 ui8_ptr(r)[ 9] = (ui8_ptr(x)[ 9] >> 1) | (ui8_ptr(x)[ 8] << 7);
00483 ui8_ptr(r)[ 8] = (ui8_ptr(x)[ 8] >> 1) | (ui8_ptr(x)[ 7] << 7);
00484 ui8_ptr(r)[ 7] = (ui8_ptr(x)[ 7] >> 1) | (ui8_ptr(x)[ 6] << 7);
00485 ui8_ptr(r)[ 6] = (ui8_ptr(x)[ 6] >> 1) | (ui8_ptr(x)[ 5] << 7);
00486 ui8_ptr(r)[ 5] = (ui8_ptr(x)[ 5] >> 1) | (ui8_ptr(x)[ 4] << 7);
00487 ui8_ptr(r)[ 4] = (ui8_ptr(x)[ 4] >> 1) | (ui8_ptr(x)[ 3] << 7);
00488 ui8_ptr(r)[ 3] = (ui8_ptr(x)[ 3] >> 1) | (ui8_ptr(x)[ 2] << 7);
00489 ui8_ptr(r)[ 2] = (ui8_ptr(x)[ 2] >> 1) | (ui8_ptr(x)[ 1] << 7);
00490 ui8_ptr(r)[ 1] = (ui8_ptr(x)[ 1] >> 1) | (ui8_ptr(x)[ 0] << 7);
00491 ui8_ptr(r)[ 0] = (ui8_ptr(x)[ 0] >> 1) ^ (_tt ? 0xe1 : 0x00);
00492 }
00493
/* Byte-wise mul_x4 for the big-endian branch: in-place four-bit move with
   nibble carries from each byte into the next. It differs from the
   little-endian byte-wise version only in how the gf_tab entry is
   applied: here the low byte is XORed into byte 1 and the high byte into
   byte 0. */
00494 gf_inline void mul_x4(void *x)
00495 {
00496 uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff];
00497 ui8_ptr(x)[15] = (ui8_ptr(x)[15] >> 4) | (ui8_ptr(x)[14] << 4);
00498 ui8_ptr(x)[14] = (ui8_ptr(x)[14] >> 4) | (ui8_ptr(x)[13] << 4);
00499 ui8_ptr(x)[13] = (ui8_ptr(x)[13] >> 4) | (ui8_ptr(x)[12] << 4);
00500 ui8_ptr(x)[12] = (ui8_ptr(x)[12] >> 4) | (ui8_ptr(x)[11] << 4);
00501 ui8_ptr(x)[11] = (ui8_ptr(x)[11] >> 4) | (ui8_ptr(x)[10] << 4);
00502 ui8_ptr(x)[10] = (ui8_ptr(x)[10] >> 4) | (ui8_ptr(x)[ 9] << 4);
00503 ui8_ptr(x)[ 9] = (ui8_ptr(x)[ 9] >> 4) | (ui8_ptr(x)[ 8] << 4);
00504 ui8_ptr(x)[ 8] = (ui8_ptr(x)[ 8] >> 4) | (ui8_ptr(x)[ 7] << 4);
00505 ui8_ptr(x)[ 7] = (ui8_ptr(x)[ 7] >> 4) | (ui8_ptr(x)[ 6] << 4);
00506 ui8_ptr(x)[ 6] = (ui8_ptr(x)[ 6] >> 4) | (ui8_ptr(x)[ 5] << 4);
00507 ui8_ptr(x)[ 5] = (ui8_ptr(x)[ 5] >> 4) | (ui8_ptr(x)[ 4] << 4);
00508 ui8_ptr(x)[ 4] = (ui8_ptr(x)[ 4] >> 4) | (ui8_ptr(x)[ 3] << 4);
00509 ui8_ptr(x)[ 3] = (ui8_ptr(x)[ 3] >> 4) | (ui8_ptr(x)[ 2] << 4);
00510 ui8_ptr(x)[ 2] = (ui8_ptr(x)[ 2] >> 4) | (ui8_ptr(x)[ 1] << 4);
00511 ui8_ptr(x)[ 1] = ((ui8_ptr(x)[ 1] >> 4) | (ui8_ptr(x)[ 0] << 4)) ^ (_tt & 0xff);
00512 ui8_ptr(x)[ 0] = (ui8_ptr(x)[ 0] >> 4) ^ (_tt >> 8);
00513 }
00514
/* Byte-wise mul_x8 for the big-endian branch: byte 15 indexes gf_tab,
   memmove shifts bytes 0..14 up to 1..15, the entry's low byte is XORed
   into byte 1 and its high byte replaces byte 0 (the two halves are used
   the other way round in the little-endian branch). */
00515 gf_inline void mul_x8(void *x)
00516 { uint_16t _tt = gf_tab[ui8_ptr(x)[15]];
00517 memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15);
00518 ui8_ptr(x)[1] ^= (_tt & 0xff);
00519 ui8_ptr(x)[0] = (_tt >> 8);
00520 }
00521
00522 #endif
00523
00524 #else
00525
00526 #if BFR_UNIT == 64
00527
/* Macro form of mul_x for a big-endian host with 64-bit units: r receives
   the 128-bit value in x shifted right by one bit; the lowest bit of word
   1 selects gf_tab[0x80] or gf_tab[0], XORed into the top 16 bits of
   word 0. Arguments are expanded several times. */
00528 #define mul_x(r, x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 7) & 0xff]; \
00529 ui64_ptr(r)[1] = (ui64_ptr(x)[1] >> 1) | (ui64_ptr(x)[0] << 63); \
00530 ui64_ptr(r)[0] = (ui64_ptr(x)[0] >> 1) ^ (_tt << 48); \
00531 } while(0)
00532
/* Macro form of mul_x4 for a big-endian host with 64-bit units: in-place
   right shift by four bits; the lowest nibble of word 1 selects the
   gf_tab entry XORed into the top 16 bits of word 0. */
00533 #define mul_x4(x) do { uint_64t _tt = gf_tab[(ui64_ptr(x)[1] << 4) & 0xff]; \
00534 ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 4) | (ui64_ptr(x)[0] << 60); \
00535 ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 4) ^ (_tt << 48); \
00536 } while(0)
00537
/* Macro form of mul_x8 for a big-endian host with 64-bit units: in-place
   right shift by eight bits; the lowest byte of word 1 indexes gf_tab and
   the entry is XORed into the top 16 bits of word 0. */
00538 #define mul_x8(x) do { uint_64t _tt = gf_tab[ui64_ptr(x)[1] & 0xff]; \
00539 ui64_ptr(x)[1] = (ui64_ptr(x)[1] >> 8) | (ui64_ptr(x)[0] << 56); \
00540 ui64_ptr(x)[0] = (ui64_ptr(x)[0] >> 8) ^ (_tt << 48); \
00541 } while(0)
00542
00543 #elif BFR_UNIT == 32
00544
/* Macro form of mul_x for a big-endian host with 32-bit units: r receives
   the four words of x shifted right by one bit as a single 128-bit value;
   the lowest bit of word 3 selects gf_tab[0x80] or gf_tab[0], XORed into
   the top 16 bits of word 0. Arguments are expanded several times. */
00545 #define mul_x(r, x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 7) & 0xff]; \
00546 ui32_ptr(r)[3] = (ui32_ptr(x)[3] >> 1) | (ui32_ptr(x)[2] << 31); \
00547 ui32_ptr(r)[2] = (ui32_ptr(x)[2] >> 1) | (ui32_ptr(x)[1] << 31); \
00548 ui32_ptr(r)[1] = (ui32_ptr(x)[1] >> 1) | (ui32_ptr(x)[0] << 31); \
00549 ui32_ptr(r)[0] = (ui32_ptr(x)[0] >> 1) ^ (_tt << 16); \
00550 } while(0)
00551
/* Macro form of mul_x4 for a big-endian host with 32-bit units: in-place
   right shift by four bits; the lowest nibble of word 3 selects the
   gf_tab entry XORed into the top 16 bits of word 0. */
00552 #define mul_x4(x) do { uint_32t _tt = gf_tab[(ui32_ptr(x)[3] << 4) & 0xff]; \
00553 ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 4) | (ui32_ptr(x)[2] << 28); \
00554 ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 4) | (ui32_ptr(x)[1] << 28); \
00555 ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 4) | (ui32_ptr(x)[0] << 28); \
00556 ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 4) ^ (_tt << 16); \
00557 } while(0)
00558
/* Macro form of mul_x8 for a big-endian host with 32-bit units: in-place
   right shift by eight bits; the lowest byte of word 3 indexes gf_tab and
   the entry is XORed into the top 16 bits of word 0. */
00559 #define mul_x8(x) do { uint_32t _tt = gf_tab[ui32_ptr(x)[3] & 0xff]; \
00560 ui32_ptr(x)[3] = (ui32_ptr(x)[3] >> 8) | (ui32_ptr(x)[2] << 24); \
00561 ui32_ptr(x)[2] = (ui32_ptr(x)[2] >> 8) | (ui32_ptr(x)[1] << 24); \
00562 ui32_ptr(x)[1] = (ui32_ptr(x)[1] >> 8) | (ui32_ptr(x)[0] << 24); \
00563 ui32_ptr(x)[0] = (ui32_ptr(x)[0] >> 8) ^ (_tt << 16); \
00564 } while(0)
00565
00566 #else
00567
/* Byte-wise macro form of mul_x for the big-endian branch: r receives
   block x moved right by one bit with a bit carried from each byte into
   the next; 0xe1 is XORed into byte 0 when the low bit of byte 15 was
   set. Arguments are expanded many times. */
00568 #define mul_x(r, x) do { uint_8t _tt = ui8_ptr(x)[15] & 1; \
00569 ui8_ptr(r)[15] = (ui8_ptr(x)[15] >> 1) | (ui8_ptr(x)[14] << 7); \
00570 ui8_ptr(r)[14] = (ui8_ptr(x)[14] >> 1) | (ui8_ptr(x)[13] << 7); \
00571 ui8_ptr(r)[13] = (ui8_ptr(x)[13] >> 1) | (ui8_ptr(x)[12] << 7); \
00572 ui8_ptr(r)[12] = (ui8_ptr(x)[12] >> 1) | (ui8_ptr(x)[11] << 7); \
00573 ui8_ptr(r)[11] = (ui8_ptr(x)[11] >> 1) | (ui8_ptr(x)[10] << 7); \
00574 ui8_ptr(r)[10] = (ui8_ptr(x)[10] >> 1) | (ui8_ptr(x)[ 9] << 7); \
00575 ui8_ptr(r)[ 9] = (ui8_ptr(x)[ 9] >> 1) | (ui8_ptr(x)[ 8] << 7); \
00576 ui8_ptr(r)[ 8] = (ui8_ptr(x)[ 8] >> 1) | (ui8_ptr(x)[ 7] << 7); \
00577 ui8_ptr(r)[ 7] = (ui8_ptr(x)[ 7] >> 1) | (ui8_ptr(x)[ 6] << 7); \
00578 ui8_ptr(r)[ 6] = (ui8_ptr(x)[ 6] >> 1) | (ui8_ptr(x)[ 5] << 7); \
00579 ui8_ptr(r)[ 5] = (ui8_ptr(x)[ 5] >> 1) | (ui8_ptr(x)[ 4] << 7); \
00580 ui8_ptr(r)[ 4] = (ui8_ptr(x)[ 4] >> 1) | (ui8_ptr(x)[ 3] << 7); \
00581 ui8_ptr(r)[ 3] = (ui8_ptr(x)[ 3] >> 1) | (ui8_ptr(x)[ 2] << 7); \
00582 ui8_ptr(r)[ 2] = (ui8_ptr(x)[ 2] >> 1) | (ui8_ptr(x)[ 1] << 7); \
00583 ui8_ptr(r)[ 1] = (ui8_ptr(x)[ 1] >> 1) | (ui8_ptr(x)[ 0] << 7); \
00584 ui8_ptr(r)[ 0] = (ui8_ptr(x)[ 0] >> 1) ^ (_tt ? 0xe1 : 0x00); \
00585 } while(0)
00586
/* Byte-wise macro form of mul_x4 for the big-endian branch: in-place
   four-bit move with nibble carries; the gf_tab entry selected by the low
   nibble of byte 15 has its low byte XORed into byte 1 and its high byte
   into byte 0. x is expanded many times. */
00587 #define mul_x4(x) do { uint_16t _tt = gf_tab[(ui8_ptr(x)[15] << 4) & 0xff]; \
00588 ui8_ptr(x)[15] = (ui8_ptr(x)[15] >> 4) | (ui8_ptr(x)[14] << 4); \
00589 ui8_ptr(x)[14] = (ui8_ptr(x)[14] >> 4) | (ui8_ptr(x)[13] << 4); \
00590 ui8_ptr(x)[13] = (ui8_ptr(x)[13] >> 4) | (ui8_ptr(x)[12] << 4); \
00591 ui8_ptr(x)[12] = (ui8_ptr(x)[12] >> 4) | (ui8_ptr(x)[11] << 4); \
00592 ui8_ptr(x)[11] = (ui8_ptr(x)[11] >> 4) | (ui8_ptr(x)[10] << 4); \
00593 ui8_ptr(x)[10] = (ui8_ptr(x)[10] >> 4) | (ui8_ptr(x)[ 9] << 4); \
00594 ui8_ptr(x)[ 9] = (ui8_ptr(x)[ 9] >> 4) | (ui8_ptr(x)[ 8] << 4); \
00595 ui8_ptr(x)[ 8] = (ui8_ptr(x)[ 8] >> 4) | (ui8_ptr(x)[ 7] << 4); \
00596 ui8_ptr(x)[ 7] = (ui8_ptr(x)[ 7] >> 4) | (ui8_ptr(x)[ 6] << 4); \
00597 ui8_ptr(x)[ 6] = (ui8_ptr(x)[ 6] >> 4) | (ui8_ptr(x)[ 5] << 4); \
00598 ui8_ptr(x)[ 5] = (ui8_ptr(x)[ 5] >> 4) | (ui8_ptr(x)[ 4] << 4); \
00599 ui8_ptr(x)[ 4] = (ui8_ptr(x)[ 4] >> 4) | (ui8_ptr(x)[ 3] << 4); \
00600 ui8_ptr(x)[ 3] = (ui8_ptr(x)[ 3] >> 4) | (ui8_ptr(x)[ 2] << 4); \
00601 ui8_ptr(x)[ 2] = (ui8_ptr(x)[ 2] >> 4) | (ui8_ptr(x)[ 1] << 4); \
00602 ui8_ptr(x)[ 1] = ((ui8_ptr(x)[ 1] >> 4) | (ui8_ptr(x)[ 0] << 4)) ^ (_tt & 0xff); \
00603 ui8_ptr(x)[ 0] = (ui8_ptr(x)[ 0] >> 4) ^ (_tt >> 8); \
00604 } while(0)
00605
/* Byte-wise macro form of mul_x8 for the big-endian branch: byte 15
   indexes gf_tab, memmove shifts bytes 0..14 up to 1..15, the entry's low
   byte is XORed into byte 1 and its high byte replaces byte 0. */
00606 #define mul_x8(x) do { uint_16t _tt = gf_tab[ui8_ptr(x)[15]]; \
00607 memmove(ui8_ptr(x) + 1, ui8_ptr(x), 15); \
00608 ui8_ptr(x)[1] ^= (_tt & 0xff); \
00609 ui8_ptr(x)[0] = (_tt >> 8); \
00610 } while(0)
00611
00612 #endif
00613
00614 #endif
00615
00616 #else
00617 # error Platform byte order has not been set.
00618 #endif
00619
00620
00621
/* Declaration only; the definition lives in another translation unit.
   NOTE(review): a is writable and b is const, so presumably a is replaced
   by the field product of a and b - confirm in the implementation. */
00622 void gf_mul(void *a, const void* b);
00623
00624
00625
00626
00627
00628
00629
00630
00631
00632
00633
/* init_64k_table: declaration only. NOTE(review): presumably fills the
   table at t for the fixed multiplier g - confirm in the implementation.
   gf_t64k: pointer to an array of 256 entries, each GF_BYTE_LEN/4 32-bit
   words (16 bytes); gf_mul_64k indexes it as [byte position][byte value]. */
00634 void init_64k_table(unsigned char g[], void *t);
00635 typedef uint_32t (*gf_t64k)[256][GF_BYTE_LEN >> 2];
/* tab64k(x): view the raw table pointer x as a gf_t64k. The argument is
   parenthesised so that compound expressions such as 'base + offset' are
   cast as a whole instead of having only their first operand cast. */
#define tab64k(x) ((gf_t64k)(x))
/* xor_64k(i,a,t,r): XOR into block r the table entry selected by byte
   position i and the value of byte i of a. Every argument is parenthesised
   so expressions can be passed safely; i and a are expanded more than
   once and must be free of side effects. */
#define xor_64k(i,a,t,r) xor_block_aligned((r), tab64k(t)[(i)][(a)[(i)]])
00638
00639 #if defined( USE_INLINES )
00640
00641 #if defined( UNROLL_LOOPS )
00642
00643 gf_inline void gf_mul_64k(unsigned char a[], void *t, void *r)
00644 {
00645 move_block_aligned(r, tab64k(t)[0][a[0]]); xor_64k( 1, a, t, r);
00646 xor_64k( 2, a, t, r); xor_64k( 3, a, t, r);
00647 xor_64k( 4, a, t, r); xor_64k( 5, a, t, r);
00648 xor_64k( 6, a, t, r); xor_64k( 7, a, t, r);
00649 xor_64k( 8, a, t, r); xor_64k( 9, a, t, r);
00650 xor_64k(10, a, t, r); xor_64k(11, a, t, r);
00651 xor_64k(12, a, t, r); xor_64k(13, a, t, r);
00652 xor_64k(14, a, t, r); xor_64k(15, a, t, r);
00653 move_block_aligned(a, r);
00654 }
00655
00656 #else
00657
00658 gf_inline void gf_mul_64k(unsigned char a[], void *t, void *r)
00659 { int i;
00660 move_block_aligned(r, tab64k(t)[0][a[0]]);
00661 for(i = 1; i < GF_BYTE_LEN; ++i)
00662 xor_64k(i, a, t, r);
00663 move_block_aligned(a, r);
00664 }
00665
00666 #endif
00667
00668 #else
00669
/* Macro forms of gf_mul_64k (used when USE_INLINES is not defined).
   r is seeded with the table entry for byte 0 of a, the entries for bytes
   1..15 are XORed in, and the result is copied back over a.

   The unrolled body is now selected when UNROLL_LOOPS IS defined, matching
   the inline versions and the 8k/4k/256 macros; the guard used to be
   inverted. Arguments are parenthesised before use, and the loop counter
   has a name (gf_i_) that will not capture an identifier such as 'i'
   appearing in a caller's argument. Arguments are still expanded several
   times and must be free of side effects. */
#if defined( UNROLL_LOOPS )

#define gf_mul_64k(a, t, r) do { \
    move_block_aligned((r), tab64k((t))[0][(a)[0]]); \
    xor_64k( 1, (a), (t), (r)); \
    xor_64k( 2, (a), (t), (r)); xor_64k( 3, (a), (t), (r)); \
    xor_64k( 4, (a), (t), (r)); xor_64k( 5, (a), (t), (r)); \
    xor_64k( 6, (a), (t), (r)); xor_64k( 7, (a), (t), (r)); \
    xor_64k( 8, (a), (t), (r)); xor_64k( 9, (a), (t), (r)); \
    xor_64k(10, (a), (t), (r)); xor_64k(11, (a), (t), (r)); \
    xor_64k(12, (a), (t), (r)); xor_64k(13, (a), (t), (r)); \
    xor_64k(14, (a), (t), (r)); xor_64k(15, (a), (t), (r)); \
    move_block_aligned((a), (r)); \
} while(0)

#else

#define gf_mul_64k(a, t, r) do { int gf_i_; \
    move_block_aligned((r), tab64k((t))[0][(a)[0]]); \
    for(gf_i_ = 1; gf_i_ < GF_BYTE_LEN; ++gf_i_) \
    {   xor_64k(gf_i_, (a), (t), (r)); \
    } \
    move_block_aligned((a), (r)); \
} while(0)

#endif
00696
00697 #endif
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
/* init_8k_table: declaration only. NOTE(review): presumably fills the
   table at t for the fixed multiplier g - confirm in the implementation.
   gf_t8k: pointer to an array of 16 entries, each GF_BYTE_LEN/4 32-bit
   words (16 bytes); gf_mul_8k indexes it as [2 * byte position + nibble
   half][nibble value]. */
00709 void init_8k_table(unsigned char g[], void *t);
00710
00711 typedef uint_32t (*gf_t8k)[16][GF_BYTE_LEN >> 2];
/* tab8k(x): view the raw table pointer x as a gf_t8k; the argument is
   parenthesised so compound expressions are cast as a whole. */
#define tab8k(x) ((gf_t8k)(x))
/* xor_8k(i,a,t,r): XOR into block r the two table entries for byte i of
   a - sub-table 2*i indexed by the low nibble and sub-table 2*i + 1
   indexed by the high nibble. The two statements are wrapped in
   do { } while(0) so the macro acts as a single statement (for example
   after an unbraced 'if'), and every argument is parenthesised. i and a
   are expanded more than once and must be free of side effects. */
#define xor_8k(i,a,t,r) do { \
    xor_block_aligned((r), tab8k(t)[(i) + (i)][(a)[(i)] & 15]); \
    xor_block_aligned((r), tab8k(t)[(i) + (i) + 1][(a)[(i)] >> 4]); \
} while(0)
00716
00717 #if defined( USE_INLINES )
00718
00719 #if defined( UNROLL_LOOPS )
00720
00721 gf_inline void gf_mul_8k(unsigned char a[], void *t, void *r)
00722 {
00723 move_block_aligned(r, tab8k(t)[0][a[0] & 15]);
00724 xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]);
00725 xor_8k( 1, a, t, r); xor_8k( 2, a, t, r); xor_8k( 3, a, t, r);
00726 xor_8k( 4, a, t, r); xor_8k( 5, a, t, r); xor_8k( 6, a, t, r); xor_8k( 7, a, t, r);
00727 xor_8k( 8, a, t, r); xor_8k( 9, a, t, r); xor_8k(10, a, t, r); xor_8k(11, a, t, r);
00728 xor_8k(12, a, t, r); xor_8k(13, a, t, r); xor_8k(14, a, t, r); xor_8k(15, a, t, r);
00729 move_block_aligned(a, r);
00730 }
00731
00732 #else
00733
00734 gf_inline void gf_mul_8k(unsigned char a[], void *t, void *r)
00735 { int i;
00736 memcpy(r, tab8k(t)[0][a[0] & 15], GF_BYTE_LEN);
00737 xor_block_aligned(r, tab8k(t)[1][a[0] >> 4]);
00738 for(i = 1; i < GF_BYTE_LEN; ++i)
00739 { xor_8k(i, a, t, r);
00740 }
00741 memcpy(a, r, GF_BYTE_LEN);
00742 }
00743
00744 #endif
00745
00746 #else
00747
00748 #if defined( UNROLL_LOOPS )
00749
/* Unrolled macro form of gf_mul_8k (USE_INLINES not defined, UNROLL_LOOPS
   defined). r is seeded from the two nibble entries for byte 0 of a, both
   nibbles of bytes 1..15 are folded in with xor_8k, and the result is
   copied back over a. Arguments are parenthesised before use so that
   expressions such as 'buf + 16' may be passed; they are expanded many
   times and must be free of side effects. */
#define gf_mul_8k(a, t, r) do { \
    move_block_aligned((r), tab8k((t))[0][(a)[0] & 15]); \
    xor_block_aligned((r), tab8k((t))[1][(a)[0] >> 4]); \
    xor_8k( 1, (a), (t), (r)); xor_8k( 2, (a), (t), (r)); \
    xor_8k( 3, (a), (t), (r)); xor_8k( 4, (a), (t), (r)); \
    xor_8k( 5, (a), (t), (r)); xor_8k( 6, (a), (t), (r)); \
    xor_8k( 7, (a), (t), (r)); xor_8k( 8, (a), (t), (r)); \
    xor_8k( 9, (a), (t), (r)); xor_8k(10, (a), (t), (r)); \
    xor_8k(11, (a), (t), (r)); xor_8k(12, (a), (t), (r)); \
    xor_8k(13, (a), (t), (r)); xor_8k(14, (a), (t), (r)); \
    xor_8k(15, (a), (t), (r)); move_block_aligned((a), (r)); \
} while(0)
00762
00763 #else
00764
/* Loop macro form of gf_mul_8k (neither USE_INLINES nor UNROLL_LOOPS
   defined). r is loaded from the low-nibble entry for byte 0 of a and
   completed with the high-nibble entry; both nibbles of the remaining
   bytes are folded in with xor_8k and the result is copied back over a.
   The loop counter is named gf_i_ so that an 'i' appearing in a caller's
   argument is not captured by the macro's own variable, and arguments are
   parenthesised before use. Arguments are expanded several times and
   must be free of side effects. */
#define gf_mul_8k(a, t, r) do { int gf_i_; \
    memcpy((r), tab8k((t))[0][(a)[0] & 15], GF_BYTE_LEN); \
    xor_block_aligned((r), tab8k((t))[1][(a)[0] >> 4]); \
    for(gf_i_ = 1; gf_i_ < GF_BYTE_LEN; ++gf_i_) \
    {   xor_8k(gf_i_, (a), (t), (r)); \
    } \
    memcpy((a), (r), GF_BYTE_LEN); \
} while(0)
00773
00774 #endif
00775
00776 #endif
00777
00778
00779
00780
00781
00782
00783
00784
00785
00786
00787
00788
00789
00790
00791
00792
00793
00794
/* init_4k_table: declaration only. NOTE(review): presumably fills the
   table at t for the fixed multiplier g - confirm in the implementation.
   gf_t4k: pointer to entries of GF_BYTE_LEN/4 32-bit words (16 bytes);
   gf_mul_4k indexes it directly by byte value. */
00795 void init_4k_table(unsigned char g[], void *t);
00796
00797 typedef uint_32t (*gf_t4k)[GF_BYTE_LEN >> 2];
/* tab4k(x): view the raw table pointer x as a gf_t4k; the argument is
   parenthesised so compound expressions are cast as a whole. */
#define tab4k(x) ((gf_t4k)(x))
/* xor_4k(i,a,t,r): advance the accumulator block r by one byte with
   mul_x8, then XOR in the table entry indexed by byte i of a. The two
   statements are wrapped in do { } while(0) so the macro acts as a single
   statement, and every argument is parenthesised. r is expanded more
   than once and must be free of side effects. */
#define xor_4k(i,a,t,r) do { \
    mul_x8((r)); \
    xor_block_aligned((r), tab4k(t)[(a)[(i)]]); \
} while(0)
00800
00801 #if defined( USE_INLINES )
00802
00803 #if defined( UNROLL_LOOPS )
00804
00805 gf_inline void gf_mul_4k(unsigned char a[], void *t, void *r)
00806 {
00807 move_block_aligned(r,tab4k(t)[a[15]]);
00808 xor_4k(14, a, t, r); xor_4k(13, a, t, r); xor_4k(12, a, t, r);
00809 xor_4k(11, a, t, r); xor_4k(10, a, t, r); xor_4k( 9, a, t, r);
00810 xor_4k( 8, a, t, r); xor_4k( 7, a, t, r); xor_4k( 6, a, t, r);
00811 xor_4k( 5, a, t, r); xor_4k( 4, a, t, r); xor_4k( 3, a, t, r);
00812 xor_4k( 2, a, t, r); xor_4k( 1, a, t, r); xor_4k( 0, a, t, r);
00813 move_block_aligned(a, r);
00814 }
00815
00816 #else
00817
00818 gf_inline void gf_mul_4k(unsigned char a[], void *t, void *r)
00819 { int i = 15;
00820 move_block_aligned(r,tab4k(t)[a[15]]);
00821 while(i--)
00822 {
00823 xor_4k(i, a, t, r);
00824 }
00825 move_block_aligned(a, r);
00826 }
00827
00828 #endif
00829
00830 #else
00831
00832 #if defined( UNROLL_LOOPS )
00833
/* Unrolled macro form of gf_mul_4k (USE_INLINES not defined, UNROLL_LOOPS
   defined). r starts as the table entry for byte 15 of a; bytes 14 down
   to 0 are folded in with xor_4k and the result is copied back over a.
   Arguments are parenthesised before use so that expressions such as
   'buf + 16' may be passed; they are expanded many times and must be
   free of side effects. */
#define gf_mul_4k(a, t, r) do { \
    move_block_aligned((r), tab4k((t))[(a)[15]]); \
    xor_4k(14, (a), (t), (r)); xor_4k(13, (a), (t), (r)); xor_4k(12, (a), (t), (r)); \
    xor_4k(11, (a), (t), (r)); xor_4k(10, (a), (t), (r)); xor_4k( 9, (a), (t), (r)); \
    xor_4k( 8, (a), (t), (r)); xor_4k( 7, (a), (t), (r)); xor_4k( 6, (a), (t), (r)); \
    xor_4k( 5, (a), (t), (r)); xor_4k( 4, (a), (t), (r)); xor_4k( 3, (a), (t), (r)); \
    xor_4k( 2, (a), (t), (r)); xor_4k( 1, (a), (t), (r)); xor_4k( 0, (a), (t), (r)); \
    move_block_aligned((a), (r)); \
} while(0)
00843
00844 #else
00845
/* Loop macro form of gf_mul_4k (neither USE_INLINES nor UNROLL_LOOPS
   defined). r starts as the table entry for byte 15 of a; the counter
   runs from 14 down to 0, folding each byte in with xor_4k, and the
   result is copied back over a. The counter is named gf_i_ so that an
   'i' appearing in a caller's argument is not captured by the macro's own
   variable, and arguments are parenthesised before use. Arguments are
   expanded several times and must be free of side effects. */
#define gf_mul_4k(a, t, r) do { int gf_i_ = 15; \
    move_block_aligned((r), tab4k((t))[(a)[15]]); \
    while(gf_i_--) \
    {   xor_4k(gf_i_, (a), (t), (r)); \
    } \
    move_block_aligned((a), (r)); \
} while(0)
00853
00854 #endif
00855
00856 #endif
00857
00858
00859
00860
00861
00862
00863
00864
00865
00866
00867
00868
00869
00870
00871
00872
00873
00874
00875
/* init_256_table: declaration only. NOTE(review): presumably fills the
   table at t for the fixed multiplier g - confirm in the implementation.
   gf_t256: pointer to entries of GF_BYTE_LEN/4 32-bit words (16 bytes);
   gf_mul_256 indexes it by nibble value (0..15). */
00876 void init_256_table(unsigned char g[], void *t);
00877
00878 typedef uint_32t (*gf_t256)[GF_BYTE_LEN >> 2];
/* tab256(t): view the raw table pointer t as a gf_t256; the argument is
   parenthesised so compound expressions are cast as a whole. */
#define tab256(t) ((gf_t256)(t))
/* xor_256(i,a,t,r): fold byte i of a into the accumulator block r one
   nibble at a time - advance r by four bits (mul_x4) and XOR in the entry
   for the low nibble, then advance again and XOR in the entry for the
   high nibble. The four statements are wrapped in do { } while(0) so the
   macro acts as a single statement, and every argument is parenthesised.
   i, a and r are expanded more than once and must be free of side
   effects. */
#define xor_256(i,a,t,r) do { \
    mul_x4((r)); xor_block_aligned((r), tab256(t)[(a)[(i)] & 15]); \
    mul_x4((r)); xor_block_aligned((r), tab256(t)[(a)[(i)] >> 4]); \
} while(0)
00883
00884 #if defined( USE_INLINES )
00885
00886 #if defined( UNROLL_LOOPS )
00887
00888 gf_inline void gf_mul_256(unsigned char a[], void *t, void *r)
00889 {
00890 move_block_aligned(r,tab256(t)[a[15] & 15]); mul_x4(r);
00891 xor_block_aligned(r, tab256(t)[a[15] >> 4]);
00892 xor_256(14, a, t, r); xor_256(13, a, t, r);
00893 xor_256(12, a, t, r); xor_256(11, a, t, r);
00894 xor_256(10, a, t, r); xor_256( 9, a, t, r);
00895 xor_256( 8, a, t, r); xor_256( 7, a, t, r);
00896 xor_256( 6, a, t, r); xor_256( 5, a, t, r);
00897 xor_256( 4, a, t, r); xor_256( 3, a, t, r);
00898 xor_256( 2, a, t, r); xor_256( 1, a, t, r);
00899 xor_256( 0, a, t, r); move_block_aligned(a, r);
00900 }
00901
00902 #else
00903
00904 gf_inline void gf_mul_256(unsigned char a[], void *t, void *r)
00905 { int i = 15;
00906 move_block_aligned(r,tab256(t)[a[15] & 15]); mul_x4(r);
00907 xor_block_aligned(r, tab256(t)[a[15] >> 4]);
00908 while(i--)
00909 { xor_256(i, a, t, r);
00910 }
00911 move_block_aligned(a, r);
00912 }
00913
00914 #endif
00915
00916 #else
00917
00918 #if defined( UNROLL_LOOPS )
00919
/* Unrolled macro form of gf_mul_256 (USE_INLINES not defined,
   UNROLL_LOOPS defined). Byte 15 of a seeds r (low-nibble entry, mul_x4,
   high-nibble entry); bytes 14 down to 0 are folded in with xor_256 and
   the result is copied back over a. Arguments are parenthesised before
   use so that expressions such as 'buf + 16' may be passed; they are
   expanded many times and must be free of side effects. */
#define gf_mul_256(a, t, r) do { \
    move_block_aligned((r), tab256((t))[(a)[15] & 15]); mul_x4((r)); \
    xor_block_aligned((r), tab256((t))[(a)[15] >> 4]); \
    xor_256(14, (a), (t), (r)); xor_256(13, (a), (t), (r)); \
    xor_256(12, (a), (t), (r)); xor_256(11, (a), (t), (r)); \
    xor_256(10, (a), (t), (r)); xor_256( 9, (a), (t), (r)); \
    xor_256( 8, (a), (t), (r)); xor_256( 7, (a), (t), (r)); \
    xor_256( 6, (a), (t), (r)); xor_256( 5, (a), (t), (r)); \
    xor_256( 4, (a), (t), (r)); xor_256( 3, (a), (t), (r)); \
    xor_256( 2, (a), (t), (r)); xor_256( 1, (a), (t), (r)); \
    xor_256( 0, (a), (t), (r)); move_block_aligned((a), (r)); \
} while(0)
00932
00933 #else
00934
/* Loop macro form of gf_mul_256 (neither USE_INLINES nor UNROLL_LOOPS
   defined). Byte 15 of a seeds r (low-nibble entry, mul_x4, high-nibble
   entry); the counter runs from 14 down to 0, folding each byte in with
   xor_256, and the result is copied back over a. The counter is named
   gf_i_ so that an 'i' appearing in a caller's argument is not captured
   by the macro's own variable, and arguments are parenthesised before
   use. Arguments are expanded several times and must be free of side
   effects. */
#define gf_mul_256(a, t, r) do { int gf_i_ = 15; \
    move_block_aligned((r), tab256((t))[(a)[15] & 15]); mul_x4((r)); \
    xor_block_aligned((r), tab256((t))[(a)[15] >> 4]); \
    while(gf_i_--) \
    {   xor_256(gf_i_, (a), (t), (r)); \
    } \
    move_block_aligned((a), (r)); \
} while(0)
00943
00944 #endif
00945
00946 #endif
00947
00948 #if defined(__cplusplus)
00949 }
00950 #endif
00951
00952 #endif