From 466ce76ecaf1efeb18d20ed082818e9f54806d59 Mon Sep 17 00:00:00 2001 From: Linus Yang Date: Fri, 18 Aug 2017 17:38:24 +0800 Subject: [PATCH] Port OpenSSL asm code --- lib/aesacc.c | 112 +- lib/aesacc.h | 8 + lib/asm/arm.S | 1194 ++++++++++++++++ lib/asm/arm_arch.h | 83 ++ lib/asm/mips.S | 1835 +++++++++++++++++++++++++ lib/asm/x86.S | 3244 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 6461 insertions(+), 15 deletions(-) create mode 100644 lib/asm/arm.S create mode 100644 lib/asm/arm_arch.h create mode 100644 lib/asm/mips.S create mode 100644 lib/asm/x86.S diff --git a/lib/aesacc.c b/lib/aesacc.c index c845576..9939bc7 100644 --- a/lib/aesacc.c +++ b/lib/aesacc.c @@ -11,13 +11,19 @@ #if defined(AES256) && (AES256 == 1) #define AES_KEYSIZE 256 -#define aes_setkey_enc aesni_setkey_enc_256 +#ifdef HAVE_AMD64 + #define aes_setkey_enc aesni_setkey_enc_256 +#endif #elif defined(AES192) && (AES192 == 1) #define AES_KEYSIZE 192 -#define aes_setkey_enc aesni_setkey_enc_192 +#ifdef HAVE_AMD64 + #define aes_setkey_enc aesni_setkey_enc_192 +#endif #else #define AES_KEYSIZE 128 -#define aes_setkey_enc aesni_setkey_enc_128 +#ifdef HAVE_AMD64 + #define aes_setkey_enc aesni_setkey_enc_128 +#endif #endif #define AES_NR ((AES_KEYSIZE >> 5) + 6) @@ -156,7 +162,87 @@ static void aes_inverse_key(uint8_t *invkey, const uint8_t *fwdkey) #endif /* HAVE_ARM64 */ +#ifdef HAVE_ASM + +#define AES_MAXNR 14 + +typedef struct { + uint32_t rd_key[4 * (AES_MAXNR + 1)]; + int rounds; +} AES_KEY; + +#ifdef __cplusplus +extern "C" { +#endif + +int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); +int AES_set_decrypt_key(const unsigned char *userKey, const int bits, + AES_KEY *key); + +void AES_encrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); +void AES_decrypt(const unsigned char *in, unsigned char *out, + const AES_KEY *key); + +#ifdef __cplusplus +} +#endif + +static int aes_supported(void) +{ + return 2; +} + +static void aes_crypt_ecb( int nr, + unsigned char *rk, + int mode, + const unsigned char input[16], + unsigned char output[16] ) +{ + AES_KEY *ctx; + ctx = (AES_KEY *) rk; + ctx->rounds = nr; + if (mode == AES_DECRYPT) { + AES_decrypt(input, output, ctx); + } else { + AES_encrypt(input, output, ctx); + } +} + +static void aes_setkey_enc(uint8_t *rk, const uint8_t *key) +{ + AES_KEY *ctx; + ctx = (AES_KEY *) rk; + ctx->rounds = AES_NR; + AES_set_encrypt_key(key, AES_KEYSIZE, ctx); +} + +static void aes_setkey_dec(uint8_t *rk, const uint8_t *key) +{ + AES_KEY *ctx; + ctx = (AES_KEY *) rk; + ctx->rounds = AES_NR; + AES_set_decrypt_key(key, AES_KEYSIZE, ctx); +} + +#endif + #ifdef HAVE_HARDAES + +static void aes_setkey_dec(uint8_t *rk, const uint8_t *key) +{ + uint8_t rk_tmp[AES_RKSIZE]; + aes_setkey_enc(rk_tmp, key); + aes_inverse_key(rk, rk_tmp); +} + +#endif + +#if defined(HAVE_HARDAES) || defined(HAVE_ASM) + +#define HAVE_ACC 1 + /* * AESNI-CBC buffer encryption/decryption */ @@ -204,11 +290,11 @@ static void aes_crypt_cbc( int mode, } } -#endif /* HAVE_HARDAES */ +#endif /* HAVE_HARDAES or HAVE_ASM */ int AESACC_supported(void) { -#if defined(HAVE_HARDAES) +#if defined(HAVE_ACC) return aes_supported(); #else return 0; @@ -217,7 +303,7 @@ int AESACC_supported(void) void AESACC_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv) { -#if defined(HAVE_HARDAES) +#if defined(HAVE_ACC) uint8_t iv_tmp[16]; uint8_t rk[AES_RKSIZE]; @@ -240,10 +326,9 @@ void AESACC_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, void AESACC_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv) { -#if defined(HAVE_HARDAES) +#if defined(HAVE_ACC) uint8_t iv_tmp[16]; uint8_t rk[AES_RKSIZE]; - uint8_t rk_tmp[AES_RKSIZE]; if (aes_supported()) { @@ -252,8 +337,7 @@ void AESACC_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, return; } memcpy(iv_tmp, iv, 16); - aes_setkey_enc(rk_tmp, key); - aes_inverse_key(rk, rk_tmp); + aes_setkey_dec(rk, key); aes_crypt_cbc(AES_DECRYPT, rk, \ length, iv_tmp, input, output); return; @@ -265,7 +349,7 @@ void AESACC_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, void AESACC_ECB_encrypt(const uint8_t* input, const uint8_t* key, uint8_t* output, const uint32_t length) { -#if defined(HAVE_HARDAES) +#if defined(HAVE_ACC) uint8_t rk[AES_RKSIZE]; if (aes_supported()) @@ -285,9 +369,8 @@ void AESACC_ECB_encrypt(const uint8_t* input, const uint8_t* key, uint8_t* outpu void AESACC_ECB_decrypt(const uint8_t* input, const uint8_t* key, uint8_t *output, const uint32_t length) { -#if defined(HAVE_HARDAES) +#if defined(HAVE_ACC) uint8_t rk[AES_RKSIZE]; - uint8_t rk_tmp[AES_RKSIZE]; if (aes_supported()) { @@ -295,8 +378,7 @@ void AESACC_ECB_decrypt(const uint8_t* input, const uint8_t* key, uint8_t *outpu { return; } - aes_setkey_enc(rk_tmp, key); - aes_inverse_key(rk, rk_tmp); + aes_setkey_dec(rk, key); aes_crypt_ecb(AES_NR, rk, AES_DECRYPT, input, output); return; } diff --git a/lib/aesacc.h b/lib/aesacc.h index 61012e6..afc36d0 100644 --- a/lib/aesacc.h +++ b/lib/aesacc.h @@ -3,10 +3,18 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + int AESACC_supported(void); void AESACC_ECB_encrypt(const uint8_t* input, const uint8_t* key, uint8_t *output, const uint32_t length); void AESACC_ECB_decrypt(const uint8_t* input, const uint8_t* key, uint8_t *output, const uint32_t length); void AESACC_CBC_encrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv); void AESACC_CBC_decrypt_buffer(uint8_t* output, uint8_t* input, uint32_t length, const uint8_t* key, const uint8_t* iv); +#ifdef __cplusplus +} +#endif + #endif /* _AESACC_H_ */ diff --git a/lib/asm/arm.S b/lib/asm/arm.S new file mode 100644 index 0000000..fd752ba --- /dev/null +++ b/lib/asm/arm.S @@ -0,0 +1,1194 @@ +@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. +@ +@ Licensed under the OpenSSL license (the "License"). You may not use +@ this file except in compliance with the License. You can obtain a copy +@ in the file LICENSE in the source distribution or at +@ https://www.openssl.org/source/license.html + + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ ==================================================================== + +@ AES for ARMv4 + +@ January 2007. +@ +@ Code uses single 1K S-box and is >2 times faster than code generated +@ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which +@ allows to merge logical or arithmetic operation with shift or rotate +@ in one instruction and emit combined result every cycle. The module +@ is endian-neutral. The performance is ~42 cycles/byte for 128-bit +@ key [on single-issue Xscale PXA250 core]. + +@ May 2007. +@ +@ AES_set_[en|de]crypt_key is added. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 12% improvement on +@ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~21.5 cycles per byte. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +#endif + +.text +#if defined(__thumb2__) && !defined(__APPLE__) +.syntax unified +.thumb +#else +.code 32 +#undef __thumb2__ +#endif + + +.align 5 +AES_Te: +.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d +.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554 +.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d +.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a +.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87 +.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b +.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea +.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b +.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a +.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f +.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108 +.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f +.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e +.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5 +.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d +.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f +.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e +.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb +.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce +.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497 +.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c +.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed +.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b +.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a +.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16 +.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594 +.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81 +.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3 +.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a +.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504 +.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163 +.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d +.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f +.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739 +.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47 +.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395 +.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f +.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883 +.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c +.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76 +.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e +.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4 +.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6 +.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b +.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7 +.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0 +.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25 +.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818 +.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72 +.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651 +.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21 +.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85 +.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa +.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12 +.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0 +.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9 +.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133 +.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7 +.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920 +.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a +.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17 +.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8 +.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11 +.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +@ Te4[256] +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +@ rcon[] +.word 0x01000000, 0x02000000, 0x04000000, 0x08000000 +.word 0x10000000, 0x20000000, 0x40000000, 0x80000000 +.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 + + +@ void AES_encrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.globl AES_encrypt + +.align 5 +AES_encrypt: +#ifndef __thumb2__ + sub r3,pc,#8 @ AES_encrypt +#else + adr r3,AES_encrypt +#endif + stmdb sp!,{r1,r4-r12,lr} +#ifdef __APPLE__ + adr r10,AES_Te +#else + sub r10,r3,#AES_encrypt-AES_Te @ Te +#endif + mov r12,r0 @ inp + mov r11,r2 +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_encrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif + + + +.align 2 +_armv4_AES_encrypt: + str lr,[sp,#-4]! @ push lr + ldmia r11!,{r4,r5,r6,r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0 + and r8,lr,r0,lsr#8 + and r9,lr,r0,lsr#16 + mov r0,r0,lsr#24 +Lenc_loop: + ldr r4,[r10,r7,lsl#2] @ Te3[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldr r5,[r10,r8,lsl#2] @ Te2[s0>>8] + and r8,lr,r1 + ldr r6,[r10,r9,lsl#2] @ Te1[s0>>16] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Te0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Te1[s1>>16] + ldr r8,[r10,r8,lsl#2] @ Te3[s1>>0] + ldr r9,[r10,r9,lsl#2] @ Te2[s1>>8] + eor r0,r0,r7,ror#8 + ldr r1,[r10,r1,lsl#2] @ Te0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,ror#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r6,r9,ror#8 + and r9,lr,r2 + ldr r7,[r10,r7,lsl#2] @ Te2[s2>>8] + eor r1,r1,r4,ror#24 + ldr r8,[r10,r8,lsl#2] @ Te1[s2>>16] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te3[s2>>0] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Te0[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,ror#8 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r6,r9,ror#16 + and r9,lr,r3,lsr#16 @ i2 + ldr r7,[r10,r7,lsl#2] @ Te3[s3>>0] + eor r2,r2,r5,ror#16 + ldr r8,[r10,r8,lsl#2] @ Te2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Te1[s3>>16] + eor r0,r0,r7,ror#24 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Te0[s3>>24] + eor r2,r2,r9,ror#8 + ldr r4,[r11,#-12] + eor r3,r3,r6,ror#8 + + ldr r5,[r11,#-8] + eor r0,r0,r7 + ldr r6,[r11,#-4] + and r7,lr,r0 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0,lsr#16 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne Lenc_loop + + add r10,r10,#2 + + ldrb r4,[r10,r7,lsl#2] @ Te4[s0>>0] + and r7,lr,r1,lsr#16 @ i0 + ldrb r5,[r10,r8,lsl#2] @ Te4[s0>>8] + and r8,lr,r1 + ldrb r6,[r10,r9,lsl#2] @ Te4[s0>>16] + and r9,lr,r1,lsr#8 + ldrb r0,[r10,r0,lsl#2] @ Te4[s0>>24] + mov r1,r1,lsr#24 + + ldrb r7,[r10,r7,lsl#2] @ Te4[s1>>16] + ldrb r8,[r10,r8,lsl#2] @ Te4[s1>>0] + ldrb r9,[r10,r9,lsl#2] @ Te4[s1>>8] + eor r0,r7,r0,lsl#8 + ldrb r1,[r10,r1,lsl#2] @ Te4[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,lsl#8 + and r8,lr,r2,lsr#16 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s2>>8] + eor r1,r4,r1,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s2>>16] + mov r2,r2,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s2>>0] + eor r0,r7,r0,lsl#8 + ldrb r2,[r10,r2,lsl#2] @ Te4[s2>>24] + and r7,lr,r3 @ i0 + eor r1,r1,r8,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,lsl#8 + and r9,lr,r3,lsr#16 @ i2 + ldrb r7,[r10,r7,lsl#2] @ Te4[s3>>0] + eor r2,r5,r2,lsl#24 + ldrb r8,[r10,r8,lsl#2] @ Te4[s3>>8] + mov r3,r3,lsr#24 + + ldrb r9,[r10,r9,lsl#2] @ Te4[s3>>16] + eor r0,r7,r0,lsl#8 + ldr r7,[r11,#0] + ldrb r3,[r10,r3,lsl#2] @ Te4[s3>>24] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r2,r9,lsl#16 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#2 + ldr pc,[sp],#4 @ pop and return + + +.globl AES_set_encrypt_key + +.align 5 +AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: +#ifndef __thumb2__ + sub r3,pc,#8 @ AES_set_encrypt_key +#else + adr r3,AES_set_encrypt_key +#endif + teq r0,#0 +#ifdef __thumb2__ + itt eq @ Thumb2 thing, sanity check in ARM +#endif + moveq r0,#-1 + beq Labrt + teq r2,#0 +#ifdef __thumb2__ + itt eq @ Thumb2 thing, sanity check in ARM +#endif + moveq r0,#-1 + beq Labrt + + teq r1,#128 + beq Lok + teq r1,#192 + beq Lok + teq r1,#256 +#ifdef __thumb2__ + itt ne @ Thumb2 thing, sanity check in ARM +#endif + movne r0,#-1 + bne Labrt + +Lok: stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + mov r12,r0 @ inp + mov lr,r1 @ bits + mov r11,r2 @ key + +#ifdef __APPLE__ + adr r10,AES_Te+1024 @ Te4 +#else + sub r10,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 +#endif + +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + str r0,[r11],#16 + orr r3,r3,r5,lsl#16 + str r1,[r11,#-12] + orr r3,r3,r6,lsl#24 + str r2,[r11,#-8] + str r3,[r11,#-4] +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r11],#16 + str r1,[r11,#-12] + str r2,[r11,#-8] + str r3,[r11,#-4] +#endif + + teq lr,#128 + bne Lnot128 + mov r12,#10 + str r12,[r11,#240-16] + add r6,r10,#256 @ rcon + mov lr,#255 + +L128_loop: + and r5,lr,r3,lsr#24 + and r7,lr,r3,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r3 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r5,r5,r4 + eor r0,r0,r5 @ rk[4]=rk[0]^... + eor r1,r1,r0 @ rk[5]=rk[1]^rk[4] + str r0,[r11],#16 + eor r2,r2,r1 @ rk[6]=rk[2]^rk[5] + str r1,[r11,#-12] + eor r3,r3,r2 @ rk[7]=rk[3]^rk[6] + str r2,[r11,#-8] + subs r12,r12,#1 + str r3,[r11,#-4] + bne L128_loop + sub r2,r11,#176 + b Ldone + +Lnot128: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#19] + ldrb r4,[r12,#18] + ldrb r5,[r12,#17] + ldrb r6,[r12,#16] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#23] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#22] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#21] + ldrb r6,[r12,#20] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#16] + ldr r9,[r12,#20] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + teq lr,#192 + bne Lnot192 + mov r12,#12 + str r12,[r11,#240-24] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#8 + +L192_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[6]=rk[0]^... + eor r1,r1,r0 @ rk[7]=rk[1]^rk[6] + str r0,[r11],#24 + eor r2,r2,r1 @ rk[8]=rk[2]^rk[7] + str r1,[r11,#-20] + eor r3,r3,r2 @ rk[9]=rk[3]^rk[8] + str r2,[r11,#-16] + subs r12,r12,#1 + str r3,[r11,#-12] +#ifdef __thumb2__ + itt eq @ Thumb2 thing, sanity check in ARM +#endif + subeq r2,r11,#216 + beq Ldone + + ldr r7,[r11,#-32] + ldr r8,[r11,#-28] + eor r7,r7,r3 @ rk[10]=rk[4]^rk[9] + eor r9,r8,r7 @ rk[11]=rk[5]^rk[10] + str r7,[r11,#-8] + str r9,[r11,#-4] + b L192_loop + +Lnot192: +#if __ARM_ARCH__<7 + ldrb r8,[r12,#27] + ldrb r4,[r12,#26] + ldrb r5,[r12,#25] + ldrb r6,[r12,#24] + orr r8,r8,r4,lsl#8 + ldrb r9,[r12,#31] + orr r8,r8,r5,lsl#16 + ldrb r4,[r12,#30] + orr r8,r8,r6,lsl#24 + ldrb r5,[r12,#29] + ldrb r6,[r12,#28] + orr r9,r9,r4,lsl#8 + orr r9,r9,r5,lsl#16 + str r8,[r11],#8 + orr r9,r9,r6,lsl#24 + str r9,[r11,#-4] +#else + ldr r8,[r12,#24] + ldr r9,[r12,#28] +#ifdef __ARMEL__ + rev r8,r8 + rev r9,r9 +#endif + str r8,[r11],#8 + str r9,[r11,#-4] +#endif + + mov r12,#14 + str r12,[r11,#240-32] + add r6,r10,#256 @ rcon + mov lr,#255 + mov r12,#7 + +L256_loop: + and r5,lr,r9,lsr#24 + and r7,lr,r9,lsr#16 + ldrb r5,[r10,r5] + and r8,lr,r9,lsr#8 + ldrb r7,[r10,r7] + and r9,lr,r9 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#24 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r6],#4 @ rcon[i++] + orr r5,r5,r9,lsl#8 + eor r9,r5,r4 + eor r0,r0,r9 @ rk[8]=rk[0]^... + eor r1,r1,r0 @ rk[9]=rk[1]^rk[8] + str r0,[r11],#32 + eor r2,r2,r1 @ rk[10]=rk[2]^rk[9] + str r1,[r11,#-28] + eor r3,r3,r2 @ rk[11]=rk[3]^rk[10] + str r2,[r11,#-24] + subs r12,r12,#1 + str r3,[r11,#-20] +#ifdef __thumb2__ + itt eq @ Thumb2 thing, sanity check in ARM +#endif + subeq r2,r11,#256 + beq Ldone + + and r5,lr,r3 + and r7,lr,r3,lsr#8 + ldrb r5,[r10,r5] + and r8,lr,r3,lsr#16 + ldrb r7,[r10,r7] + and r9,lr,r3,lsr#24 + ldrb r8,[r10,r8] + orr r5,r5,r7,lsl#8 + ldrb r9,[r10,r9] + orr r5,r5,r8,lsl#16 + ldr r4,[r11,#-48] + orr r5,r5,r9,lsl#24 + + ldr r7,[r11,#-44] + ldr r8,[r11,#-40] + eor r4,r4,r5 @ rk[12]=rk[4]^... + ldr r9,[r11,#-36] + eor r7,r7,r4 @ rk[13]=rk[5]^rk[12] + str r4,[r11,#-16] + eor r8,r8,r7 @ rk[14]=rk[6]^rk[13] + str r7,[r11,#-12] + eor r9,r9,r8 @ rk[15]=rk[7]^rk[14] + str r8,[r11,#-8] + str r9,[r11,#-4] + b L256_loop + +.align 2 +Ldone: mov r0,#0 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} +Labrt: +#if __ARM_ARCH__>=5 + bx lr @ .word 0xe12fff1e +#else + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif + + +.globl AES_set_decrypt_key + +.align 5 +AES_set_decrypt_key: + str lr,[sp,#-4]! @ push lr + bl _armv4_AES_set_encrypt_key + teq r0,#0 + ldr lr,[sp],#4 @ pop lr + bne Labrt + + mov r0,r2 @ AES_set_encrypt_key preserves r2, + mov r1,r2 @ which is AES_KEY *key + b _armv4_AES_set_enc2dec_key + + +@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out) +.globl AES_set_enc2dec_key + +.align 5 +AES_set_enc2dec_key: +_armv4_AES_set_enc2dec_key: + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + + ldr r12,[r0,#240] + mov r7,r0 @ input + add r8,r0,r12,lsl#4 + mov r11,r1 @ output + add r10,r1,r12,lsl#4 + str r12,[r1,#240] + +Linv: ldr r0,[r7],#16 + ldr r1,[r7,#-12] + ldr r2,[r7,#-8] + ldr r3,[r7,#-4] + ldr r4,[r8],#-16 + ldr r5,[r8,#16+4] + ldr r6,[r8,#16+8] + ldr r9,[r8,#16+12] + str r0,[r10],#-16 + str r1,[r10,#16+4] + str r2,[r10,#16+8] + str r3,[r10,#16+12] + str r4,[r11],#16 + str r5,[r11,#-12] + str r6,[r11,#-8] + str r9,[r11,#-4] + teq r7,r8 + bne Linv + + ldr r0,[r7] + ldr r1,[r7,#4] + ldr r2,[r7,#8] + ldr r3,[r7,#12] + str r0,[r11] + str r1,[r11,#4] + str r2,[r11,#8] + str r3,[r11,#12] + sub r11,r11,r12,lsl#3 + ldr r0,[r11,#16]! @ prefetch tp1 + mov r7,#0x80 + mov r8,#0x1b + orr r7,r7,#0x8000 + orr r8,r8,#0x1b00 + orr r7,r7,r7,lsl#16 + orr r8,r8,r8,lsl#16 + sub r12,r12,#1 + mvn r9,r7 + mov r12,r12,lsl#2 @ (rounds-1)*4 + +Lmix: and r4,r0,r7 + and r1,r0,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r1,r4,r1,lsl#1 @ tp2 + + and r4,r1,r7 + and r2,r1,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r2,r4,r2,lsl#1 @ tp4 + + and r4,r2,r7 + and r3,r2,r9 + sub r4,r4,r4,lsr#7 + and r4,r4,r8 + eor r3,r4,r3,lsl#1 @ tp8 + + eor r4,r1,r2 + eor r5,r0,r3 @ tp9 + eor r4,r4,r3 @ tpe + eor r4,r4,r1,ror#24 + eor r4,r4,r5,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8) + eor r4,r4,r2,ror#16 + eor r4,r4,r5,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16) + eor r4,r4,r5,ror#8 @ ^= ROTATE(tp9,24) + + ldr r0,[r11,#4] @ prefetch tp1 + str r4,[r11],#4 + subs r12,r12,#1 + bne Lmix + + mov r0,#0 +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif + + + +.align 5 +AES_Td: +.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96 +.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393 +.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25 +.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f +.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1 +.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6 +.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da +.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844 +.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd +.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4 +.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45 +.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94 +.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7 +.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a +.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5 +.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c +.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1 +.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a +.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75 +.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051 +.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46 +.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff +.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77 +.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb +.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000 +.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e +.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927 +.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a +.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e +.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16 +.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d +.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8 +.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd +.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34 +.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163 +.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120 +.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d +.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0 +.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422 +.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef +.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36 +.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4 +.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662 +.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5 +.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3 +.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b +.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8 +.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6 +.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6 +.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0 +.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815 +.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f +.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df +.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f +.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e +.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713 +.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89 +.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c +.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf +.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86 +.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f +.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541 +.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190 +.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +@ Td4[256] +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + + +@ void AES_decrypt(const unsigned char *in, unsigned char *out, +@ const AES_KEY *key) { +.globl AES_decrypt + +.align 5 +AES_decrypt: +#ifndef __thumb2__ + sub r3,pc,#8 @ AES_decrypt +#else + adr r3,AES_decrypt +#endif + stmdb sp!,{r1,r4-r12,lr} +#ifdef __APPLE__ + adr r10,AES_Td +#else + sub r10,r3,#AES_decrypt-AES_Td @ Td +#endif + mov r12,r0 @ inp + mov r11,r2 +#if __ARM_ARCH__<7 + ldrb r0,[r12,#3] @ load input data in endian-neutral + ldrb r4,[r12,#2] @ manner... + ldrb r5,[r12,#1] + ldrb r6,[r12,#0] + orr r0,r0,r4,lsl#8 + ldrb r1,[r12,#7] + orr r0,r0,r5,lsl#16 + ldrb r4,[r12,#6] + orr r0,r0,r6,lsl#24 + ldrb r5,[r12,#5] + ldrb r6,[r12,#4] + orr r1,r1,r4,lsl#8 + ldrb r2,[r12,#11] + orr r1,r1,r5,lsl#16 + ldrb r4,[r12,#10] + orr r1,r1,r6,lsl#24 + ldrb r5,[r12,#9] + ldrb r6,[r12,#8] + orr r2,r2,r4,lsl#8 + ldrb r3,[r12,#15] + orr r2,r2,r5,lsl#16 + ldrb r4,[r12,#14] + orr r2,r2,r6,lsl#24 + ldrb r5,[r12,#13] + ldrb r6,[r12,#12] + orr r3,r3,r4,lsl#8 + orr r3,r3,r5,lsl#16 + orr r3,r3,r6,lsl#24 +#else + ldr r0,[r12,#0] + ldr r1,[r12,#4] + ldr r2,[r12,#8] + ldr r3,[r12,#12] +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif +#endif + bl _armv4_AES_decrypt + + ldr r12,[sp],#4 @ pop out +#if __ARM_ARCH__>=7 +#ifdef __ARMEL__ + rev r0,r0 + rev r1,r1 + rev r2,r2 + rev r3,r3 +#endif + str r0,[r12,#0] + str r1,[r12,#4] + str r2,[r12,#8] + str r3,[r12,#12] +#else + mov r4,r0,lsr#24 @ write output in endian-neutral + mov r5,r0,lsr#16 @ manner... + mov r6,r0,lsr#8 + strb r4,[r12,#0] + strb r5,[r12,#1] + mov r4,r1,lsr#24 + strb r6,[r12,#2] + mov r5,r1,lsr#16 + strb r0,[r12,#3] + mov r6,r1,lsr#8 + strb r4,[r12,#4] + strb r5,[r12,#5] + mov r4,r2,lsr#24 + strb r6,[r12,#6] + mov r5,r2,lsr#16 + strb r1,[r12,#7] + mov r6,r2,lsr#8 + strb r4,[r12,#8] + strb r5,[r12,#9] + mov r4,r3,lsr#24 + strb r6,[r12,#10] + mov r5,r3,lsr#16 + strb r2,[r12,#11] + mov r6,r3,lsr#8 + strb r4,[r12,#12] + strb r5,[r12,#13] + strb r6,[r12,#14] + strb r3,[r12,#15] +#endif +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif + + + +.align 2 +_armv4_AES_decrypt: + str lr,[sp,#-4]! @ push lr + ldmia r11!,{r4,r5,r6,r7} + eor r0,r0,r4 + ldr r12,[r11,#240-16] + eor r1,r1,r5 + eor r2,r2,r6 + eor r3,r3,r7 + sub r12,r12,#1 + mov lr,#255 + + and r7,lr,r0,lsr#16 + and r8,lr,r0,lsr#8 + and r9,lr,r0 + mov r0,r0,lsr#24 +Ldec_loop: + ldr r4,[r10,r7,lsl#2] @ Td1[s0>>16] + and r7,lr,r1 @ i0 + ldr r5,[r10,r8,lsl#2] @ Td2[s0>>8] + and r8,lr,r1,lsr#16 + ldr r6,[r10,r9,lsl#2] @ Td3[s0>>0] + and r9,lr,r1,lsr#8 + ldr r0,[r10,r0,lsl#2] @ Td0[s0>>24] + mov r1,r1,lsr#24 + + ldr r7,[r10,r7,lsl#2] @ Td3[s1>>0] + ldr r8,[r10,r8,lsl#2] @ Td1[s1>>16] + ldr r9,[r10,r9,lsl#2] @ Td2[s1>>8] + eor r0,r0,r7,ror#24 + ldr r1,[r10,r1,lsl#2] @ Td0[s1>>24] + and r7,lr,r2,lsr#8 @ i0 + eor r5,r8,r5,ror#8 + and r8,lr,r2 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r2,lsr#16 + ldr r7,[r10,r7,lsl#2] @ Td2[s2>>8] + eor r1,r1,r4,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td3[s2>>0] + mov r2,r2,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td1[s2>>16] + eor r0,r0,r7,ror#16 + ldr r2,[r10,r2,lsl#2] @ Td0[s2>>24] + and r7,lr,r3,lsr#16 @ i0 + eor r1,r1,r8,ror#24 + and r8,lr,r3,lsr#8 @ i1 + eor r6,r9,r6,ror#8 + and r9,lr,r3 @ i2 + ldr r7,[r10,r7,lsl#2] @ Td1[s3>>16] + eor r2,r2,r5,ror#8 + ldr r8,[r10,r8,lsl#2] @ Td2[s3>>8] + mov r3,r3,lsr#24 + + ldr r9,[r10,r9,lsl#2] @ Td3[s3>>0] + eor r0,r0,r7,ror#8 + ldr r7,[r11],#16 + eor r1,r1,r8,ror#16 + ldr r3,[r10,r3,lsl#2] @ Td0[s3>>24] + eor r2,r2,r9,ror#24 + + ldr r4,[r11,#-12] + eor r0,r0,r7 + ldr r5,[r11,#-8] + eor r3,r3,r6,ror#8 + ldr r6,[r11,#-4] + and r7,lr,r0,lsr#16 + eor r1,r1,r4 + and r8,lr,r0,lsr#8 + eor r2,r2,r5 + and r9,lr,r0 + eor r3,r3,r6 + mov r0,r0,lsr#24 + + subs r12,r12,#1 + bne Ldec_loop + + add r10,r10,#1024 + + ldr r5,[r10,#0] @ prefetch Td4 + ldr r6,[r10,#32] + ldr r4,[r10,#64] + ldr r5,[r10,#96] + ldr r6,[r10,#128] + ldr r4,[r10,#160] + ldr r5,[r10,#192] + ldr r6,[r10,#224] + + ldrb r0,[r10,r0] @ Td4[s0>>24] + ldrb r4,[r10,r7] @ Td4[s0>>16] + and r7,lr,r1 @ i0 + ldrb r5,[r10,r8] @ Td4[s0>>8] + and r8,lr,r1,lsr#16 + ldrb r6,[r10,r9] @ Td4[s0>>0] + and r9,lr,r1,lsr#8 + + add r1,r10,r1,lsr#24 + ldrb r7,[r10,r7] @ Td4[s1>>0] + ldrb r1,[r1] @ Td4[s1>>24] + ldrb r8,[r10,r8] @ Td4[s1>>16] + eor r0,r7,r0,lsl#24 + ldrb r9,[r10,r9] @ Td4[s1>>8] + eor r1,r4,r1,lsl#8 + and r7,lr,r2,lsr#8 @ i0 + eor r5,r5,r8,lsl#8 + and r8,lr,r2 @ i1 + ldrb r7,[r10,r7] @ Td4[s2>>8] + eor r6,r6,r9,lsl#8 + ldrb r8,[r10,r8] @ Td4[s2>>0] + and r9,lr,r2,lsr#16 + + add r2,r10,r2,lsr#24 + ldrb r2,[r2] @ Td4[s2>>24] + eor r0,r0,r7,lsl#8 + ldrb r9,[r10,r9] @ Td4[s2>>16] + eor r1,r8,r1,lsl#16 + and r7,lr,r3,lsr#16 @ i0 + eor r2,r5,r2,lsl#16 + and r8,lr,r3,lsr#8 @ i1 + ldrb r7,[r10,r7] @ Td4[s3>>16] + eor r6,r6,r9,lsl#16 + ldrb r8,[r10,r8] @ Td4[s3>>8] + and r9,lr,r3 @ i2 + + add r3,r10,r3,lsr#24 + ldrb r9,[r10,r9] @ Td4[s3>>0] + ldrb r3,[r3] @ Td4[s3>>24] + eor r0,r0,r7,lsl#16 + ldr r7,[r11,#0] + eor r1,r1,r8,lsl#8 + ldr r4,[r11,#4] + eor r2,r9,r2,lsl#8 + ldr r5,[r11,#8] + eor r3,r6,r3,lsl#24 + ldr r6,[r11,#12] + + eor r0,r0,r7 + eor r1,r1,r4 + eor r2,r2,r5 + eor r3,r3,r6 + + sub r10,r10,#1024 + ldr pc,[sp],#4 @ pop and return + +.byte 65,69,83,32,102,111,114,32,65,82,77,118,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 diff --git a/lib/asm/arm_arch.h b/lib/asm/arm_arch.h new file mode 100644 index 0000000..3fc9e69 --- /dev/null +++ b/lib/asm/arm_arch.h @@ -0,0 +1,83 @@ +/* + * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the OpenSSL license (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +#ifndef __ARM_ARCH_H__ +# define __ARM_ARCH_H__ + +# if !defined(__ARM_ARCH__) +# if defined(__CC_ARM) +# define __ARM_ARCH__ __TARGET_ARCH_ARM +# if defined(__BIG_ENDIAN) +# define __ARMEB__ +# else +# define __ARMEL__ +# endif +# elif defined(__GNUC__) +# if defined(__aarch64__) +# define __ARM_ARCH__ 8 +# if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ +# define __ARMEB__ +# else +# define __ARMEL__ +# endif + /* + * Why doesn't gcc define __ARM_ARCH__? Instead it defines + * bunch of below macros. See all_architectires[] table in + * gcc/config/arm/arm.c. On a side note it defines + * __ARMEL__/__ARMEB__ for little-/big-endian. + */ +# elif defined(__ARM_ARCH) +# define __ARM_ARCH__ __ARM_ARCH +# elif defined(__ARM_ARCH_8A__) +# define __ARM_ARCH__ 8 +# elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH__ 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__) || \ + defined(__ARM_ARCH_6Z__)|| defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) +# define __ARM_ARCH__ 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__)|| defined(__ARM_ARCH_5TE__) || \ + defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH__ 5 +# elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__) +# define __ARM_ARCH__ 4 +# else +# error "unsupported ARM architecture" +# endif +# endif +# endif + +# if !defined(__ARM_MAX_ARCH__) +# define __ARM_MAX_ARCH__ __ARM_ARCH__ +# endif + +# if __ARM_MAX_ARCH__<__ARM_ARCH__ +# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__" +# elif __ARM_MAX_ARCH__!=__ARM_ARCH__ +# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__) +# error "can't build universal big-endian binary" +# endif +# endif + +# if !__ASSEMBLER__ +extern unsigned int OPENSSL_armcap_P; +# endif + +# define ARMV7_NEON (1<<0) +# define ARMV7_TICK (1<<1) +# define ARMV8_AES (1<<2) +# define ARMV8_SHA1 (1<<3) +# define ARMV8_SHA256 (1<<4) +# define ARMV8_PMULL (1<<5) + +#endif diff --git a/lib/asm/mips.S b/lib/asm/mips.S new file mode 100644 index 0000000..4aa0e4c --- /dev/null +++ b/lib/asm/mips.S @@ -0,0 +1,1835 @@ +.text +#ifdef OPENSSL_FIPSCANISTER +# include +#endif + +#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) +#define _MIPS_ARCH_MIPS32R2 +#endif + +#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) +.option pic2 +#endif +.set noat +.align 5 +.ent _mips_AES_encrypt +_mips_AES_encrypt: + .frame $29,0,$31 + .set reorder + lw $12,0($6) + lw $13,4($6) + lw $14,8($6) + lw $15,12($6) + lw $30,240($6) + add $3,$6,16 + + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + sub $30,1 +#if defined(__mips_smartmips) + ext $1,$9,8,8 +.Loop_enc: + ext $2,$10,8,8 + ext $24,$11,8,8 + ext $25,$8,8,8 + lwxs $12,$1($7) # Te1[s1>>16] + ext $1,$10,16,8 + lwxs $13,$2($7) # Te1[s2>>16] + ext $2,$11,16,8 + lwxs $14,$24($7) # Te1[s3>>16] + ext $24,$8,16,8 + lwxs $15,$25($7) # Te1[s0>>16] + ext $25,$9,16,8 + + lwxs $16,$1($7) # Te2[s2>>8] + ext $1,$11,24,8 + lwxs $17,$2($7) # Te2[s3>>8] + ext $2,$8,24,8 + lwxs $18,$24($7) # Te2[s0>>8] + ext $24,$9,24,8 + lwxs $19,$25($7) # Te2[s1>>8] + ext $25,$10,24,8 + + lwxs $20,$1($7) # Te3[s3] + ext $1,$8,0,8 + lwxs $21,$2($7) # Te3[s0] + ext $2,$9,0,8 + lwxs $22,$24($7) # Te3[s1] + ext $24,$10,0,8 + lwxs $23,$25($7) # Te3[s2] + ext $25,$11,0,8 + + rotr $12,$12,24 + rotr $13,$13,24 + rotr $14,$14,24 + rotr $15,$15,24 + + rotr $16,$16,16 + rotr $17,$17,16 + rotr $18,$18,16 + rotr $19,$19,16 + + xor $12,$16 + lwxs $16,$1($7) # Te0[s0>>24] + xor $13,$17 + lwxs $17,$2($7) # Te0[s1>>24] + xor $14,$18 + lwxs $18,$24($7) # Te0[s2>>24] + xor $15,$19 + lwxs $19,$25($7) # Te0[s3>>24] + + rotr $20,$20,8 + lw $8,0($3) + rotr $21,$21,8 + lw $9,4($3) + rotr $22,$22,8 + lw $10,8($3) + rotr $23,$23,8 + lw $11,12($3) + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + sub $30,1 + add $3,16 + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + .set noreorder + bnez $30,.Loop_enc + ext $1,$9,8,8 + + srl $1,$9,6 +#else + srl $1,$9,6 +.Loop_enc: + srl $2,$10,6 + srl $24,$11,6 + srl $25,$8,6 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + lw $12,0($1) # Te1[s1>>16] + srl $1,$10,14 + lw $13,0($2) # Te1[s2>>16] + srl $2,$11,14 + lw $14,0($24) # Te1[s3>>16] + srl $24,$8,14 + lw $15,0($25) # Te1[s0>>16] + srl $25,$9,14 +#else + lwl $12,2($1) # Te1[s1>>16] + lwl $13,2($2) # Te1[s2>>16] + lwl $14,2($24) # Te1[s3>>16] + lwl $15,2($25) # Te1[s0>>16] + lwr $12,3($1) # Te1[s1>>16] + srl $1,$10,14 + lwr $13,3($2) # Te1[s2>>16] + srl $2,$11,14 + lwr $14,3($24) # Te1[s3>>16] + srl $24,$8,14 + lwr $15,3($25) # Te1[s0>>16] + srl $25,$9,14 +#endif + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + rotr $12,$12,24 + rotr $13,$13,24 + rotr $14,$14,24 + rotr $15,$15,24 +# if defined(_MIPSEL) + lw $16,0($1) # Te2[s2>>8] + srl $1,$11,22 + lw $17,0($2) # Te2[s3>>8] + srl $2,$8,22 + lw $18,0($24) # Te2[s0>>8] + srl $24,$9,22 + lw $19,0($25) # Te2[s1>>8] + srl $25,$10,22 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lw $20,0($1) # Te3[s3] + ins $1,$8,2,8 + lw $21,0($2) # Te3[s0] + ins $2,$9,2,8 + lw $22,0($24) # Te3[s1] + ins $24,$10,2,8 + lw $23,0($25) # Te3[s2] + ins $25,$11,2,8 +# else + lw $16,0($1) # Te2[s2>>8] + ins $1,$11,2,8 + lw $17,0($2) # Te2[s3>>8] + ins $2,$8,2,8 + lw $18,0($24) # Te2[s0>>8] + ins $24,$9,2,8 + lw $19,0($25) # Te2[s1>>8] + ins $25,$10,2,8 + + lw $20,0($1) # Te3[s3] + sll $1,$8,2 + lw $21,0($2) # Te3[s0] + sll $2,$9,2 + lw $22,0($24) # Te3[s1] + sll $24,$10,2 + lw $23,0($25) # Te3[s2] + sll $25,$11,2 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +# endif + rotr $16,$16,16 + rotr $17,$17,16 + rotr $18,$18,16 + rotr $19,$19,16 + + rotr $20,$20,8 + rotr $21,$21,8 + rotr $22,$22,8 + rotr $23,$23,8 +#else + lwl $16,1($1) # Te2[s2>>8] + lwl $17,1($2) # Te2[s3>>8] + lwl $18,1($24) # Te2[s0>>8] + lwl $19,1($25) # Te2[s1>>8] + lwr $16,2($1) # Te2[s2>>8] + srl $1,$11,22 + lwr $17,2($2) # Te2[s3>>8] + srl $2,$8,22 + lwr $18,2($24) # Te2[s0>>8] + srl $24,$9,22 + lwr $19,2($25) # Te2[s1>>8] + srl $25,$10,22 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $20,0($1) # Te3[s3] + lwl $21,0($2) # Te3[s0] + lwl $22,0($24) # Te3[s1] + lwl $23,0($25) # Te3[s2] + lwr $20,1($1) # Te3[s3] + sll $1,$8,2 + lwr $21,1($2) # Te3[s0] + sll $2,$9,2 + lwr $22,1($24) # Te3[s1] + sll $24,$10,2 + lwr $23,1($25) # Te3[s2] + sll $25,$11,2 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#endif + xor $12,$16 + lw $16,0($1) # Te0[s0>>24] + xor $13,$17 + lw $17,0($2) # Te0[s1>>24] + xor $14,$18 + lw $18,0($24) # Te0[s2>>24] + xor $15,$19 + lw $19,0($25) # Te0[s3>>24] + + xor $12,$20 + lw $8,0($3) + xor $13,$21 + lw $9,4($3) + xor $14,$22 + lw $10,8($3) + xor $15,$23 + lw $11,12($3) + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + sub $30,1 + add $3,16 + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + .set noreorder + bnez $30,.Loop_enc + srl $1,$9,6 +#endif + + .set reorder + srl $2,$10,6 + srl $24,$11,6 + srl $25,$8,6 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $12,2($1) # Te4[s1>>16] + srl $1,$10,14 + lbu $13,2($2) # Te4[s2>>16] + srl $2,$11,14 + lbu $14,2($24) # Te4[s3>>16] + srl $24,$8,14 + lbu $15,2($25) # Te4[s0>>16] + srl $25,$9,14 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) +# if defined(_MIPSEL) + lbu $16,2($1) # Te4[s2>>8] + ins $1,$8,2,8 + lbu $17,2($2) # Te4[s3>>8] + ins $2,$9,2,8 + lbu $18,2($24) # Te4[s0>>8] + ins $24,$10,2,8 + lbu $19,2($25) # Te4[s1>>8] + ins $25,$11,2,8 + + lbu $20,2($1) # Te4[s0>>24] + srl $1,$11,22 + lbu $21,2($2) # Te4[s1>>24] + srl $2,$8,22 + lbu $22,2($24) # Te4[s2>>24] + srl $24,$9,22 + lbu $23,2($25) # Te4[s3>>24] + srl $25,$10,22 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +# else + lbu $16,2($1) # Te4[s2>>8] + sll $1,$8,2 + lbu $17,2($2) # Te4[s3>>8] + sll $2,$9,2 + lbu $18,2($24) # Te4[s0>>8] + sll $24,$10,2 + lbu $19,2($25) # Te4[s1>>8] + sll $25,$11,2 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $20,2($1) # Te4[s0>>24] + ins $1,$11,2,8 + lbu $21,2($2) # Te4[s1>>24] + ins $2,$8,2,8 + lbu $22,2($24) # Te4[s2>>24] + ins $24,$9,2,8 + lbu $23,2($25) # Te4[s3>>24] + ins $25,$10,2,8 +# endif + sll $12,$12,8 + sll $13,$13,8 + sll $14,$14,8 + sll $15,$15,8 + + ins $12,$16,16,8 + lbu $16,2($1) # Te4[s3] + ins $13,$17,16,8 + lbu $17,2($2) # Te4[s0] + ins $14,$18,16,8 + lbu $18,2($24) # Te4[s1] + ins $15,$19,16,8 + lbu $19,2($25) # Te4[s2] + + ins $12,$20,0,8 + lw $8,0($3) + ins $13,$21,0,8 + lw $9,4($3) + ins $14,$22,0,8 + lw $10,8($3) + ins $15,$23,0,8 + lw $11,12($3) + + ins $12,$16,24,8 + ins $13,$17,24,8 + ins $14,$18,24,8 + ins $15,$19,24,8 +#else + lbu $16,2($1) # Te4[s2>>8] + sll $1,$8,2 + lbu $17,2($2) # Te4[s3>>8] + sll $2,$9,2 + lbu $18,2($24) # Te4[s0>>8] + sll $24,$10,2 + lbu $19,2($25) # Te4[s1>>8] + sll $25,$11,2 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $20,2($1) # Te4[s0>>24] + srl $1,$11,22 + lbu $21,2($2) # Te4[s1>>24] + srl $2,$8,22 + lbu $22,2($24) # Te4[s2>>24] + srl $24,$9,22 + lbu $23,2($25) # Te4[s3>>24] + srl $25,$10,22 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + + sll $12,$12,8 + sll $13,$13,8 + sll $14,$14,8 + sll $15,$15,8 + + sll $16,$16,16 + sll $17,$17,16 + sll $18,$18,16 + sll $19,$19,16 + + xor $12,$16 + lbu $16,2($1) # Te4[s3] + xor $13,$17 + lbu $17,2($2) # Te4[s0] + xor $14,$18 + lbu $18,2($24) # Te4[s1] + xor $15,$19 + lbu $19,2($25) # Te4[s2] + + #sll $20,$20,0 + lw $8,0($3) + #sll $21,$21,0 + lw $9,4($3) + #sll $22,$22,0 + lw $10,8($3) + #sll $23,$23,0 + lw $11,12($3) + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + sll $16,$16,24 + sll $17,$17,24 + sll $18,$18,24 + sll $19,$19,24 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 +#endif + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + jr $31 +.end _mips_AES_encrypt + +.align 5 +.globl AES_encrypt +.ent AES_encrypt +AES_encrypt: + .frame $29,64,$31 + .mask 0xc0ff0000,-4 + .set noreorder + sub $29,64 + sw $31,64-1*4($29) + sw $30,64-2*4($29) + sw $23,64-3*4($29) + sw $22,64-4*4($29) + sw $21,64-5*4($29) + sw $20,64-6*4($29) + sw $19,64-7*4($29) + sw $18,64-8*4($29) + sw $17,64-9*4($29) + sw $16,64-10*4($29) + .cplocal $7 + .cpsetup $25,$0,AES_encrypt + .set reorder + la $7,AES_Te # PIC-ified 'load address' + + lwl $8,0+3($4) + lwl $9,4+3($4) + lwl $10,8+3($4) + lwl $11,12+3($4) + lwr $8,0+0($4) + lwr $9,4+0($4) + lwr $10,8+0($4) + lwr $11,12+0($4) + + bal _mips_AES_encrypt + + swr $8,0+0($5) + swr $9,4+0($5) + swr $10,8+0($5) + swr $11,12+0($5) + swl $8,0+3($5) + swl $9,4+3($5) + swl $10,8+3($5) + swl $11,12+3($5) + + .set noreorder + lw $31,64-1*4($29) + lw $30,64-2*4($29) + lw $23,64-3*4($29) + lw $22,64-4*4($29) + lw $21,64-5*4($29) + lw $20,64-6*4($29) + lw $19,64-7*4($29) + lw $18,64-8*4($29) + lw $17,64-9*4($29) + lw $16,64-10*4($29) + jr $31 + add $29,64 +.end AES_encrypt +.align 5 +.ent _mips_AES_decrypt +_mips_AES_decrypt: + .frame $29,0,$31 + .set reorder + lw $12,0($6) + lw $13,4($6) + lw $14,8($6) + lw $15,12($6) + lw $30,240($6) + add $3,$6,16 + + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + sub $30,1 +#if defined(__mips_smartmips) + ext $1,$11,8,8 +.Loop_dec: + ext $2,$8,8,8 + ext $24,$9,8,8 + ext $25,$10,8,8 + lwxs $12,$1($7) # Td1[s3>>16] + ext $1,$10,16,8 + lwxs $13,$2($7) # Td1[s0>>16] + ext $2,$11,16,8 + lwxs $14,$24($7) # Td1[s1>>16] + ext $24,$8,16,8 + lwxs $15,$25($7) # Td1[s2>>16] + ext $25,$9,16,8 + + lwxs $16,$1($7) # Td2[s2>>8] + ext $1,$9,24,8 + lwxs $17,$2($7) # Td2[s3>>8] + ext $2,$10,24,8 + lwxs $18,$24($7) # Td2[s0>>8] + ext $24,$11,24,8 + lwxs $19,$25($7) # Td2[s1>>8] + ext $25,$8,24,8 + + lwxs $20,$1($7) # Td3[s1] + ext $1,$8,0,8 + lwxs $21,$2($7) # Td3[s2] + ext $2,$9,0,8 + lwxs $22,$24($7) # Td3[s3] + ext $24,$10,0,8 + lwxs $23,$25($7) # Td3[s0] + ext $25,$11,0,8 + + rotr $12,$12,24 + rotr $13,$13,24 + rotr $14,$14,24 + rotr $15,$15,24 + + rotr $16,$16,16 + rotr $17,$17,16 + rotr $18,$18,16 + rotr $19,$19,16 + + xor $12,$16 + lwxs $16,$1($7) # Td0[s0>>24] + xor $13,$17 + lwxs $17,$2($7) # Td0[s1>>24] + xor $14,$18 + lwxs $18,$24($7) # Td0[s2>>24] + xor $15,$19 + lwxs $19,$25($7) # Td0[s3>>24] + + rotr $20,$20,8 + lw $8,0($3) + rotr $21,$21,8 + lw $9,4($3) + rotr $22,$22,8 + lw $10,8($3) + rotr $23,$23,8 + lw $11,12($3) + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + sub $30,1 + add $3,16 + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + .set noreorder + bnez $30,.Loop_dec + ext $1,$11,8,8 + + srl $1,$11,6 +#else + srl $1,$11,6 +.Loop_dec: + srl $2,$8,6 + srl $24,$9,6 + srl $25,$10,6 + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + lw $12,0($1) # Td1[s3>>16] + srl $1,$10,14 + lw $13,0($2) # Td1[s0>>16] + srl $2,$11,14 + lw $14,0($24) # Td1[s1>>16] + srl $24,$8,14 + lw $15,0($25) # Td1[s2>>16] + srl $25,$9,14 +#else + lwl $12,2($1) # Td1[s3>>16] + lwl $13,2($2) # Td1[s0>>16] + lwl $14,2($24) # Td1[s1>>16] + lwl $15,2($25) # Td1[s2>>16] + lwr $12,3($1) # Td1[s3>>16] + srl $1,$10,14 + lwr $13,3($2) # Td1[s0>>16] + srl $2,$11,14 + lwr $14,3($24) # Td1[s1>>16] + srl $24,$8,14 + lwr $15,3($25) # Td1[s2>>16] + srl $25,$9,14 +#endif + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + rotr $12,$12,24 + rotr $13,$13,24 + rotr $14,$14,24 + rotr $15,$15,24 +# if defined(_MIPSEL) + lw $16,0($1) # Td2[s2>>8] + srl $1,$9,22 + lw $17,0($2) # Td2[s3>>8] + srl $2,$10,22 + lw $18,0($24) # Td2[s0>>8] + srl $24,$11,22 + lw $19,0($25) # Td2[s1>>8] + srl $25,$8,22 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lw $20,0($1) # Td3[s1] + ins $1,$8,2,8 + lw $21,0($2) # Td3[s2] + ins $2,$9,2,8 + lw $22,0($24) # Td3[s3] + ins $24,$10,2,8 + lw $23,0($25) # Td3[s0] + ins $25,$11,2,8 +#else + lw $16,0($1) # Td2[s2>>8] + ins $1,$9,2,8 + lw $17,0($2) # Td2[s3>>8] + ins $2,$10,2,8 + lw $18,0($24) # Td2[s0>>8] + ins $24,$11,2,8 + lw $19,0($25) # Td2[s1>>8] + ins $25,$8,2,8 + + lw $20,0($1) # Td3[s1] + sll $1,$8,2 + lw $21,0($2) # Td3[s2] + sll $2,$9,2 + lw $22,0($24) # Td3[s3] + sll $24,$10,2 + lw $23,0($25) # Td3[s0] + sll $25,$11,2 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#endif + rotr $16,$16,16 + rotr $17,$17,16 + rotr $18,$18,16 + rotr $19,$19,16 + + rotr $20,$20,8 + rotr $21,$21,8 + rotr $22,$22,8 + rotr $23,$23,8 +#else + lwl $16,1($1) # Td2[s2>>8] + lwl $17,1($2) # Td2[s3>>8] + lwl $18,1($24) # Td2[s0>>8] + lwl $19,1($25) # Td2[s1>>8] + lwr $16,2($1) # Td2[s2>>8] + srl $1,$9,22 + lwr $17,2($2) # Td2[s3>>8] + srl $2,$10,22 + lwr $18,2($24) # Td2[s0>>8] + srl $24,$11,22 + lwr $19,2($25) # Td2[s1>>8] + srl $25,$8,22 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lwl $20,0($1) # Td3[s1] + lwl $21,0($2) # Td3[s2] + lwl $22,0($24) # Td3[s3] + lwl $23,0($25) # Td3[s0] + lwr $20,1($1) # Td3[s1] + sll $1,$8,2 + lwr $21,1($2) # Td3[s2] + sll $2,$9,2 + lwr $22,1($24) # Td3[s3] + sll $24,$10,2 + lwr $23,1($25) # Td3[s0] + sll $25,$11,2 + + and $1,0x3fc + and $2,0x3fc + and $24,0x3fc + and $25,0x3fc + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#endif + + xor $12,$16 + lw $16,0($1) # Td0[s0>>24] + xor $13,$17 + lw $17,0($2) # Td0[s1>>24] + xor $14,$18 + lw $18,0($24) # Td0[s2>>24] + xor $15,$19 + lw $19,0($25) # Td0[s3>>24] + + xor $12,$20 + lw $8,0($3) + xor $13,$21 + lw $9,4($3) + xor $14,$22 + lw $10,8($3) + xor $15,$23 + lw $11,12($3) + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 + + sub $30,1 + add $3,16 + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + .set noreorder + bnez $30,.Loop_dec + srl $1,$11,6 +#endif + + .set reorder + lw $16,1024($7) # prefetch Td4 + srl $1,$11,8 + lw $17,1024+32($7) + srl $2,$8,8 + lw $18,1024+64($7) + srl $24,$9,8 + lw $19,1024+96($7) + srl $25,$10,8 + lw $20,1024+128($7) + and $1,0xff + lw $21,1024+160($7) + and $2,0xff + lw $22,1024+192($7) + and $24,0xff + lw $23,1024+224($7) + and $25,0xff + + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $12,1024($1) # Td4[s3>>16] + srl $1,$10,16 + lbu $13,1024($2) # Td4[s0>>16] + srl $2,$11,16 + lbu $14,1024($24) # Td4[s1>>16] + srl $24,$8,16 + lbu $15,1024($25) # Td4[s2>>16] + srl $25,$9,16 + + and $1,0xff + and $2,0xff + and $24,0xff + and $25,0xff + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) +# if defined(_MIPSEL) + lbu $16,1024($1) # Td4[s2>>8] + ins $1,$8,0,8 + lbu $17,1024($2) # Td4[s3>>8] + ins $2,$9,0,8 + lbu $18,1024($24) # Td4[s0>>8] + ins $24,$10,0,8 + lbu $19,1024($25) # Td4[s1>>8] + ins $25,$11,0,8 + + lbu $20,1024($1) # Td4[s0>>24] + srl $1,$9,24 + lbu $21,1024($2) # Td4[s1>>24] + srl $2,$10,24 + lbu $22,1024($24) # Td4[s2>>24] + srl $24,$11,24 + lbu $23,1024($25) # Td4[s3>>24] + srl $25,$8,24 + + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 +# else + lbu $16,1024($1) # Td4[s2>>8] + and $1,$8,0xff + lbu $17,1024($2) # Td4[s3>>8] + and $2,$9,0xff + lbu $18,1024($24) # Td4[s0>>8] + and $24,$10,0xff + lbu $19,1024($25) # Td4[s1>>8] + and $25,$11,0xff + + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $20,1024($1) # Td4[s0>>24] + ins $1,$9,0,8 + lbu $21,1024($2) # Td4[s1>>24] + ins $2,$10,0,8 + lbu $22,1024($24) # Td4[s2>>24] + ins $24,$11,0,8 + lbu $23,1024($25) # Td4[s3>>24] + ins $25,$8,0,8 +# endif + sll $12,$12,8 + sll $13,$13,8 + sll $14,$14,8 + sll $15,$15,8 + + ins $12,$16,16,8 + lbu $16,1024($1) # Td4[s1] + ins $13,$17,16,8 + lbu $17,1024($2) # Td4[s2] + ins $14,$18,16,8 + lbu $18,1024($24) # Td4[s3] + ins $15,$19,16,8 + lbu $19,1024($25) # Td4[s0] + + ins $12,$20,0,8 + lw $8,0($3) + ins $13,$21,0,8 + lw $9,4($3) + ins $14,$22,0,8 + lw $10,8($3) + ins $15,$23,0,8 + lw $11,12($3) + + ins $12,$16,24,8 + ins $13,$17,24,8 + ins $14,$18,24,8 + ins $15,$19,24,8 +#else + lbu $16,1024($1) # Td4[s2>>8] + and $1,$8,0xff + lbu $17,1024($2) # Td4[s3>>8] + and $2,$9,0xff + lbu $18,1024($24) # Td4[s0>>8] + and $24,$10,0xff + lbu $19,1024($25) # Td4[s1>>8] + and $25,$11,0xff + + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $20,1024($1) # Td4[s0>>24] + srl $1,$9,24 + lbu $21,1024($2) # Td4[s1>>24] + srl $2,$10,24 + lbu $22,1024($24) # Td4[s2>>24] + srl $24,$11,24 + lbu $23,1024($25) # Td4[s3>>24] + srl $25,$8,24 + + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + + sll $12,$12,8 + sll $13,$13,8 + sll $14,$14,8 + sll $15,$15,8 + + sll $16,$16,16 + sll $17,$17,16 + sll $18,$18,16 + sll $19,$19,16 + + xor $12,$16 + lbu $16,1024($1) # Td4[s1] + xor $13,$17 + lbu $17,1024($2) # Td4[s2] + xor $14,$18 + lbu $18,1024($24) # Td4[s3] + xor $15,$19 + lbu $19,1024($25) # Td4[s0] + + #sll $20,$20,0 + lw $8,0($3) + #sll $21,$21,0 + lw $9,4($3) + #sll $22,$22,0 + lw $10,8($3) + #sll $23,$23,0 + lw $11,12($3) + + xor $12,$20 + xor $13,$21 + xor $14,$22 + xor $15,$23 + + sll $16,$16,24 + sll $17,$17,24 + sll $18,$18,24 + sll $19,$19,24 + + xor $12,$16 + xor $13,$17 + xor $14,$18 + xor $15,$19 +#endif + + xor $8,$12 + xor $9,$13 + xor $10,$14 + xor $11,$15 + + jr $31 +.end _mips_AES_decrypt + +.align 5 +.globl AES_decrypt +.ent AES_decrypt +AES_decrypt: + .frame $29,64,$31 + .mask 0xc0ff0000,-4 + .set noreorder + sub $29,64 + sw $31,64-1*4($29) + sw $30,64-2*4($29) + sw $23,64-3*4($29) + sw $22,64-4*4($29) + sw $21,64-5*4($29) + sw $20,64-6*4($29) + sw $19,64-7*4($29) + sw $18,64-8*4($29) + sw $17,64-9*4($29) + sw $16,64-10*4($29) + .cplocal $7 + .cpsetup $25,$0,AES_decrypt + .set reorder + la $7,AES_Td # PIC-ified 'load address' + + lwl $8,0+3($4) + lwl $9,4+3($4) + lwl $10,8+3($4) + lwl $11,12+3($4) + lwr $8,0+0($4) + lwr $9,4+0($4) + lwr $10,8+0($4) + lwr $11,12+0($4) + + bal _mips_AES_decrypt + + swr $8,0+0($5) + swr $9,4+0($5) + swr $10,8+0($5) + swr $11,12+0($5) + swl $8,0+3($5) + swl $9,4+3($5) + swl $10,8+3($5) + swl $11,12+3($5) + + .set noreorder + lw $31,64-1*4($29) + lw $30,64-2*4($29) + lw $23,64-3*4($29) + lw $22,64-4*4($29) + lw $21,64-5*4($29) + lw $20,64-6*4($29) + lw $19,64-7*4($29) + lw $18,64-8*4($29) + lw $17,64-9*4($29) + lw $16,64-10*4($29) + jr $31 + add $29,64 +.end AES_decrypt +.align 5 +.ent _mips_AES_set_encrypt_key +_mips_AES_set_encrypt_key: + .frame $29,0,$31 + .set noreorder + beqz $4,.Lekey_done + li $2,-1 + beqz $6,.Lekey_done + add $3,$7,256 + + .set reorder + lwl $8,0+3($4) # load 128 bits + lwl $9,4+3($4) + lwl $10,8+3($4) + lwl $11,12+3($4) + li $1,128 + lwr $8,0+0($4) + lwr $9,4+0($4) + lwr $10,8+0($4) + lwr $11,12+0($4) + .set noreorder + beq $5,$1,.L128bits + li $30,10 + + .set reorder + lwl $12,16+3($4) # load 192 bits + lwl $13,20+3($4) + li $1,192 + lwr $12,16+0($4) + lwr $13,20+0($4) + .set noreorder + beq $5,$1,.L192bits + li $30,8 + + .set reorder + lwl $14,24+3($4) # load 256 bits + lwl $15,28+3($4) + li $1,256 + lwr $14,24+0($4) + lwr $15,28+0($4) + .set noreorder + beq $5,$1,.L256bits + li $30,7 + + b .Lekey_done + li $2,-2 + +.align 4 +.L128bits: + .set reorder + srl $1,$11,16 + srl $2,$11,8 + and $1,0xff + and $2,0xff + and $24,$11,0xff + srl $25,$11,24 + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,0($1) + lbu $2,0($2) + lbu $24,0($24) + lbu $25,0($25) + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + sw $11,12($6) + sub $30,1 + add $6,16 + + sll $1,$1,8 + #sll $2,$2,0 + sll $24,$24,24 + sll $25,$25,16 + + xor $8,$1 + lw $1,0($3) + xor $8,$2 + xor $8,$24 + xor $8,$25 + xor $8,$1 + + xor $9,$8 + xor $10,$9 + xor $11,$10 + + .set noreorder + bnez $30,.L128bits + add $3,4 + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + li $30,10 + sw $11,12($6) + li $2,0 + sw $30,80($6) + b .Lekey_done + sub $6,10*16 + +.align 4 +.L192bits: + .set reorder + srl $1,$13,16 + srl $2,$13,8 + and $1,0xff + and $2,0xff + and $24,$13,0xff + srl $25,$13,24 + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,0($1) + lbu $2,0($2) + lbu $24,0($24) + lbu $25,0($25) + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + sw $11,12($6) + sw $12,16($6) + sw $13,20($6) + sub $30,1 + add $6,24 + + sll $1,$1,8 + #sll $2,$2,0 + sll $24,$24,24 + sll $25,$25,16 + + xor $8,$1 + lw $1,0($3) + xor $8,$2 + xor $8,$24 + xor $8,$25 + xor $8,$1 + + xor $9,$8 + xor $10,$9 + xor $11,$10 + xor $12,$11 + xor $13,$12 + + .set noreorder + bnez $30,.L192bits + add $3,4 + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + li $30,12 + sw $11,12($6) + li $2,0 + sw $30,48($6) + b .Lekey_done + sub $6,12*16 + +.align 4 +.L256bits: + .set reorder + srl $1,$15,16 + srl $2,$15,8 + and $1,0xff + and $2,0xff + and $24,$15,0xff + srl $25,$15,24 + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,0($1) + lbu $2,0($2) + lbu $24,0($24) + lbu $25,0($25) + + sw $8,0($6) + sw $9,4($6) + sw $10,8($6) + sw $11,12($6) + sw $12,16($6) + sw $13,20($6) + sw $14,24($6) + sw $15,28($6) + sub $30,1 + + sll $1,$1,8 + #sll $2,$2,0 + sll $24,$24,24 + sll $25,$25,16 + + xor $8,$1 + lw $1,0($3) + xor $8,$2 + xor $8,$24 + xor $8,$25 + xor $8,$1 + + xor $9,$8 + xor $10,$9 + xor $11,$10 + beqz $30,.L256bits_done + + srl $1,$11,24 + srl $2,$11,16 + srl $24,$11,8 + and $25,$11,0xff + and $2,0xff + and $24,0xff + add $1,$7 + add $2,$7 + add $24,$7 + add $25,$7 + lbu $1,0($1) + lbu $2,0($2) + lbu $24,0($24) + lbu $25,0($25) + sll $1,24 + sll $2,16 + sll $24,8 + + xor $12,$1 + xor $12,$2 + xor $12,$24 + xor $12,$25 + + xor $13,$12 + xor $14,$13 + xor $15,$14 + + add $6,32 + .set noreorder + b .L256bits + add $3,4 + +.L256bits_done: + sw $8,32($6) + sw $9,36($6) + sw $10,40($6) + li $30,14 + sw $11,44($6) + li $2,0 + sw $30,48($6) + sub $6,12*16 + +.Lekey_done: + jr $31 + nop +.end _mips_AES_set_encrypt_key + +.globl AES_set_encrypt_key +.ent AES_set_encrypt_key +AES_set_encrypt_key: + .frame $29,32,$31 + .mask 0xc0000000,-4 + .set noreorder + sub $29,32 + sw $31,32-1*4($29) + sw $30,32-2*4($29) + .cplocal $7 + .cpsetup $25,$0,AES_set_encrypt_key + .set reorder + la $7,AES_Te4 # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + .set noreorder + move $4,$2 + lw $31,32-1*4($29) + lw $30,32-2*4($29) + jr $31 + add $29,32 +.end AES_set_encrypt_key +.align 5 +.globl AES_set_decrypt_key +.ent AES_set_decrypt_key +AES_set_decrypt_key: + .frame $29,32,$31 + .mask 0xc0000000,-4 + .set noreorder + sub $29,32 + sw $31,32-1*4($29) + sw $30,32-2*4($29) + .cplocal $7 + .cpsetup $25,$0,AES_set_decrypt_key + .set reorder + la $7,AES_Te4 # PIC-ified 'load address' + + bal _mips_AES_set_encrypt_key + + bltz $2,.Ldkey_done + + sll $1,$30,4 + add $4,$6,0 + add $5,$6,$1 +.align 4 +.Lswap: + lw $8,0($4) + lw $9,4($4) + lw $10,8($4) + lw $11,12($4) + lw $12,0($5) + lw $13,4($5) + lw $14,8($5) + lw $15,12($5) + sw $8,0($5) + sw $9,4($5) + sw $10,8($5) + sw $11,12($5) + add $4,16 + sub $5,16 + sw $12,-16($4) + sw $13,-12($4) + sw $14,-8($4) + sw $15,-4($4) + bne $4,$5,.Lswap + + lw $8,16($6) # modulo-scheduled + lui $2,0x8080 + sub $30,1 + or $2,0x8080 + sll $30,2 + add $6,16 + lui $25,0x1b1b + nor $24,$0,$2 + or $25,0x1b1b +.align 4 +.Lmix: + and $1,$8,$2 + and $9,$8,$24 + srl $10,$1,7 + addu $9,$9 # tp2<<1 + subu $1,$10 + and $1,$25 + xor $9,$1 + + and $1,$9,$2 + and $10,$9,$24 + srl $11,$1,7 + addu $10,$10 # tp4<<1 + subu $1,$11 + and $1,$25 + xor $10,$1 + + and $1,$10,$2 + and $11,$10,$24 + srl $12,$1,7 + addu $11,$11 # tp8<<1 + subu $1,$12 + and $1,$25 + xor $11,$1 + + xor $12,$11,$8 + xor $15,$11,$10 + xor $13,$12,$9 + xor $14,$12,$10 + +#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2) + rotr $8,$14,16 + xor $15,$9 + rotr $9,$12,24 + xor $15,$8 + rotr $10,$13,8 + xor $15,$9 + lw $8,4($6) # modulo-scheduled + xor $15,$10 +#else + sll $8,$14,16 + xor $15,$9 + srl $9,$14,16 + xor $15,$8 + sll $8,$12,8 + xor $15,$9 + srl $9,$12,24 + xor $15,$8 + sll $8,$13,24 + xor $15,$9 + srl $9,$13,8 + xor $15,$8 + lw $8,4($6) # modulo-scheduled + xor $15,$9 +#endif + sub $30,1 + sw $15,0($6) + add $6,4 + bnez $30,.Lmix + + li $2,0 +.Ldkey_done: + .set noreorder + move $4,$2 + lw $31,32-1*4($29) + lw $30,32-2*4($29) + jr $31 + add $29,32 +.end AES_set_decrypt_key +.rdata +.align 10 +AES_Te: +.byte 0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84 # Te0 +.byte 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d +.byte 0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd +.byte 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54 +.byte 0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03 +.byte 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d +.byte 0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62 +.byte 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a +.byte 0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d +.byte 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87 +.byte 0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb +.byte 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b +.byte 0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67 +.byte 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea +.byte 0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7 +.byte 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b +.byte 0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c +.byte 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a +.byte 0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41 +.byte 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f +.byte 0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4 +.byte 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08 +.byte 0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73 +.byte 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f +.byte 0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52 +.byte 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e +.byte 0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1 +.byte 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5 +.byte 0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36 +.byte 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d +.byte 0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69 +.byte 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f +.byte 0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e +.byte 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e +.byte 0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2 +.byte 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb +.byte 0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d +.byte 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce +.byte 0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e +.byte 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97 +.byte 0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68 +.byte 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c +.byte 0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f +.byte 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed +.byte 0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46 +.byte 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b +.byte 0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4 +.byte 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a +.byte 0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a +.byte 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16 +.byte 0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7 +.byte 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94 +.byte 0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10 +.byte 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81 +.byte 0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44 +.byte 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3 +.byte 0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe +.byte 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a +.byte 0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc +.byte 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04 +.byte 0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1 +.byte 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63 +.byte 0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a +.byte 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d +.byte 0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14 +.byte 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f +.byte 0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2 +.byte 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39 +.byte 0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2 +.byte 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47 +.byte 0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7 +.byte 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95 +.byte 0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98 +.byte 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f +.byte 0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e +.byte 0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83 +.byte 0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29 +.byte 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c +.byte 0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2 +.byte 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76 +.byte 0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56 +.byte 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e +.byte 0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a +.byte 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4 +.byte 0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e +.byte 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6 +.byte 0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4 +.byte 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b +.byte 0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43 +.byte 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7 +.byte 0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64 +.byte 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0 +.byte 0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa +.byte 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25 +.byte 0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e +.byte 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18 +.byte 0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88 +.byte 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72 +.byte 0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1 +.byte 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51 +.byte 0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c +.byte 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21 +.byte 0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc +.byte 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85 +.byte 0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42 +.byte 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa +.byte 0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05 +.byte 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12 +.byte 0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f +.byte 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0 +.byte 0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58 +.byte 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9 +.byte 0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13 +.byte 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33 +.byte 0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70 +.byte 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7 +.byte 0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22 +.byte 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20 +.byte 0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff +.byte 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a +.byte 0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8 +.byte 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17 +.byte 0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31 +.byte 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8 +.byte 0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0 +.byte 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11 +.byte 0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc +.byte 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a + +AES_Td: +.byte 0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53 # Td0 +.byte 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96 +.byte 0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1 +.byte 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93 +.byte 0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6 +.byte 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25 +.byte 0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7 +.byte 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f +.byte 0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67 +.byte 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1 +.byte 0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12 +.byte 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6 +.byte 0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95 +.byte 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda +.byte 0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3 +.byte 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44 +.byte 0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78 +.byte 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd +.byte 0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17 +.byte 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4 +.byte 0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82 +.byte 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45 +.byte 0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84 +.byte 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94 +.byte 0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19 +.byte 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7 +.byte 0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2 +.byte 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a +.byte 0xb2,0xeb,0x28,0x07, 0x2f,0xb5,0xc2,0x03 +.byte 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5 +.byte 0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2 +.byte 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c +.byte 0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92 +.byte 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1 +.byte 0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5 +.byte 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a +.byte 0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0 +.byte 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75 +.byte 0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa +.byte 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51 +.byte 0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d +.byte 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46 +.byte 0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05 +.byte 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff +.byte 0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97 +.byte 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77 +.byte 0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88 +.byte 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb +.byte 0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9 +.byte 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00 +.byte 0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48 +.byte 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e +.byte 0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56 +.byte 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27 +.byte 0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21 +.byte 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a +.byte 0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f +.byte 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e +.byte 0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2 +.byte 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16 +.byte 0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5 +.byte 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d +.byte 0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad +.byte 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8 +.byte 0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c +.byte 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd +.byte 0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc +.byte 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34 +.byte 0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc +.byte 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63 +.byte 0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10 +.byte 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20 +.byte 0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8 +.byte 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d +.byte 0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3 +.byte 0x0d,0x86,0x52,0xec, 0x77,0xc1,0xe3,0xd0 +.byte 0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99 +.byte 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22 +.byte 0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a +.byte 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef +.byte 0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1 +.byte 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36 +.byte 0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28 +.byte 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4 +.byte 0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d +.byte 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62 +.byte 0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8 +.byte 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5 +.byte 0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c +.byte 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3 +.byte 0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7 +.byte 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b +.byte 0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4 +.byte 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8 +.byte 0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e +.byte 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6 +.byte 0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce +.byte 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6 +.byte 0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31 +.byte 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0 +.byte 0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6 +.byte 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15 +.byte 0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7 +.byte 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f +.byte 0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d +.byte 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf +.byte 0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b +.byte 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f +.byte 0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d +.byte 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e +.byte 0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52 +.byte 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13 +.byte 0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a +.byte 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89 +.byte 0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35 +.byte 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c +.byte 0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f +.byte 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf +.byte 0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b +.byte 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86 +.byte 0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e +.byte 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f +.byte 0x16,0x1d,0xc3,0x72, 0xbc,0xe2,0x25,0x0c +.byte 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41 +.byte 0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde +.byte 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90 +.byte 0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70 +.byte 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 + +.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 # Td4 +.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb +.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 +.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb +.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d +.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e +.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 +.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 +.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 +.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 +.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda +.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 +.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a +.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 +.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 +.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b +.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea +.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 +.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 +.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e +.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 +.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b +.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 +.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 +.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 +.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f +.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d +.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef +.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 +.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 +.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 +.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + +AES_Te4: +.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 # Te4 +.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 +.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 +.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 +.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc +.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 +.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a +.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 +.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 +.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 +.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b +.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf +.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 +.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 +.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 +.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 +.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 +.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 +.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 +.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb +.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c +.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 +.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 +.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 +.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 +.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a +.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e +.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e +.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 +.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf +.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 +.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + +.byte 0x01,0x00,0x00,0x00, 0x02,0x00,0x00,0x00 # rcon +.byte 0x04,0x00,0x00,0x00, 0x08,0x00,0x00,0x00 +.byte 0x10,0x00,0x00,0x00, 0x20,0x00,0x00,0x00 +.byte 0x40,0x00,0x00,0x00, 0x80,0x00,0x00,0x00 +.byte 0x1B,0x00,0x00,0x00, 0x36,0x00,0x00,0x00 diff --git a/lib/asm/x86.S b/lib/asm/x86.S new file mode 100644 index 0000000..2e89193 --- /dev/null +++ b/lib/asm/x86.S @@ -0,0 +1,3244 @@ +.file "aes-586.s" +.text +.type _x86_AES_encrypt_compact,@function +.align 16 +_x86_AES_encrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L000loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ecx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ecx,%edi + xorl %esi,%ecx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ecx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%ecx + andl %edx,%ebp + leal (%edx,%edx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %edx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %edx,%edi + xorl %esi,%edx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%edx + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%edx + andl %eax,%ebp + leal (%eax,%eax,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %eax,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %eax,%edi + xorl %esi,%eax + rorl $24,%edi + xorl %ebp,%esi + roll $24,%eax + xorl %edi,%esi + movl $2155905152,%ebp + xorl %esi,%eax + andl %ebx,%ebp + leal (%ebx,%ebx,1),%edi + movl %ebp,%esi + shrl $7,%ebp + andl $4278124286,%edi + subl %ebp,%esi + movl %ebx,%ebp + andl $454761243,%esi + rorl $16,%ebp + xorl %edi,%esi + movl %ebx,%edi + xorl %esi,%ebx + rorl $24,%edi + xorl %ebp,%esi + roll $24,%ebx + xorl %edi,%esi + xorl %esi,%ebx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L000loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movzbl -128(%ebp,%esi,1),%esi + movzbl %ch,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ah,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $8,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movzbl -128(%ebp,%ecx,1),%ecx + shll $24,%ecx + xorl %ecx,%edx + movl %esi,%ecx + + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_encrypt_compact,.-_x86_AES_encrypt_compact +.type _sse_AES_encrypt_compact,@function +.align 16 +_sse_AES_encrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L001loop: + pshufw $8,%mm0,%mm1 + pshufw $13,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $13,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $8,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $8,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + movd %mm2,%eax + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + movd %mm6,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $8,%esi + shrl $16,%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shrl $16,%eax + movd %ecx,%mm1 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %ah,%edi + shll $16,%ecx + andl $255,%eax + orl %esi,%ecx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + andl $255,%ebx + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%ecx + shll $16,%eax + movzbl -128(%ebp,%edi,1),%esi + orl %eax,%edx + shll $8,%esi + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + orl %ebx,%edx + movl 20(%esp),%edi + movd %ecx,%mm4 + movd %edx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L002out + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + movq %mm0,%mm1 + movq %mm4,%mm5 + pcmpgtb %mm0,%mm3 + pcmpgtb %mm4,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + pshufw $177,%mm0,%mm2 + pshufw $177,%mm4,%mm6 + paddb %mm0,%mm0 + paddb %mm4,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pshufw $177,%mm2,%mm3 + pshufw $177,%mm6,%mm7 + pxor %mm0,%mm1 + pxor %mm4,%mm5 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm3,%mm2 + movq %mm7,%mm6 + pslld $8,%mm3 + pslld $8,%mm7 + psrld $24,%mm2 + psrld $24,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + psrld $8,%mm1 + psrld $8,%mm5 + movl -128(%ebp),%eax + pslld $24,%mm3 + pslld $24,%mm7 + movl -64(%ebp),%ebx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl (%ebp),%ecx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl 64(%ebp),%edx + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L001loop +.align 16 +.L002out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_encrypt_compact,.-_sse_AES_encrypt_compact +.type _x86_AES_encrypt,@function +.align 16 +_x86_AES_encrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L003loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl (%ebp,%esi,8),%esi + movzbl %ch,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movzbl %bh,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + movl (%ebp,%edx,8),%edx + movzbl %ah,%eax + xorl 3(%ebp,%eax,8),%edx + movl 4(%esp),%eax + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + xorl 1(%ebp,%ecx,8),%edx + movl %esi,%ecx + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L003loop + movl %eax,%esi + andl $255,%esi + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %bh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + shrl $16,%ebx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %ch,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $24,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + shrl $24,%ecx + movl 2(%ebp,%esi,8),%esi + andl $255,%esi + movzbl %dh,%edi + movl (%ebp,%edi,8),%edi + andl $65280,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edx + andl $255,%edi + movl (%ebp,%edi,8),%edi + andl $16711680,%edi + xorl %edi,%esi + movzbl %bh,%edi + movl 2(%ebp,%edi,8),%edi + andl $4278190080,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movl 2(%ebp,%edx,8),%edx + andl $255,%edx + movzbl %ah,%eax + movl (%ebp,%eax,8),%eax + andl $65280,%eax + xorl %eax,%edx + movl 4(%esp),%eax + andl $255,%ebx + movl (%ebp,%ebx,8),%ebx + andl $16711680,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + movl 2(%ebp,%ecx,8),%ecx + andl $4278190080,%ecx + xorl %ecx,%edx + movl %esi,%ecx + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Te: +.long 2774754246,2774754246 +.long 2222750968,2222750968 +.long 2574743534,2574743534 +.long 2373680118,2373680118 +.long 234025727,234025727 +.long 3177933782,3177933782 +.long 2976870366,2976870366 +.long 1422247313,1422247313 +.long 1345335392,1345335392 +.long 50397442,50397442 +.long 2842126286,2842126286 +.long 2099981142,2099981142 +.long 436141799,436141799 +.long 1658312629,1658312629 +.long 3870010189,3870010189 +.long 2591454956,2591454956 +.long 1170918031,1170918031 +.long 2642575903,2642575903 +.long 1086966153,1086966153 +.long 2273148410,2273148410 +.long 368769775,368769775 +.long 3948501426,3948501426 +.long 3376891790,3376891790 +.long 200339707,200339707 +.long 3970805057,3970805057 +.long 1742001331,1742001331 +.long 4255294047,4255294047 +.long 3937382213,3937382213 +.long 3214711843,3214711843 +.long 4154762323,4154762323 +.long 2524082916,2524082916 +.long 1539358875,1539358875 +.long 3266819957,3266819957 +.long 486407649,486407649 +.long 2928907069,2928907069 +.long 1780885068,1780885068 +.long 1513502316,1513502316 +.long 1094664062,1094664062 +.long 49805301,49805301 +.long 1338821763,1338821763 +.long 1546925160,1546925160 +.long 4104496465,4104496465 +.long 887481809,887481809 +.long 150073849,150073849 +.long 2473685474,2473685474 +.long 1943591083,1943591083 +.long 1395732834,1395732834 +.long 1058346282,1058346282 +.long 201589768,201589768 +.long 1388824469,1388824469 +.long 1696801606,1696801606 +.long 1589887901,1589887901 +.long 672667696,672667696 +.long 2711000631,2711000631 +.long 251987210,251987210 +.long 3046808111,3046808111 +.long 151455502,151455502 +.long 907153956,907153956 +.long 2608889883,2608889883 +.long 1038279391,1038279391 +.long 652995533,652995533 +.long 1764173646,1764173646 +.long 3451040383,3451040383 +.long 2675275242,2675275242 +.long 453576978,453576978 +.long 2659418909,2659418909 +.long 1949051992,1949051992 +.long 773462580,773462580 +.long 756751158,756751158 +.long 2993581788,2993581788 +.long 3998898868,3998898868 +.long 4221608027,4221608027 +.long 4132590244,4132590244 +.long 1295727478,1295727478 +.long 1641469623,1641469623 +.long 3467883389,3467883389 +.long 2066295122,2066295122 +.long 1055122397,1055122397 +.long 1898917726,1898917726 +.long 2542044179,2542044179 +.long 4115878822,4115878822 +.long 1758581177,1758581177 +.long 0,0 +.long 753790401,753790401 +.long 1612718144,1612718144 +.long 536673507,536673507 +.long 3367088505,3367088505 +.long 3982187446,3982187446 +.long 3194645204,3194645204 +.long 1187761037,1187761037 +.long 3653156455,3653156455 +.long 1262041458,1262041458 +.long 3729410708,3729410708 +.long 3561770136,3561770136 +.long 3898103984,3898103984 +.long 1255133061,1255133061 +.long 1808847035,1808847035 +.long 720367557,720367557 +.long 3853167183,3853167183 +.long 385612781,385612781 +.long 3309519750,3309519750 +.long 3612167578,3612167578 +.long 1429418854,1429418854 +.long 2491778321,2491778321 +.long 3477423498,3477423498 +.long 284817897,284817897 +.long 100794884,100794884 +.long 2172616702,2172616702 +.long 4031795360,4031795360 +.long 1144798328,1144798328 +.long 3131023141,3131023141 +.long 3819481163,3819481163 +.long 4082192802,4082192802 +.long 4272137053,4272137053 +.long 3225436288,3225436288 +.long 2324664069,2324664069 +.long 2912064063,2912064063 +.long 3164445985,3164445985 +.long 1211644016,1211644016 +.long 83228145,83228145 +.long 3753688163,3753688163 +.long 3249976951,3249976951 +.long 1977277103,1977277103 +.long 1663115586,1663115586 +.long 806359072,806359072 +.long 452984805,452984805 +.long 250868733,250868733 +.long 1842533055,1842533055 +.long 1288555905,1288555905 +.long 336333848,336333848 +.long 890442534,890442534 +.long 804056259,804056259 +.long 3781124030,3781124030 +.long 2727843637,2727843637 +.long 3427026056,3427026056 +.long 957814574,957814574 +.long 1472513171,1472513171 +.long 4071073621,4071073621 +.long 2189328124,2189328124 +.long 1195195770,1195195770 +.long 2892260552,2892260552 +.long 3881655738,3881655738 +.long 723065138,723065138 +.long 2507371494,2507371494 +.long 2690670784,2690670784 +.long 2558624025,2558624025 +.long 3511635870,3511635870 +.long 2145180835,2145180835 +.long 1713513028,1713513028 +.long 2116692564,2116692564 +.long 2878378043,2878378043 +.long 2206763019,2206763019 +.long 3393603212,3393603212 +.long 703524551,703524551 +.long 3552098411,3552098411 +.long 1007948840,1007948840 +.long 2044649127,2044649127 +.long 3797835452,3797835452 +.long 487262998,487262998 +.long 1994120109,1994120109 +.long 1004593371,1004593371 +.long 1446130276,1446130276 +.long 1312438900,1312438900 +.long 503974420,503974420 +.long 3679013266,3679013266 +.long 168166924,168166924 +.long 1814307912,1814307912 +.long 3831258296,3831258296 +.long 1573044895,1573044895 +.long 1859376061,1859376061 +.long 4021070915,4021070915 +.long 2791465668,2791465668 +.long 2828112185,2828112185 +.long 2761266481,2761266481 +.long 937747667,937747667 +.long 2339994098,2339994098 +.long 854058965,854058965 +.long 1137232011,1137232011 +.long 1496790894,1496790894 +.long 3077402074,3077402074 +.long 2358086913,2358086913 +.long 1691735473,1691735473 +.long 3528347292,3528347292 +.long 3769215305,3769215305 +.long 3027004632,3027004632 +.long 4199962284,4199962284 +.long 133494003,133494003 +.long 636152527,636152527 +.long 2942657994,2942657994 +.long 2390391540,2390391540 +.long 3920539207,3920539207 +.long 403179536,403179536 +.long 3585784431,3585784431 +.long 2289596656,2289596656 +.long 1864705354,1864705354 +.long 1915629148,1915629148 +.long 605822008,605822008 +.long 4054230615,4054230615 +.long 3350508659,3350508659 +.long 1371981463,1371981463 +.long 602466507,602466507 +.long 2094914977,2094914977 +.long 2624877800,2624877800 +.long 555687742,555687742 +.long 3712699286,3712699286 +.long 3703422305,3703422305 +.long 2257292045,2257292045 +.long 2240449039,2240449039 +.long 2423288032,2423288032 +.long 1111375484,1111375484 +.long 3300242801,3300242801 +.long 2858837708,2858837708 +.long 3628615824,3628615824 +.long 84083462,84083462 +.long 32962295,32962295 +.long 302911004,302911004 +.long 2741068226,2741068226 +.long 1597322602,1597322602 +.long 4183250862,4183250862 +.long 3501832553,3501832553 +.long 2441512471,2441512471 +.long 1489093017,1489093017 +.long 656219450,656219450 +.long 3114180135,3114180135 +.long 954327513,954327513 +.long 335083755,335083755 +.long 3013122091,3013122091 +.long 856756514,856756514 +.long 3144247762,3144247762 +.long 1893325225,1893325225 +.long 2307821063,2307821063 +.long 2811532339,2811532339 +.long 3063651117,3063651117 +.long 572399164,572399164 +.long 2458355477,2458355477 +.long 552200649,552200649 +.long 1238290055,1238290055 +.long 4283782570,4283782570 +.long 2015897680,2015897680 +.long 2061492133,2061492133 +.long 2408352771,2408352771 +.long 4171342169,4171342169 +.long 2156497161,2156497161 +.long 386731290,386731290 +.long 3669999461,3669999461 +.long 837215959,837215959 +.long 3326231172,3326231172 +.long 3093850320,3093850320 +.long 3275833730,3275833730 +.long 2962856233,2962856233 +.long 1999449434,1999449434 +.long 286199582,286199582 +.long 3417354363,3417354363 +.long 4233385128,4233385128 +.long 3602627437,3602627437 +.long 974525996,974525996 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.byte 99,124,119,123,242,107,111,197 +.byte 48,1,103,43,254,215,171,118 +.byte 202,130,201,125,250,89,71,240 +.byte 173,212,162,175,156,164,114,192 +.byte 183,253,147,38,54,63,247,204 +.byte 52,165,229,241,113,216,49,21 +.byte 4,199,35,195,24,150,5,154 +.byte 7,18,128,226,235,39,178,117 +.byte 9,131,44,26,27,110,90,160 +.byte 82,59,214,179,41,227,47,132 +.byte 83,209,0,237,32,252,177,91 +.byte 106,203,190,57,74,76,88,207 +.byte 208,239,170,251,67,77,51,133 +.byte 69,249,2,127,80,60,159,168 +.byte 81,163,64,143,146,157,56,245 +.byte 188,182,218,33,16,255,243,210 +.byte 205,12,19,236,95,151,68,23 +.byte 196,167,126,61,100,93,25,115 +.byte 96,129,79,220,34,42,144,136 +.byte 70,238,184,20,222,94,11,219 +.byte 224,50,58,10,73,6,36,92 +.byte 194,211,172,98,145,149,228,121 +.byte 231,200,55,109,141,213,78,169 +.byte 108,86,244,234,101,122,174,8 +.byte 186,120,37,46,28,166,180,198 +.byte 232,221,116,31,75,189,139,138 +.byte 112,62,181,102,72,3,246,14 +.byte 97,53,87,185,134,193,29,158 +.byte 225,248,152,17,105,217,142,148 +.byte 155,30,135,233,206,85,40,223 +.byte 140,161,137,13,191,230,66,104 +.byte 65,153,45,15,176,84,187,22 +.long 1,2,4,8 +.long 16,32,64,128 +.long 27,54,0,0 +.long 0,0,0,0 +.size _x86_AES_encrypt,.-_x86_AES_encrypt +.globl AES_encrypt +.type AES_encrypt,@function +.align 16 +AES_encrypt: +.L_AES_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L004pic_point +.L004pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + leal .LAES_Te-.L004pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L005x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L005x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_encrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_encrypt,.-.L_AES_encrypt_begin +.type _x86_AES_decrypt_compact,@function +.align 16 +_x86_AES_decrypt_compact: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi +.align 16 +.L006loop: + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%eax + subl %edi,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %ecx,%eax + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ecx,%ebx + roll $8,%ecx + xorl %esi,%ebp + xorl %eax,%ecx + xorl %ebp,%eax + xorl %ebx,%ecx + xorl %ebp,%ebx + roll $24,%eax + xorl %ebp,%ecx + roll $16,%ebx + xorl %eax,%ecx + roll $8,%ebp + xorl %ebx,%ecx + movl 4(%esp),%eax + xorl %ebp,%ecx + movl %ecx,12(%esp) + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebx + subl %edi,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %edx,%ebx + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %edx,%ecx + roll $8,%edx + xorl %esi,%ebp + xorl %ebx,%edx + xorl %ebp,%ebx + xorl %ecx,%edx + xorl %ebp,%ecx + roll $24,%ebx + xorl %ebp,%edx + roll $16,%ecx + xorl %ebx,%edx + roll $8,%ebp + xorl %ecx,%edx + movl 8(%esp),%ebx + xorl %ebp,%edx + movl %edx,16(%esp) + movl $2155905152,%edi + andl %eax,%edi + movl %edi,%esi + shrl $7,%edi + leal (%eax,%eax,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %eax,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %eax,%edx + roll $8,%eax + xorl %esi,%ebp + xorl %ecx,%eax + xorl %ebp,%ecx + xorl %edx,%eax + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%eax + roll $16,%edx + xorl %ecx,%eax + roll $8,%ebp + xorl %edx,%eax + xorl %ebp,%eax + movl $2155905152,%edi + andl %ebx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ebx,%ebx,1),%ecx + subl %edi,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%edi + andl %ecx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%ecx,%ecx,1),%edx + subl %edi,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%edi + andl %edx,%edi + movl %edi,%esi + shrl $7,%edi + leal (%edx,%edx,1),%ebp + subl %edi,%esi + andl $4278124286,%ebp + andl $454761243,%esi + xorl %ebx,%edx + roll $8,%ebx + xorl %esi,%ebp + xorl %ecx,%ebx + xorl %ebp,%ecx + xorl %edx,%ebx + xorl %ebp,%edx + roll $24,%ecx + xorl %ebp,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%ebp + xorl %edx,%ebx + movl 12(%esp),%ecx + xorl %ebp,%ebx + movl 16(%esp),%edx + movl 20(%esp),%edi + movl 28(%esp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L006loop + movl %eax,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl -128(%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl -128(%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl -128(%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl -128(%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl -128(%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl -128(%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 24(%edi),%ecx + xorl 28(%edi),%edx + ret +.size _x86_AES_decrypt_compact,.-_x86_AES_decrypt_compact +.type _sse_AES_decrypt_compact,@function +.align 16 +_sse_AES_decrypt_compact: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) + movl $454761243,%eax + movl %eax,8(%esp) + movl %eax,12(%esp) + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx +.align 16 +.L007loop: + pshufw $12,%mm0,%mm1 + pshufw $9,%mm4,%mm5 + movd %mm1,%eax + movd %mm5,%ebx + movl %edi,20(%esp) + movzbl %al,%esi + movzbl %ah,%edx + pshufw $6,%mm0,%mm2 + movzbl -128(%ebp,%esi,1),%ecx + movzbl %bl,%edi + movzbl -128(%ebp,%edx,1),%edx + shrl $16,%eax + shll $8,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $16,%esi + pshufw $3,%mm4,%mm6 + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $24,%esi + shrl $16,%ebx + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shll $24,%esi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movd %mm2,%eax + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + shll $16,%esi + movd %mm6,%ebx + movd %ecx,%mm0 + movzbl -128(%ebp,%edi,1),%ecx + movzbl %al,%edi + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bl,%edi + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %ah,%edi + shll $16,%esi + shrl $16,%eax + orl %esi,%edx + movzbl -128(%ebp,%edi,1),%esi + movzbl %bh,%edi + shrl $16,%ebx + shll $8,%esi + movd %edx,%mm1 + movzbl -128(%ebp,%edi,1),%edx + movzbl %bh,%edi + shll $24,%edx + andl $255,%ebx + orl %esi,%edx + punpckldq %mm1,%mm0 + movzbl -128(%ebp,%edi,1),%esi + movzbl %al,%edi + shll $8,%esi + movzbl %ah,%eax + movzbl -128(%ebp,%ebx,1),%ebx + orl %esi,%ecx + movzbl -128(%ebp,%edi,1),%esi + orl %ebx,%edx + shll $16,%esi + movzbl -128(%ebp,%eax,1),%eax + orl %esi,%edx + shll $24,%eax + orl %eax,%ecx + movl 20(%esp),%edi + movd %edx,%mm4 + movd %ecx,%mm5 + punpckldq %mm5,%mm4 + addl $16,%edi + cmpl 24(%esp),%edi + ja .L008out + movq %mm0,%mm3 + movq %mm4,%mm7 + pshufw $228,%mm0,%mm2 + pshufw $228,%mm4,%mm6 + movq %mm0,%mm1 + movq %mm4,%mm5 + pshufw $177,%mm0,%mm0 + pshufw $177,%mm4,%mm4 + pslld $8,%mm2 + pslld $8,%mm6 + psrld $8,%mm3 + psrld $8,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pslld $16,%mm2 + pslld $16,%mm6 + psrld $16,%mm3 + psrld $16,%mm7 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movq 8(%esp),%mm3 + pxor %mm2,%mm2 + pxor %mm6,%mm6 + pcmpgtb %mm1,%mm2 + pcmpgtb %mm5,%mm6 + pand %mm3,%mm2 + pand %mm3,%mm6 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm2,%mm1 + pxor %mm6,%mm5 + movq %mm1,%mm3 + movq %mm5,%mm7 + movq %mm1,%mm2 + movq %mm5,%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pslld $24,%mm3 + pslld $24,%mm7 + psrld $8,%mm2 + psrld $8,%mm6 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + movq 8(%esp),%mm2 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pshufw $177,%mm1,%mm3 + pshufw $177,%mm5,%mm7 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm3,%mm3 + pxor %mm7,%mm7 + pcmpgtb %mm1,%mm3 + pcmpgtb %mm5,%mm7 + pand %mm2,%mm3 + pand %mm2,%mm7 + paddb %mm1,%mm1 + paddb %mm5,%mm5 + pxor %mm3,%mm1 + pxor %mm7,%mm5 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movq %mm1,%mm3 + movq %mm5,%mm7 + pshufw $177,%mm1,%mm2 + pshufw $177,%mm5,%mm6 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + pslld $8,%mm1 + pslld $8,%mm5 + psrld $8,%mm3 + psrld $8,%mm7 + movq (%edi),%mm2 + movq 8(%edi),%mm6 + pxor %mm1,%mm0 + pxor %mm5,%mm4 + pxor %mm3,%mm0 + pxor %mm7,%mm4 + movl -128(%ebp),%eax + pslld $16,%mm1 + pslld $16,%mm5 + movl -64(%ebp),%ebx + psrld $16,%mm3 + psrld $16,%mm7 + movl (%ebp),%ecx + pxor %mm1,%mm0 + pxor %mm5,%mm4 + movl 64(%ebp),%edx + pxor %mm3,%mm0 + pxor %mm7,%mm4 + pxor %mm2,%mm0 + pxor %mm6,%mm4 + jmp .L007loop +.align 16 +.L008out: + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + ret +.size _sse_AES_decrypt_compact,.-_sse_AES_decrypt_compact +.type _x86_AES_decrypt,@function +.align 16 +_x86_AES_decrypt: + movl %edi,20(%esp) + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,24(%esp) +.align 16 +.L009loop: + movl %eax,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %dh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ebx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,4(%esp) + + movl %ebx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %ah,%edi + xorl 3(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %ecx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + movl %esi,8(%esp) + + movl %ecx,%esi + andl $255,%esi + movl (%ebp,%esi,8),%esi + movzbl %bh,%edi + xorl 3(%ebp,%edi,8),%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + xorl 2(%ebp,%edi,8),%esi + movl %edx,%edi + shrl $24,%edi + xorl 1(%ebp,%edi,8),%esi + + movl 20(%esp),%edi + andl $255,%edx + movl (%ebp,%edx,8),%edx + movzbl %ch,%ecx + xorl 3(%ebp,%ecx,8),%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + xorl 2(%ebp,%ebx,8),%edx + movl 8(%esp),%ebx + shrl $24,%eax + xorl 1(%ebp,%eax,8),%edx + movl 4(%esp),%eax + + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + cmpl 24(%esp),%edi + movl %edi,20(%esp) + jb .L009loop + leal 2176(%ebp),%ebp + movl -128(%ebp),%edi + movl -96(%ebp),%esi + movl -64(%ebp),%edi + movl -32(%ebp),%esi + movl (%ebp),%edi + movl 32(%ebp),%esi + movl 64(%ebp),%edi + movl 96(%ebp),%esi + leal -128(%ebp),%ebp + movl %eax,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %dh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ebx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,4(%esp) + movl %ebx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %ah,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %ecx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl %esi,8(%esp) + movl %ecx,%esi + andl $255,%esi + movzbl (%ebp,%esi,1),%esi + movzbl %bh,%edi + movzbl (%ebp,%edi,1),%edi + shll $8,%edi + xorl %edi,%esi + movl %eax,%edi + shrl $16,%edi + andl $255,%edi + movzbl (%ebp,%edi,1),%edi + shll $16,%edi + xorl %edi,%esi + movl %edx,%edi + shrl $24,%edi + movzbl (%ebp,%edi,1),%edi + shll $24,%edi + xorl %edi,%esi + movl 20(%esp),%edi + andl $255,%edx + movzbl (%ebp,%edx,1),%edx + movzbl %ch,%ecx + movzbl (%ebp,%ecx,1),%ecx + shll $8,%ecx + xorl %ecx,%edx + movl %esi,%ecx + shrl $16,%ebx + andl $255,%ebx + movzbl (%ebp,%ebx,1),%ebx + shll $16,%ebx + xorl %ebx,%edx + movl 8(%esp),%ebx + shrl $24,%eax + movzbl (%ebp,%eax,1),%eax + shll $24,%eax + xorl %eax,%edx + movl 4(%esp),%eax + leal -2048(%ebp),%ebp + addl $16,%edi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + ret +.align 64 +.LAES_Td: +.long 1353184337,1353184337 +.long 1399144830,1399144830 +.long 3282310938,3282310938 +.long 2522752826,2522752826 +.long 3412831035,3412831035 +.long 4047871263,4047871263 +.long 2874735276,2874735276 +.long 2466505547,2466505547 +.long 1442459680,1442459680 +.long 4134368941,4134368941 +.long 2440481928,2440481928 +.long 625738485,625738485 +.long 4242007375,4242007375 +.long 3620416197,3620416197 +.long 2151953702,2151953702 +.long 2409849525,2409849525 +.long 1230680542,1230680542 +.long 1729870373,1729870373 +.long 2551114309,2551114309 +.long 3787521629,3787521629 +.long 41234371,41234371 +.long 317738113,317738113 +.long 2744600205,2744600205 +.long 3338261355,3338261355 +.long 3881799427,3881799427 +.long 2510066197,2510066197 +.long 3950669247,3950669247 +.long 3663286933,3663286933 +.long 763608788,763608788 +.long 3542185048,3542185048 +.long 694804553,694804553 +.long 1154009486,1154009486 +.long 1787413109,1787413109 +.long 2021232372,2021232372 +.long 1799248025,1799248025 +.long 3715217703,3715217703 +.long 3058688446,3058688446 +.long 397248752,397248752 +.long 1722556617,1722556617 +.long 3023752829,3023752829 +.long 407560035,407560035 +.long 2184256229,2184256229 +.long 1613975959,1613975959 +.long 1165972322,1165972322 +.long 3765920945,3765920945 +.long 2226023355,2226023355 +.long 480281086,480281086 +.long 2485848313,2485848313 +.long 1483229296,1483229296 +.long 436028815,436028815 +.long 2272059028,2272059028 +.long 3086515026,3086515026 +.long 601060267,601060267 +.long 3791801202,3791801202 +.long 1468997603,1468997603 +.long 715871590,715871590 +.long 120122290,120122290 +.long 63092015,63092015 +.long 2591802758,2591802758 +.long 2768779219,2768779219 +.long 4068943920,4068943920 +.long 2997206819,2997206819 +.long 3127509762,3127509762 +.long 1552029421,1552029421 +.long 723308426,723308426 +.long 2461301159,2461301159 +.long 4042393587,4042393587 +.long 2715969870,2715969870 +.long 3455375973,3455375973 +.long 3586000134,3586000134 +.long 526529745,526529745 +.long 2331944644,2331944644 +.long 2639474228,2639474228 +.long 2689987490,2689987490 +.long 853641733,853641733 +.long 1978398372,1978398372 +.long 971801355,971801355 +.long 2867814464,2867814464 +.long 111112542,111112542 +.long 1360031421,1360031421 +.long 4186579262,4186579262 +.long 1023860118,1023860118 +.long 2919579357,2919579357 +.long 1186850381,1186850381 +.long 3045938321,3045938321 +.long 90031217,90031217 +.long 1876166148,1876166148 +.long 4279586912,4279586912 +.long 620468249,620468249 +.long 2548678102,2548678102 +.long 3426959497,3426959497 +.long 2006899047,2006899047 +.long 3175278768,3175278768 +.long 2290845959,2290845959 +.long 945494503,945494503 +.long 3689859193,3689859193 +.long 1191869601,1191869601 +.long 3910091388,3910091388 +.long 3374220536,3374220536 +.long 0,0 +.long 2206629897,2206629897 +.long 1223502642,1223502642 +.long 2893025566,2893025566 +.long 1316117100,1316117100 +.long 4227796733,4227796733 +.long 1446544655,1446544655 +.long 517320253,517320253 +.long 658058550,658058550 +.long 1691946762,1691946762 +.long 564550760,564550760 +.long 3511966619,3511966619 +.long 976107044,976107044 +.long 2976320012,2976320012 +.long 266819475,266819475 +.long 3533106868,3533106868 +.long 2660342555,2660342555 +.long 1338359936,1338359936 +.long 2720062561,2720062561 +.long 1766553434,1766553434 +.long 370807324,370807324 +.long 179999714,179999714 +.long 3844776128,3844776128 +.long 1138762300,1138762300 +.long 488053522,488053522 +.long 185403662,185403662 +.long 2915535858,2915535858 +.long 3114841645,3114841645 +.long 3366526484,3366526484 +.long 2233069911,2233069911 +.long 1275557295,1275557295 +.long 3151862254,3151862254 +.long 4250959779,4250959779 +.long 2670068215,2670068215 +.long 3170202204,3170202204 +.long 3309004356,3309004356 +.long 880737115,880737115 +.long 1982415755,1982415755 +.long 3703972811,3703972811 +.long 1761406390,1761406390 +.long 1676797112,1676797112 +.long 3403428311,3403428311 +.long 277177154,277177154 +.long 1076008723,1076008723 +.long 538035844,538035844 +.long 2099530373,2099530373 +.long 4164795346,4164795346 +.long 288553390,288553390 +.long 1839278535,1839278535 +.long 1261411869,1261411869 +.long 4080055004,4080055004 +.long 3964831245,3964831245 +.long 3504587127,3504587127 +.long 1813426987,1813426987 +.long 2579067049,2579067049 +.long 4199060497,4199060497 +.long 577038663,577038663 +.long 3297574056,3297574056 +.long 440397984,440397984 +.long 3626794326,3626794326 +.long 4019204898,4019204898 +.long 3343796615,3343796615 +.long 3251714265,3251714265 +.long 4272081548,4272081548 +.long 906744984,906744984 +.long 3481400742,3481400742 +.long 685669029,685669029 +.long 646887386,646887386 +.long 2764025151,2764025151 +.long 3835509292,3835509292 +.long 227702864,227702864 +.long 2613862250,2613862250 +.long 1648787028,1648787028 +.long 3256061430,3256061430 +.long 3904428176,3904428176 +.long 1593260334,1593260334 +.long 4121936770,4121936770 +.long 3196083615,3196083615 +.long 2090061929,2090061929 +.long 2838353263,2838353263 +.long 3004310991,3004310991 +.long 999926984,999926984 +.long 2809993232,2809993232 +.long 1852021992,1852021992 +.long 2075868123,2075868123 +.long 158869197,158869197 +.long 4095236462,4095236462 +.long 28809964,28809964 +.long 2828685187,2828685187 +.long 1701746150,1701746150 +.long 2129067946,2129067946 +.long 147831841,147831841 +.long 3873969647,3873969647 +.long 3650873274,3650873274 +.long 3459673930,3459673930 +.long 3557400554,3557400554 +.long 3598495785,3598495785 +.long 2947720241,2947720241 +.long 824393514,824393514 +.long 815048134,815048134 +.long 3227951669,3227951669 +.long 935087732,935087732 +.long 2798289660,2798289660 +.long 2966458592,2966458592 +.long 366520115,366520115 +.long 1251476721,1251476721 +.long 4158319681,4158319681 +.long 240176511,240176511 +.long 804688151,804688151 +.long 2379631990,2379631990 +.long 1303441219,1303441219 +.long 1414376140,1414376140 +.long 3741619940,3741619940 +.long 3820343710,3820343710 +.long 461924940,461924940 +.long 3089050817,3089050817 +.long 2136040774,2136040774 +.long 82468509,82468509 +.long 1563790337,1563790337 +.long 1937016826,1937016826 +.long 776014843,776014843 +.long 1511876531,1511876531 +.long 1389550482,1389550482 +.long 861278441,861278441 +.long 323475053,323475053 +.long 2355222426,2355222426 +.long 2047648055,2047648055 +.long 2383738969,2383738969 +.long 2302415851,2302415851 +.long 3995576782,3995576782 +.long 902390199,902390199 +.long 3991215329,3991215329 +.long 1018251130,1018251130 +.long 1507840668,1507840668 +.long 1064563285,1064563285 +.long 2043548696,2043548696 +.long 3208103795,3208103795 +.long 3939366739,3939366739 +.long 1537932639,1537932639 +.long 342834655,342834655 +.long 2262516856,2262516856 +.long 2180231114,2180231114 +.long 1053059257,1053059257 +.long 741614648,741614648 +.long 1598071746,1598071746 +.long 1925389590,1925389590 +.long 203809468,203809468 +.long 2336832552,2336832552 +.long 1100287487,1100287487 +.long 1895934009,1895934009 +.long 3736275976,3736275976 +.long 2632234200,2632234200 +.long 2428589668,2428589668 +.long 1636092795,1636092795 +.long 1890988757,1890988757 +.long 1952214088,1952214088 +.long 1113045200,1113045200 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.byte 82,9,106,213,48,54,165,56 +.byte 191,64,163,158,129,243,215,251 +.byte 124,227,57,130,155,47,255,135 +.byte 52,142,67,68,196,222,233,203 +.byte 84,123,148,50,166,194,35,61 +.byte 238,76,149,11,66,250,195,78 +.byte 8,46,161,102,40,217,36,178 +.byte 118,91,162,73,109,139,209,37 +.byte 114,248,246,100,134,104,152,22 +.byte 212,164,92,204,93,101,182,146 +.byte 108,112,72,80,253,237,185,218 +.byte 94,21,70,87,167,141,157,132 +.byte 144,216,171,0,140,188,211,10 +.byte 247,228,88,5,184,179,69,6 +.byte 208,44,30,143,202,63,15,2 +.byte 193,175,189,3,1,19,138,107 +.byte 58,145,17,65,79,103,220,234 +.byte 151,242,207,206,240,180,230,115 +.byte 150,172,116,34,231,173,53,133 +.byte 226,249,55,232,28,117,223,110 +.byte 71,241,26,113,29,41,197,137 +.byte 111,183,98,14,170,24,190,27 +.byte 252,86,62,75,198,210,121,32 +.byte 154,219,192,254,120,205,90,244 +.byte 31,221,168,51,136,7,199,49 +.byte 177,18,16,89,39,128,236,95 +.byte 96,81,127,169,25,181,74,13 +.byte 45,229,122,159,147,201,156,239 +.byte 160,224,59,77,174,42,245,176 +.byte 200,235,187,60,131,83,153,97 +.byte 23,43,4,126,186,119,214,38 +.byte 225,105,20,99,85,33,12,125 +.size _x86_AES_decrypt,.-_x86_AES_decrypt +.globl AES_decrypt +.type AES_decrypt,@function +.align 16 +AES_decrypt: +.L_AES_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 28(%esp),%edi + movl %esp,%eax + subl $36,%esp + andl $-64,%esp + leal -127(%edi),%ebx + subl %esp,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esp + addl $4,%esp + movl %eax,28(%esp) + call .L010pic_point +.L010pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + leal .LAES_Td-.L010pic_point(%ebp),%ebp + leal 764(%esp),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + btl $25,(%eax) + jnc .L011x86 + movq (%esi),%mm0 + movq 8(%esi),%mm4 + call _sse_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 16 +.L011x86: + movl %ebp,24(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + call _x86_AES_decrypt_compact + movl 28(%esp),%esp + movl 24(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_decrypt,.-.L_AES_decrypt_begin +.globl AES_cbc_encrypt +.type AES_cbc_encrypt,@function +.align 16 +AES_cbc_encrypt: +.L_AES_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%ecx + cmpl $0,%ecx + je .L012drop_out + call .L013pic_point +.L013pic_point: + popl %ebp + leal OPENSSL_ia32cap_P,%eax + cmpl $0,40(%esp) + leal .LAES_Te-.L013pic_point(%ebp),%ebp + jne .L014picked_te + leal .LAES_Td-.LAES_Te(%ebp),%ebp +.L014picked_te: + pushfl + cld + cmpl $512,%ecx + jb .L015slow_way + testl $15,%ecx + jnz .L015slow_way + btl $28,(%eax) + jc .L015slow_way + leal -324(%esp),%esi + andl $-64,%esi + movl %ebp,%eax + leal 2304(%ebp),%ebx + movl %esi,%edx + andl $4095,%eax + andl $4095,%ebx + andl $4095,%edx + cmpl %ebx,%edx + jb .L016tbl_break_out + subl %ebx,%edx + subl %edx,%esi + jmp .L017tbl_ok +.align 4 +.L016tbl_break_out: + subl %eax,%edx + andl $4095,%edx + addl $384,%edx + subl %edx,%esi +.align 4 +.L017tbl_ok: + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 12(%edx),%edi + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl $0,316(%esp) + movl %edi,%ebx + movl $61,%ecx + subl %ebp,%ebx + movl %edi,%esi + andl $4095,%ebx + leal 76(%esp),%edi + cmpl $2304,%ebx + jb .L018do_copy + cmpl $3852,%ebx + jb .L019skip_copy +.align 4 +.L018do_copy: + movl %edi,44(%esp) +.long 2784229001 +.L019skip_copy: + movl $16,%edi +.align 4 +.L020prefetch_tbl: + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%esi + leal 128(%ebp),%ebp + subl $1,%edi + jnz .L020prefetch_tbl + subl $2048,%ebp + movl 32(%esp),%esi + movl 48(%esp),%edi + cmpl $0,%edx + je .L021fast_decrypt + movl (%edi),%eax + movl 4(%edi),%ebx +.align 16 +.L022fast_enc_loop: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + leal 16(%esi),%esi + movl 40(%esp),%ecx + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L022fast_enc_loop + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L023skip_ezero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L023skip_ezero: + movl 28(%esp),%esp + popfl +.L012drop_out: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L021fast_decrypt: + cmpl 36(%esp),%esi + je .L024fast_dec_in_place + movl %edi,52(%esp) +.align 4 +.align 16 +.L025fast_dec_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 52(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl 36(%esp),%edi + movl 32(%esp),%esi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl %esi,52(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edi + movl %edi,36(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L025fast_dec_loop + movl 52(%esp),%edi + movl 48(%esp),%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + jmp .L026fast_dec_out +.align 16 +.L024fast_dec_in_place: +.L027fast_dec_in_place_loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt + movl 48(%esp),%edi + movl 36(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + subl $16,%ecx + movl %ecx,40(%esp) + jnz .L027fast_dec_in_place_loop +.align 4 +.L026fast_dec_out: + cmpl $0,316(%esp) + movl 44(%esp),%edi + je .L028skip_dzero + movl $60,%ecx + xorl %eax,%eax +.align 4 +.long 2884892297 +.L028skip_dzero: + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L015slow_way: + movl (%eax),%eax + movl 36(%esp),%edi + leal -80(%esp),%esi + andl $-64,%esi + leal -143(%edi),%ebx + subl %esi,%ebx + negl %ebx + andl $960,%ebx + subl %ebx,%esi + leal 768(%esi),%ebx + subl %ebp,%ebx + andl $768,%ebx + leal 2176(%ebp,%ebx,1),%ebp + leal 24(%esp),%edx + xchgl %esi,%esp + addl $4,%esp + movl %ebp,24(%esp) + movl %esi,28(%esp) + movl %eax,52(%esp) + movl (%edx),%eax + movl 4(%edx),%ebx + movl 16(%edx),%esi + movl 20(%edx),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edi,44(%esp) + movl %esi,48(%esp) + movl %esi,%edi + movl %eax,%esi + cmpl $0,%edx + je .L029slow_decrypt + cmpl $16,%ecx + movl %ebx,%edx + jb .L030slow_enc_tail + btl $25,52(%esp) + jnc .L031slow_enc_x86 + movq (%edi),%mm0 + movq 8(%edi),%mm4 +.align 16 +.L032slow_enc_loop_sse: + pxor (%esi),%mm0 + pxor 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl 40(%esp),%ecx + movq %mm0,(%edi) + movq %mm4,8(%edi) + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L032slow_enc_loop_sse + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movq %mm0,(%esi) + movq %mm4,8(%esi) + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L031slow_enc_x86: + movl (%edi),%eax + movl 4(%edi),%ebx +.align 4 +.L033slow_enc_loop_x86: + movl 8(%edi),%ecx + movl 12(%edi),%edx + xorl (%esi),%eax + xorl 4(%esi),%ebx + xorl 8(%esi),%ecx + xorl 12(%esi),%edx + movl 44(%esp),%edi + call _x86_AES_encrypt_compact + movl 32(%esp),%esi + movl 36(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + leal 16(%esi),%esi + movl %esi,32(%esp) + leal 16(%edi),%edx + movl %edx,36(%esp) + subl $16,%ecx + cmpl $16,%ecx + movl %ecx,40(%esp) + jae .L033slow_enc_loop_x86 + testl $15,%ecx + jnz .L030slow_enc_tail + movl 48(%esp),%esi + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L030slow_enc_tail: + emms + movl %edx,%edi + movl $16,%ebx + subl %ecx,%ebx + cmpl %esi,%edi + je .L034enc_in_place +.align 4 +.long 2767451785 + jmp .L035enc_skip_in_place +.L034enc_in_place: + leal (%edi,%ecx,1),%edi +.L035enc_skip_in_place: + movl %ebx,%ecx + xorl %eax,%eax +.align 4 +.long 2868115081 + movl 48(%esp),%edi + movl %edx,%esi + movl (%edi),%eax + movl 4(%edi),%ebx + movl $16,40(%esp) + jmp .L033slow_enc_loop_x86 +.align 16 +.L029slow_decrypt: + btl $25,52(%esp) + jnc .L036slow_dec_loop_x86 +.align 4 +.L037slow_dec_loop_sse: + movq (%esi),%mm0 + movq 8(%esi),%mm4 + movl 44(%esp),%edi + call _sse_AES_decrypt_compact + movl 32(%esp),%esi + leal 60(%esp),%eax + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl 48(%esp),%edi + movq (%esi),%mm1 + movq 8(%esi),%mm5 + pxor (%edi),%mm0 + pxor 8(%edi),%mm4 + movq %mm1,(%edi) + movq %mm5,8(%edi) + subl $16,%ecx + jc .L038slow_dec_partial_sse + movq %mm0,(%ebx) + movq %mm4,8(%ebx) + leal 16(%ebx),%ebx + movl %ebx,36(%esp) + leal 16(%esi),%esi + movl %esi,32(%esp) + movl %ecx,40(%esp) + jnz .L037slow_dec_loop_sse + emms + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L038slow_dec_partial_sse: + movq %mm0,(%eax) + movq %mm4,8(%eax) + emms + addl $16,%ecx + movl %ebx,%edi + movl %eax,%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L036slow_dec_loop_x86: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + leal 60(%esp),%edi + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 44(%esp),%edi + call _x86_AES_decrypt_compact + movl 48(%esp),%edi + movl 40(%esp),%esi + xorl (%edi),%eax + xorl 4(%edi),%ebx + xorl 8(%edi),%ecx + xorl 12(%edi),%edx + subl $16,%esi + jc .L039slow_dec_partial_x86 + movl %esi,40(%esp) + movl 36(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + leal 16(%esi),%esi + movl %esi,36(%esp) + leal 60(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 32(%esp),%esi + leal 16(%esi),%esi + movl %esi,32(%esp) + jnz .L036slow_dec_loop_x86 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret + pushfl +.align 16 +.L039slow_dec_partial_x86: + leal 60(%esp),%esi + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 32(%esp),%esi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 40(%esp),%ecx + movl 36(%esp),%edi + leal 60(%esp),%esi +.align 4 +.long 2767451785 + movl 28(%esp),%esp + popfl + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_cbc_encrypt,.-.L_AES_cbc_encrypt_begin +.type _x86_AES_set_encrypt_key,@function +.align 16 +_x86_AES_set_encrypt_key: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 24(%esp),%esi + movl 32(%esp),%edi + testl $-1,%esi + jz .L040badpointer + testl $-1,%edi + jz .L040badpointer + call .L041pic_point +.L041pic_point: + popl %ebp + leal .LAES_Te-.L041pic_point(%ebp),%ebp + leal 2176(%ebp),%ebp + movl -128(%ebp),%eax + movl -96(%ebp),%ebx + movl -64(%ebp),%ecx + movl -32(%ebp),%edx + movl (%ebp),%eax + movl 32(%ebp),%ebx + movl 64(%ebp),%ecx + movl 96(%ebp),%edx + movl 28(%esp),%ecx + cmpl $128,%ecx + je .L04210rounds + cmpl $192,%ecx + je .L04312rounds + cmpl $256,%ecx + je .L04414rounds + movl $-2,%eax + jmp .L045exit +.L04210rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + xorl %ecx,%ecx + jmp .L04610shortcut +.align 4 +.L04710loop: + movl (%edi),%eax + movl 12(%edi),%edx +.L04610shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,16(%edi) + xorl 4(%edi),%eax + movl %eax,20(%edi) + xorl 8(%edi),%eax + movl %eax,24(%edi) + xorl 12(%edi),%eax + movl %eax,28(%edi) + incl %ecx + addl $16,%edi + cmpl $10,%ecx + jl .L04710loop + movl $10,80(%edi) + xorl %eax,%eax + jmp .L045exit +.L04312rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%ecx + movl 20(%esi),%edx + movl %ecx,16(%edi) + movl %edx,20(%edi) + xorl %ecx,%ecx + jmp .L04812shortcut +.align 4 +.L04912loop: + movl (%edi),%eax + movl 20(%edi),%edx +.L04812shortcut: + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,24(%edi) + xorl 4(%edi),%eax + movl %eax,28(%edi) + xorl 8(%edi),%eax + movl %eax,32(%edi) + xorl 12(%edi),%eax + movl %eax,36(%edi) + cmpl $7,%ecx + je .L05012break + incl %ecx + xorl 16(%edi),%eax + movl %eax,40(%edi) + xorl 20(%edi),%eax + movl %eax,44(%edi) + addl $24,%edi + jmp .L04912loop +.L05012break: + movl $12,72(%edi) + xorl %eax,%eax + jmp .L045exit +.L04414rounds: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,8(%edi) + movl %edx,12(%edi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + movl %eax,16(%edi) + movl %ebx,20(%edi) + movl %ecx,24(%edi) + movl %edx,28(%edi) + xorl %ecx,%ecx + jmp .L05114shortcut +.align 4 +.L05214loop: + movl 28(%edi),%edx +.L05114shortcut: + movl (%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $24,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $8,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $16,%ebx + xorl %ebx,%eax + xorl 896(%ebp,%ecx,4),%eax + movl %eax,32(%edi) + xorl 4(%edi),%eax + movl %eax,36(%edi) + xorl 8(%edi),%eax + movl %eax,40(%edi) + xorl 12(%edi),%eax + movl %eax,44(%edi) + cmpl $6,%ecx + je .L05314break + incl %ecx + movl %eax,%edx + movl 16(%edi),%eax + movzbl %dl,%esi + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shrl $16,%edx + shll $8,%ebx + movzbl %dl,%esi + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + movzbl %dh,%esi + shll $16,%ebx + xorl %ebx,%eax + movzbl -128(%ebp,%esi,1),%ebx + shll $24,%ebx + xorl %ebx,%eax + movl %eax,48(%edi) + xorl 20(%edi),%eax + movl %eax,52(%edi) + xorl 24(%edi),%eax + movl %eax,56(%edi) + xorl 28(%edi),%eax + movl %eax,60(%edi) + addl $32,%edi + jmp .L05214loop +.L05314break: + movl $14,48(%edi) + xorl %eax,%eax + jmp .L045exit +.L040badpointer: + movl $-1,%eax +.L045exit: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size _x86_AES_set_encrypt_key,.-_x86_AES_set_encrypt_key +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,@function +.align 16 +AES_set_encrypt_key: +.L_AES_set_encrypt_key_begin: + call _x86_AES_set_encrypt_key + ret +.size AES_set_encrypt_key,.-.L_AES_set_encrypt_key_begin +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,@function +.align 16 +AES_set_decrypt_key: +.L_AES_set_decrypt_key_begin: + call _x86_AES_set_encrypt_key + cmpl $0,%eax + je .L054proceed + ret +.L054proceed: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 28(%esp),%esi + movl 240(%esi),%ecx + leal (,%ecx,4),%ecx + leal (%esi,%ecx,4),%edi +.align 4 +.L055invert: + movl (%esi),%eax + movl 4(%esi),%ebx + movl (%edi),%ecx + movl 4(%edi),%edx + movl %eax,(%edi) + movl %ebx,4(%edi) + movl %ecx,(%esi) + movl %edx,4(%esi) + movl 8(%esi),%eax + movl 12(%esi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + movl %eax,8(%edi) + movl %ebx,12(%edi) + movl %ecx,8(%esi) + movl %edx,12(%esi) + addl $16,%esi + subl $16,%edi + cmpl %edi,%esi + jne .L055invert + movl 28(%esp),%edi + movl 240(%edi),%esi + leal -2(%esi,%esi,1),%esi + leal (%edi,%esi,8),%esi + movl %esi,28(%esp) + movl 16(%edi),%eax +.align 4 +.L056permute: + addl $16,%edi + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %eax,%ebx + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + xorl %eax,%ecx + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + roll $8,%eax + xorl %esi,%edx + movl 4(%edi),%ebp + xorl %ebx,%eax + xorl %edx,%ebx + xorl %ecx,%eax + roll $24,%ebx + xorl %edx,%ecx + xorl %edx,%eax + roll $16,%ecx + xorl %ebx,%eax + roll $8,%edx + xorl %ecx,%eax + movl %ebp,%ebx + xorl %edx,%eax + movl %eax,(%edi) + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + xorl %esi,%ecx + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %ebx,%ecx + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + xorl %ebx,%edx + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + roll $8,%ebx + xorl %esi,%eax + movl 8(%edi),%ebp + xorl %ecx,%ebx + xorl %eax,%ecx + xorl %edx,%ebx + roll $24,%ecx + xorl %eax,%edx + xorl %eax,%ebx + roll $16,%edx + xorl %ecx,%ebx + roll $8,%eax + xorl %edx,%ebx + movl %ebp,%ecx + xorl %eax,%ebx + movl %ebx,4(%edi) + movl $2155905152,%ebp + andl %ecx,%ebp + leal (%ecx,%ecx,1),%edx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%edx + andl $454761243,%esi + xorl %esi,%edx + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %ecx,%edx + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + xorl %ecx,%eax + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + roll $8,%ecx + xorl %esi,%ebx + movl 12(%edi),%ebp + xorl %edx,%ecx + xorl %ebx,%edx + xorl %eax,%ecx + roll $24,%edx + xorl %ebx,%eax + xorl %ebx,%ecx + roll $16,%eax + xorl %edx,%ecx + roll $8,%ebx + xorl %eax,%ecx + movl %ebp,%edx + xorl %ebx,%ecx + movl %ecx,8(%edi) + movl $2155905152,%ebp + andl %edx,%ebp + leal (%edx,%edx,1),%eax + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%eax + andl $454761243,%esi + xorl %esi,%eax + movl $2155905152,%ebp + andl %eax,%ebp + leal (%eax,%eax,1),%ebx + movl %ebp,%esi + shrl $7,%ebp + subl %ebp,%esi + andl $4278124286,%ebx + andl $454761243,%esi + xorl %edx,%eax + xorl %esi,%ebx + movl $2155905152,%ebp + andl %ebx,%ebp + leal (%ebx,%ebx,1),%ecx + movl %ebp,%esi + shrl $7,%ebp + xorl %edx,%ebx + subl %ebp,%esi + andl $4278124286,%ecx + andl $454761243,%esi + roll $8,%edx + xorl %esi,%ecx + movl 16(%edi),%ebp + xorl %eax,%edx + xorl %ecx,%eax + xorl %ebx,%edx + roll $24,%eax + xorl %ecx,%ebx + xorl %ecx,%edx + roll $16,%ebx + xorl %eax,%edx + roll $8,%ecx + xorl %ebx,%edx + movl %ebp,%eax + xorl %ecx,%edx + movl %edx,12(%edi) + cmpl 28(%esp),%edi + jb .L056permute + xorl %eax,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size AES_set_decrypt_key,.-.L_AES_set_decrypt_key_begin +.byte 65,69,83,32,102,111,114,32,120,56,54,44,32,67,82,89 +.byte 80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114 +.byte 111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4