/*
 * SHA-256 block transform — SSE4-optimised x86-64 implementation
 * (identifiable from the K256 table below, whose 64 values are the
 * SHA-256 round constants of FIPS 180-4, and from the pshufb
 * byte-shuffle masks).
 *
 * NOTE(review): this chunk is a damaged extraction of a larger file.
 * The original file's own line numbers (11, 15, 17, ... 954) are fused
 * into the text; the `__asm__` statement header, most of the 64-round
 * asm body (internal numbering jumps 69->74, 696->810, 810->925 show
 * elided spans), the declarations of `e` and `tbl` (both referenced in
 * the operand list below), the function braces, and the `#endif` are
 * all missing. Do NOT attempt to compile this as-is; restore the
 * function from the upstream source before making changes.
 */
11 #if defined(__x86_64__) || defined(__amd64__)    15 void Transform(uint32_t* s, 
const unsigned char* chunk, 
size_t blocks)
/*
 * K256: the 64 SHA-256 round constants (FIPS 180-4 §4.2.2), in order.
 * 16-byte aligned because the asm body loads them with movdqa
 * ("movdqa 0x0(%13),%%xmm9" etc.), which requires aligned operands.
 */
    17     static const uint32_t K256 
alignas(16) [] = {
    18         0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
    19         0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    20         0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
    21         0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    22         0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
    23         0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    24         0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
    25         0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    26         0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
    27         0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    28         0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
    29         0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    30         0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
    31         0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    32         0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
    33         0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
/*
 * pshufb control masks. FLIP_MASK byte-reverses each 32-bit lane
 * (big-endian message words -> little-endian registers); it is used
 * via xmm12 in the "pshufb %%xmm12,..." input loads below. SHUF_00BA
 * and SHUF_DC00 are applied via xmm10/xmm11 in the message-schedule
 * rounds — presumably loaded into those registers in asm lines not
 * visible in this chunk; verify against the full source.
 */
    35     static const uint32_t FLIP_MASK 
alignas(16) [] = {0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f};
    36     static const uint32_t SHUF_00BA 
alignas(16) [] = {0x03020100, 0x0b0a0908, 0xffffffff, 0xffffffff};
    37     static const uint32_t SHUF_DC00 
alignas(16) [] = {0xffffffff, 0xffffffff, 0x03020100, 0x0b0a0908};
/*
 * Scratch state handed to the asm via the constraint list.
 * NOTE(review): `e` (SHA-256 has eight working variables a..h) and the
 * `tbl` variable used in the "=r"(tbl) output operand are not declared
 * in the visible text — almost certainly lost in extraction, not a
 * real omission; confirm against upstream.
 */
    38     uint32_t a, b, c, d, f, g, h, y0, y1, y2;
    40     uint64_t inp_end, inp;
    41     uint32_t xfer 
alignas(16) [4];
/*
 * Inline-asm body (heavily elided in this extraction). What is visible:
 * the four movdqu+pshufb pairs load one 64-byte message block into
 * xmm4..xmm7 with per-word byte swap, then each visible group does
 * "movdqa 0xNN(%13),%%xmm9; paddd ..." — adding a 4-constant slice of
 * K256 (%13) to the schedule — followed by the sigma0/sigma1 message
 * expansion (palignr/pslld/psrld/psrlq/pxor/pshufd/pshufb sequences).
 * The scalar round computations and the loop control that originally
 * sat between these lines are missing from this chunk.
 */
        "pshufb %%xmm12,%%xmm4;"    64         "movdqu 0x10(%1),%%xmm5;"    65         "pshufb %%xmm12,%%xmm5;"    66         "movdqu 0x20(%1),%%xmm6;"    67         "pshufb %%xmm12,%%xmm6;"    68         "movdqu 0x30(%1),%%xmm7;"    69         "pshufb %%xmm12,%%xmm7;"    74         "movdqa 0x0(%13),%%xmm9;"    75         "paddd  %%xmm4,%%xmm9;"    77         "movdqa %%xmm7,%%xmm0;"    81         "palignr $0x4,%%xmm6,%%xmm0;"    86         "movdqa %%xmm5,%%xmm1;"    89         "paddd  %%xmm4,%%xmm0;"    93         "palignr $0x4,%%xmm4,%%xmm1;"    97         "movdqa %%xmm1,%%xmm2;"   101         "movdqa %%xmm1,%%xmm3;"   105         "pslld  $0x19,%%xmm1;"   115         "movdqa %%xmm3,%%xmm2;"   118         "movdqa %%xmm3,%%xmm8;"   127         "psrld  $0x12,%%xmm2;"   132         "pxor   %%xmm3,%%xmm1;"   139         "pxor   %%xmm2,%%xmm1;"   143         "pxor   %%xmm8,%%xmm1;"   147         "pshufd $0xfa,%%xmm7,%%xmm2;"   150         "paddd  %%xmm1,%%xmm0;"   153         "movdqa %%xmm2,%%xmm3;"   157         "movdqa %%xmm2,%%xmm8;"   163         "psrlq  $0x11,%%xmm2;"   165         "psrlq  $0x13,%%xmm3;"   173         "pxor   %%xmm3,%%xmm2;"   177         "pxor   %%xmm2,%%xmm8;"   181         "pshufb %%xmm10,%%xmm8;"   185         "paddd  %%xmm8,%%xmm0;"   188         "pshufd $0x50,%%xmm0,%%xmm2;"   191         "movdqa %%xmm2,%%xmm3;"   195         "movdqa %%xmm2,%%xmm4;"   200         "psrlq  $0x11,%%xmm2;"   203         "psrlq  $0x13,%%xmm3;"   211         "pxor   %%xmm3,%%xmm2;"   215         "pxor   %%xmm2,%%xmm4;"   219         "pshufb %%xmm11,%%xmm4;"   223         "paddd  %%xmm0,%%xmm4;"   228         "movdqa 0x10(%13),%%xmm9;"   229         "paddd  %%xmm5,%%xmm9;"   231         "movdqa %%xmm4,%%xmm0;"   235         "palignr $0x4,%%xmm7,%%xmm0;"   240         "movdqa %%xmm6,%%xmm1;"   243         "paddd  %%xmm5,%%xmm0;"   247         "palignr $0x4,%%xmm5,%%xmm1;"   251         "movdqa %%xmm1,%%xmm2;"   255         "movdqa %%xmm1,%%xmm3;"   259         
"pslld  $0x19,%%xmm1;"   269         "movdqa %%xmm3,%%xmm2;"   272         "movdqa %%xmm3,%%xmm8;"   281         "psrld  $0x12,%%xmm2;"   286         "pxor   %%xmm3,%%xmm1;"   293         "pxor   %%xmm2,%%xmm1;"   297         "pxor   %%xmm8,%%xmm1;"   301         "pshufd $0xfa,%%xmm4,%%xmm2;"   304         "paddd  %%xmm1,%%xmm0;"   307         "movdqa %%xmm2,%%xmm3;"   311         "movdqa %%xmm2,%%xmm8;"   317         "psrlq  $0x11,%%xmm2;"   319         "psrlq  $0x13,%%xmm3;"   327         "pxor   %%xmm3,%%xmm2;"   331         "pxor   %%xmm2,%%xmm8;"   335         "pshufb %%xmm10,%%xmm8;"   339         "paddd  %%xmm8,%%xmm0;"   342         "pshufd $0x50,%%xmm0,%%xmm2;"   345         "movdqa %%xmm2,%%xmm3;"   349         "movdqa %%xmm2,%%xmm5;"   354         "psrlq  $0x11,%%xmm2;"   357         "psrlq  $0x13,%%xmm3;"   365         "pxor   %%xmm3,%%xmm2;"   369         "pxor   %%xmm2,%%xmm5;"   373         "pshufb %%xmm11,%%xmm5;"   377         "paddd  %%xmm0,%%xmm5;"   382         "movdqa 0x20(%13),%%xmm9;"   383         "paddd  %%xmm6,%%xmm9;"   385         "movdqa %%xmm5,%%xmm0;"   389         "palignr $0x4,%%xmm4,%%xmm0;"   394         "movdqa %%xmm7,%%xmm1;"   397         "paddd  %%xmm6,%%xmm0;"   401         "palignr $0x4,%%xmm6,%%xmm1;"   405         "movdqa %%xmm1,%%xmm2;"   409         "movdqa %%xmm1,%%xmm3;"   413         "pslld  $0x19,%%xmm1;"   423         "movdqa %%xmm3,%%xmm2;"   426         "movdqa %%xmm3,%%xmm8;"   435         "psrld  $0x12,%%xmm2;"   440         "pxor   %%xmm3,%%xmm1;"   447         "pxor   %%xmm2,%%xmm1;"   451         "pxor   %%xmm8,%%xmm1;"   455         "pshufd $0xfa,%%xmm5,%%xmm2;"   458         "paddd  %%xmm1,%%xmm0;"   461         "movdqa %%xmm2,%%xmm3;"   465         "movdqa %%xmm2,%%xmm8;"   471         "psrlq  $0x11,%%xmm2;"   473         "psrlq  $0x13,%%xmm3;"   481         "pxor   %%xmm3,%%xmm2;"   485         "pxor   %%xmm2,%%xmm8;"   489         "pshufb %%xmm10,%%xmm8;"   493         "paddd  %%xmm8,%%xmm0;"   496       
  "pshufd $0x50,%%xmm0,%%xmm2;"   499         "movdqa %%xmm2,%%xmm3;"   503         "movdqa %%xmm2,%%xmm6;"   508         "psrlq  $0x11,%%xmm2;"   511         "psrlq  $0x13,%%xmm3;"   519         "pxor   %%xmm3,%%xmm2;"   523         "pxor   %%xmm2,%%xmm6;"   527         "pshufb %%xmm11,%%xmm6;"   531         "paddd  %%xmm0,%%xmm6;"   536         "movdqa 0x30(%13),%%xmm9;"   537         "paddd  %%xmm7,%%xmm9;"   540         "movdqa %%xmm6,%%xmm0;"   544         "palignr $0x4,%%xmm5,%%xmm0;"   549         "movdqa %%xmm4,%%xmm1;"   552         "paddd  %%xmm7,%%xmm0;"   556         "palignr $0x4,%%xmm7,%%xmm1;"   560         "movdqa %%xmm1,%%xmm2;"   564         "movdqa %%xmm1,%%xmm3;"   568         "pslld  $0x19,%%xmm1;"   578         "movdqa %%xmm3,%%xmm2;"   581         "movdqa %%xmm3,%%xmm8;"   590         "psrld  $0x12,%%xmm2;"   595         "pxor   %%xmm3,%%xmm1;"   602         "pxor   %%xmm2,%%xmm1;"   606         "pxor   %%xmm8,%%xmm1;"   610         "pshufd $0xfa,%%xmm6,%%xmm2;"   613         "paddd  %%xmm1,%%xmm0;"   616         "movdqa %%xmm2,%%xmm3;"   620         "movdqa %%xmm2,%%xmm8;"   626         "psrlq  $0x11,%%xmm2;"   628         "psrlq  $0x13,%%xmm3;"   636         "pxor   %%xmm3,%%xmm2;"   640         "pxor   %%xmm2,%%xmm8;"   644         "pshufb %%xmm10,%%xmm8;"   648         "paddd  %%xmm8,%%xmm0;"   651         "pshufd $0x50,%%xmm0,%%xmm2;"   654         "movdqa %%xmm2,%%xmm3;"   658         "movdqa %%xmm2,%%xmm7;"   663         "psrlq  $0x11,%%xmm2;"   666         "psrlq  $0x13,%%xmm3;"   674         "pxor   %%xmm3,%%xmm2;"   678         "pxor   %%xmm2,%%xmm7;"   682         "pshufb %%xmm11,%%xmm7;"   686         "paddd  %%xmm0,%%xmm7;"   696         "paddd  0x0(%13),%%xmm4;"   810         "paddd  0x10(%13),%%xmm5;"   925         "movdqa %%xmm6,%%xmm4;"   926         "movdqa %%xmm7,%%xmm5;"   952         : 
/*
 * Output/read-write operands. s/chunk/blocks are "+r" (read-write):
 * the asm advances them as it consumes input blocks. The scalar
 * working variables are "=r" scratch outputs; inp_end/inp/xfer are
 * "+m" memory slots the asm spills through.
 * NOTE(review): operand %13 in the asm text is the K256 table from
 * the input list below.
 */
"+r"(s), 
"+r"(chunk), 
"+r"(blocks), 
"=r"(a), 
"=r"(b), 
"=r"(c), 
"=r"(d),  
"=r"(f), 
"=r"(g), 
"=r"(h), 
"=r"(y0), 
"=r"(y1), 
"=r"(y2), 
"=r"(tbl), 
"+m"(inp_end), 
"+m"(inp), 
"+m"(xfer)
/* Input operands: the four constant tables, read-only via "m". */
   953         : 
"m"(K256), 
"m"(FLIP_MASK), 
"m"(SHUF_00BA), 
"m"(SHUF_DC00)
/*
 * Clobber list: condition codes, memory (the asm writes through
 * pointers), and every xmm register the visible body touches
 * (xmm0-xmm12).
 */
   954         : 
"cc", 
"memory", 
"xmm0", 
"xmm1", 
"xmm2", 
"xmm3", 
"xmm4", 
"xmm5", 
"xmm6", 
"xmm7", 
"xmm8", 
"xmm9", 
"xmm10", 
"xmm11", 
/*
 * NOTE(review): the trailing "void Transform(...)" fused onto the next
 * line is an extraction artifact (likely the non-SSE fallback
 * definition from the #else branch of the original file), not part of
 * this function.
 */
"xmm12" void Transform(uint32_t *s, const unsigned char *chunk, size_t blocks)