Text file src/crypto/internal/fips140/sha512/sha512block_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  #include "textflag.h"
     8  
     9  // SHA512 block routine. See sha512block.go for Go equivalent.
    10  
        // Register aliases used by the macros below.
        //   REGTMP..REGTMP5: scratch registers overwritten by the LOAD*/SHA512T* macros.
        //     REGTMP4 carries W[i] from LOAD0/LOAD1 into SHA512T1 and then holds T1.
        //     REGTMP1 holds T2 after SHA512T2.
        //     REGTMP5 is R6, which block also uses for the byte count before the
        //     loop starts; that count is folded into R25 before any macro runs.
        //   REG_KT: base address of the round-constant table ·_K, set once in block.
    11  #define REGTMP	R30
    12  #define REGTMP1	R16
    13  #define REGTMP2	R17
    14  #define REGTMP3	R18
    15  #define REGTMP4	R7
    16  #define REGTMP5	R6
    17  #define REG_KT	R19
    18  
    19  // W[i] = M[i]; for 0 <= i <= 15
        // LOAD0(index): read the 64-bit word at byte offset index*8 from R5 (the
        // input pointer set up in block), reverse its byte order with REVBV, and
        // store it at byte offset index*8 from R3 (the W buffer).
        // The converted word is also left in REGTMP4 for SHA512T1.
        // No other register is written.
    20  #define LOAD0(index) \
    21  	MOVV	(index*8)(R5), REGTMP4; \
    22  	REVBV	REGTMP4, REGTMP4; \
    23  	MOVV	REGTMP4, (index*8)(R3)
    24  
    25  // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
    26  //   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    27  //   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
        // LOAD1(index): W is kept as a 16-entry ring buffer at R3, so every slot
        // number is reduced with &0xf. W[i-2], W[i-7], W[i-15] and W[i-16] are read
        // from slots (index-k)&0xf, and the new word overwrites slot index&0xf.
        // That is the same slot as W[i-16], which is read before the final store.
        // The new W[i] is also left in REGTMP4 for SHA512T1.
        // Clobbers REGTMP, REGTMP1, REGTMP2, REGTMP3 and REGTMP5.
    28  #define LOAD1(index) \
    29  	MOVV	(((index-2)&0xf)*8)(R3), REGTMP4; \
    30  	MOVV	(((index-15)&0xf)*8)(R3), REGTMP1; \
    31  	MOVV	(((index-7)&0xf)*8)(R3), REGTMP; \
    32  	MOVV	REGTMP4, REGTMP2; \
    33  	MOVV	REGTMP4, REGTMP3; \
    34  	ROTRV	$19, REGTMP4; \
    35  	ROTRV	$61, REGTMP2; \
    36  	SRLV	$6, REGTMP3; \
    37  	XOR	REGTMP2, REGTMP4; \
    38  	XOR	REGTMP3, REGTMP4; \
    39  	ROTRV	$1, REGTMP1, REGTMP5; \
    40  	SRLV	$7, REGTMP1, REGTMP2; \
    41  	ROTRV	$8, REGTMP1; \
    42  	ADDV	REGTMP, REGTMP4; \
    43  	MOVV	(((index-16)&0xf)*8)(R3), REGTMP; \
    44  	XOR	REGTMP1, REGTMP5; \
    45  	XOR	REGTMP2, REGTMP5; \
    46  	ADDV	REGTMP, REGTMP5; \
    47  	ADDV	REGTMP5, REGTMP4; \
    48  	MOVV	REGTMP4, ((index&0xf)*8)(R3)
    49  
    50  // h is also used as an accumulator. Wt is passed in REGTMP4.
    51  //   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    52  //     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    53  //     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    54  //                 = ((y XOR z) AND x) XOR z
    55  // Calculate T1 in REGTMP4
        // SHA512T1(index, e, f, g, h): K[index] is loaded from byte offset index*8
        // off REG_KT. On exit REGTMP4 holds T1. h no longer holds its input value;
        // it contains the partial sum h + K[index] + W[index] + BIGSIGMA1(e).
        // Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5; e, f and g are only read.
    56  #define SHA512T1(index, e, f, g, h) \
    57  	MOVV	(index*8)(REG_KT), REGTMP5; \
    58  	ADDV	REGTMP5, h; \
    59  	ADDV	REGTMP4, h; \
    60  	ROTRV	$14, e, REGTMP5; \
    61  	ROTRV	$18, e, REGTMP; \
    62  	ROTRV	$41, e, REGTMP3; \
    63  	XOR	f, g, REGTMP2; \
    64  	XOR	REGTMP, REGTMP5; \
    65  	AND	e, REGTMP2; \
    66  	XOR	REGTMP5, REGTMP3; \
    67  	XOR	g, REGTMP2; \
    68  	ADDV	REGTMP3, h; \
    69  	ADDV	h, REGTMP2, REGTMP4
    70  
    71  // T2 = BIGSIGMA0(a) + Maj(a, b, c)
    72  // BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
    73  // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
    74  //              = ((y XOR z) AND x) XOR (y AND z)
    75  // Calculate T2 in REGTMP1
        // SHA512T2(a, b, c): on exit REGTMP1 holds T2; a, b and c are only read.
        // Clobbers REGTMP, REGTMP2, REGTMP3 and REGTMP5. REGTMP4 is not written,
        // so a T1 value left there by SHA512T1 is still available afterwards.
    76  #define SHA512T2(a, b, c) \
    77  	ROTRV	$28, a, REGTMP5; \
    78  	ROTRV	$34, a, REGTMP3; \
    79  	ROTRV	$39, a, REGTMP2; \
    80  	XOR	b, c, REGTMP; \
    81  	AND	b, c, REGTMP1; \
    82  	XOR	REGTMP3, REGTMP5; \
    83  	AND	REGTMP, a, REGTMP; \
    84  	XOR	REGTMP2, REGTMP5; \
    85  	XOR	REGTMP, REGTMP1; \
    86  	ADDV	REGTMP5, REGTMP1
    87  
    88  // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
    89  // The values for e and a are stored in d and h, ready for rotation.
        // Expects W[index] in REGTMP4 on entry (it is consumed by SHA512T1).
        // Of the eight working registers only d and h hold new values afterwards;
        // a, b, c, e, f and g are read but not written. Callers therefore rotate
        // the argument list by one position per round instead of moving registers.
    90  #define SHA512ROUND(index, a, b, c, d, e, f, g, h) \
    91  	SHA512T1(index, e, f, g, h); \
    92  	SHA512T2(a, b, c); \
    93  	ADDV	REGTMP4, d; \
    94  	ADDV	REGTMP1, REGTMP4, h
    95  
        // Round for 0 <= index <= 15: LOAD0 fetches W[index] from the input block
        // into REGTMP4, then the common round body runs.
    96  #define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \
    97  	LOAD0(index); \
    98  	SHA512ROUND(index, a, b, c, d, e, f, g, h)
    99  
        // Round for 16 <= index <= 79: LOAD1 derives W[index] from earlier schedule
        // words into REGTMP4, then the common round body runs.
   100  #define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \
   101  	LOAD1(index); \
   102  	SHA512ROUND(index, a, b, c, d, e, f, g, h)
   103  
   104  // A stack frame size of 128 bytes is required here, because
   105  // the frame size used for data expansion is 128 bytes.
   106  // See the definition of the macro LOAD1 above (8 bytes * 16 entries).
   107  //
        // block consumes p in 128-byte blocks and, after each one, adds the
        // resulting working variables into the eight 64-bit words at the start of
        // dig (H0..H7). The length is rounded down to a multiple of 128; if that
        // leaves nothing, the function returns without reading or writing dig.
        //
        // Register usage:
        //   R4       dig
        //   R5       current input position (starts at p_base, +128 per block)
        //   R6       byte count to process (prologue only; reused as REGTMP5)
        //   R25      end of input (p_base + rounded length)
        //   R8..R15  working variables a..h
        //   REG_KT   address of ·_K
        //
        // NOTE(review): the W buffer is addressed as (0..15)*8 directly off R3
        // rather than through named stack slots; confirm this agrees with the
        // frame layout the assembler produces for a $128 frame.
        //
   108  // func block(dig *Digest, p []byte)
   109  TEXT ·block(SB),NOSPLIT,$128-32
   110  	MOVV	p_len+16(FP), R6
   111  	MOVV	p_base+8(FP), R5
   112  	AND	$~127, R6	// round the length down to whole 128-byte blocks
   113  	BEQ	R6, end	// no complete block: nothing to do
   114  
   115  	MOVV	$·_K(SB), REG_KT		// const table
   116  
   117  	// p_len >= 128
   118  	MOVV	dig+0(FP), R4
   119  	ADDV	R5, R6, R25	// R25 = address just past the last whole block
   120  	MOVV	(0*8)(R4), R8	// a = H0
   121  	MOVV	(1*8)(R4), R9	// b = H1
   122  	MOVV	(2*8)(R4), R10	// c = H2
   123  	MOVV	(3*8)(R4), R11	// d = H3
   124  	MOVV	(4*8)(R4), R12	// e = H4
   125  	MOVV	(5*8)(R4), R13	// f = H5
   126  	MOVV	(6*8)(R4), R14	// g = H6
   127  	MOVV	(7*8)(R4), R15	// h = H7
   128  
        	// One iteration per 128-byte block. Each round passes the working
        	// registers rotated by one position relative to the previous round.
   129  loop:
        	// Rounds 0-15: W[i] is taken from the input block (LOAD0).
   130  	SHA512ROUND0( 0, R8,  R9,  R10, R11, R12, R13, R14, R15)
   131  	SHA512ROUND0( 1, R15, R8,  R9,  R10, R11, R12, R13, R14)
   132  	SHA512ROUND0( 2, R14, R15, R8,  R9,  R10, R11, R12, R13)
   133  	SHA512ROUND0( 3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   134  	SHA512ROUND0( 4, R12, R13, R14, R15, R8,  R9,  R10, R11)
   135  	SHA512ROUND0( 5, R11, R12, R13, R14, R15, R8,  R9,  R10)
   136  	SHA512ROUND0( 6, R10, R11, R12, R13, R14, R15, R8,  R9)
   137  	SHA512ROUND0( 7, R9,  R10, R11, R12, R13, R14, R15, R8)
   138  	SHA512ROUND0( 8, R8,  R9,  R10, R11, R12, R13, R14, R15)
   139  	SHA512ROUND0( 9, R15, R8,  R9,  R10, R11, R12, R13, R14)
   140  	SHA512ROUND0(10, R14, R15, R8,  R9,  R10, R11, R12, R13)
   141  	SHA512ROUND0(11, R13, R14, R15, R8,  R9,  R10, R11, R12)
   142  	SHA512ROUND0(12, R12, R13, R14, R15, R8,  R9,  R10, R11)
   143  	SHA512ROUND0(13, R11, R12, R13, R14, R15, R8,  R9,  R10)
   144  	SHA512ROUND0(14, R10, R11, R12, R13, R14, R15, R8,  R9)
   145  	SHA512ROUND0(15, R9,  R10, R11, R12, R13, R14, R15, R8)
   146  
        	// Rounds 16-79: W[i] is computed from earlier schedule words (LOAD1).
   147  	SHA512ROUND1(16, R8,  R9,  R10, R11, R12, R13, R14, R15)
   148  	SHA512ROUND1(17, R15, R8,  R9,  R10, R11, R12, R13, R14)
   149  	SHA512ROUND1(18, R14, R15, R8,  R9,  R10, R11, R12, R13)
   150  	SHA512ROUND1(19, R13, R14, R15, R8,  R9,  R10, R11, R12)
   151  	SHA512ROUND1(20, R12, R13, R14, R15, R8,  R9,  R10, R11)
   152  	SHA512ROUND1(21, R11, R12, R13, R14, R15, R8,  R9,  R10)
   153  	SHA512ROUND1(22, R10, R11, R12, R13, R14, R15, R8,  R9)
   154  	SHA512ROUND1(23, R9,  R10, R11, R12, R13, R14, R15, R8)
   155  	SHA512ROUND1(24, R8,  R9,  R10, R11, R12, R13, R14, R15)
   156  	SHA512ROUND1(25, R15, R8,  R9,  R10, R11, R12, R13, R14)
   157  	SHA512ROUND1(26, R14, R15, R8,  R9,  R10, R11, R12, R13)
   158  	SHA512ROUND1(27, R13, R14, R15, R8,  R9,  R10, R11, R12)
   159  	SHA512ROUND1(28, R12, R13, R14, R15, R8,  R9,  R10, R11)
   160  	SHA512ROUND1(29, R11, R12, R13, R14, R15, R8,  R9,  R10)
   161  	SHA512ROUND1(30, R10, R11, R12, R13, R14, R15, R8,  R9)
   162  	SHA512ROUND1(31, R9,  R10, R11, R12, R13, R14, R15, R8)
   163  	SHA512ROUND1(32, R8,  R9,  R10, R11, R12, R13, R14, R15)
   164  	SHA512ROUND1(33, R15, R8,  R9,  R10, R11, R12, R13, R14)
   165  	SHA512ROUND1(34, R14, R15, R8,  R9,  R10, R11, R12, R13)
   166  	SHA512ROUND1(35, R13, R14, R15, R8,  R9,  R10, R11, R12)
   167  	SHA512ROUND1(36, R12, R13, R14, R15, R8,  R9,  R10, R11)
   168  	SHA512ROUND1(37, R11, R12, R13, R14, R15, R8,  R9,  R10)
   169  	SHA512ROUND1(38, R10, R11, R12, R13, R14, R15, R8,  R9)
   170  	SHA512ROUND1(39, R9,  R10, R11, R12, R13, R14, R15, R8)
   171  	SHA512ROUND1(40, R8,  R9,  R10, R11, R12, R13, R14, R15)
   172  	SHA512ROUND1(41, R15, R8,  R9,  R10, R11, R12, R13, R14)
   173  	SHA512ROUND1(42, R14, R15, R8,  R9,  R10, R11, R12, R13)
   174  	SHA512ROUND1(43, R13, R14, R15, R8,  R9,  R10, R11, R12)
   175  	SHA512ROUND1(44, R12, R13, R14, R15, R8,  R9,  R10, R11)
   176  	SHA512ROUND1(45, R11, R12, R13, R14, R15, R8,  R9,  R10)
   177  	SHA512ROUND1(46, R10, R11, R12, R13, R14, R15, R8,  R9)
   178  	SHA512ROUND1(47, R9,  R10, R11, R12, R13, R14, R15, R8)
   179  	SHA512ROUND1(48, R8,  R9,  R10, R11, R12, R13, R14, R15)
   180  	SHA512ROUND1(49, R15, R8,  R9,  R10, R11, R12, R13, R14)
   181  	SHA512ROUND1(50, R14, R15, R8,  R9,  R10, R11, R12, R13)
   182  	SHA512ROUND1(51, R13, R14, R15, R8,  R9,  R10, R11, R12)
   183  	SHA512ROUND1(52, R12, R13, R14, R15, R8,  R9,  R10, R11)
   184  	SHA512ROUND1(53, R11, R12, R13, R14, R15, R8,  R9,  R10)
   185  	SHA512ROUND1(54, R10, R11, R12, R13, R14, R15, R8,  R9)
   186  	SHA512ROUND1(55, R9,  R10, R11, R12, R13, R14, R15, R8)
   187  	SHA512ROUND1(56, R8,  R9,  R10, R11, R12, R13, R14, R15)
   188  	SHA512ROUND1(57, R15, R8,  R9,  R10, R11, R12, R13, R14)
   189  	SHA512ROUND1(58, R14, R15, R8,  R9,  R10, R11, R12, R13)
   190  	SHA512ROUND1(59, R13, R14, R15, R8,  R9,  R10, R11, R12)
   191  	SHA512ROUND1(60, R12, R13, R14, R15, R8,  R9,  R10, R11)
   192  	SHA512ROUND1(61, R11, R12, R13, R14, R15, R8,  R9,  R10)
   193  	SHA512ROUND1(62, R10, R11, R12, R13, R14, R15, R8,  R9)
   194  	SHA512ROUND1(63, R9,  R10, R11, R12, R13, R14, R15, R8)
   195  	SHA512ROUND1(64, R8,  R9,  R10, R11, R12, R13, R14, R15)
   196  	SHA512ROUND1(65, R15, R8,  R9,  R10, R11, R12, R13, R14)
   197  	SHA512ROUND1(66, R14, R15, R8,  R9,  R10, R11, R12, R13)
   198  	SHA512ROUND1(67, R13, R14, R15, R8,  R9,  R10, R11, R12)
   199  	SHA512ROUND1(68, R12, R13, R14, R15, R8,  R9,  R10, R11)
   200  	SHA512ROUND1(69, R11, R12, R13, R14, R15, R8,  R9,  R10)
   201  	SHA512ROUND1(70, R10, R11, R12, R13, R14, R15, R8,  R9)
   202  	SHA512ROUND1(71, R9,  R10, R11, R12, R13, R14, R15, R8)
   203  	SHA512ROUND1(72, R8,  R9,  R10, R11, R12, R13, R14, R15)
   204  	SHA512ROUND1(73, R15, R8,  R9,  R10, R11, R12, R13, R14)
   205  	SHA512ROUND1(74, R14, R15, R8,  R9,  R10, R11, R12, R13)
   206  	SHA512ROUND1(75, R13, R14, R15, R8,  R9,  R10, R11, R12)
   207  	SHA512ROUND1(76, R12, R13, R14, R15, R8,  R9,  R10, R11)
   208  	SHA512ROUND1(77, R11, R12, R13, R14, R15, R8,  R9,  R10)
   209  	SHA512ROUND1(78, R10, R11, R12, R13, R14, R15, R8,  R9)
   210  	SHA512ROUND1(79, R9,  R10, R11, R12, R13, R14, R15, R8)
   211  
        	// Fold the working variables back into the state. 80 rounds is a
        	// multiple of 8, so the rotation has returned to a=R8 .. h=R15.
        	// The sums stay in R8..R15 and serve as a..h for the next block.
   212  	MOVV	(0*8)(R4), REGTMP
   213  	MOVV	(1*8)(R4), REGTMP1
   214  	MOVV	(2*8)(R4), REGTMP2
   215  	MOVV	(3*8)(R4), REGTMP3
   216  	ADDV	REGTMP, R8	// H0 = a + H0
   217  	ADDV	REGTMP1, R9	// H1 = b + H1
   218  	ADDV	REGTMP2, R10	// H2 = c + H2
   219  	ADDV	REGTMP3, R11	// H3 = d + H3
   220  	MOVV	R8, (0*8)(R4)
   221  	MOVV	R9, (1*8)(R4)
   222  	MOVV	R10, (2*8)(R4)
   223  	MOVV	R11, (3*8)(R4)
   224  	MOVV	(4*8)(R4), REGTMP
   225  	MOVV	(5*8)(R4), REGTMP1
   226  	MOVV	(6*8)(R4), REGTMP2
   227  	MOVV	(7*8)(R4), REGTMP3
   228  	ADDV	REGTMP, R12	// H4 = e + H4
   229  	ADDV	REGTMP1, R13	// H5 = f + H5
   230  	ADDV	REGTMP2, R14	// H6 = g + H6
   231  	ADDV	REGTMP3, R15	// H7 = h + H7
   232  	MOVV	R12, (4*8)(R4)
   233  	MOVV	R13, (5*8)(R4)
   234  	MOVV	R14, (6*8)(R4)
   235  	MOVV	R15, (7*8)(R4)
   236  
   237  	ADDV	$128, R5	// advance to the next 128-byte block
   238  	BNE	R5, R25, loop	// repeat until the end of the rounded input
   239  
   240  end:
   241  	RET
   242  

View as plain text