1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA512 block routine. See sha512block.go for Go equivalent.
10
// Scratch registers shared by the macros below. Roles visible in this file:
//   REGTMP4 (R7)  - carries W[i] out of LOAD0/LOAD1 into SHA512T1, which
//                   then overwrites it with T1.
//   REGTMP1 (R16) - receives T2 from SHA512T2.
//   REGTMP, REGTMP2, REGTMP3, REGTMP5 - short-lived intermediates.
//   REG_KT (R19)  - base address of the ·_K constant table.
// REGTMP5 is R6, the same register block uses for the rounded-down length
// before the round loop starts; the macros overwrite it once the loop runs.
11 #define REGTMP R30
12 #define REGTMP1 R16
13 #define REGTMP2 R17
14 #define REGTMP3 R18
15 #define REGTMP4 R7
16 #define REGTMP5 R6
17 #define REG_KT R19
18
19 // W[i] = M[i]; for 0 <= i <= 15
// LOAD0(index) loads the 64-bit word at byte offset index*8 from R5,
// reverses its byte order with REVBV, and stores the result at byte offset
// index*8 from R3. The converted word is also left in REGTMP4, which is
// where SHA512T1 expects W[i].
// Clobbers: REGTMP4.
20 #define LOAD0(index) \
21 MOVV (index*8)(R5), REGTMP4; \
22 REVBV REGTMP4, REGTMP4; \
23 MOVV REGTMP4, (index*8)(R3)
24
25 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
26 // SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
27 // SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
// LOAD1(index) keeps only the 16 most recent schedule words, in a ring of
// 16 eight-byte slots at R3 addressed by (i & 0xf). W[i-16] lives in the
// same slot as W[i], so it is read before the final store replaces it.
// SIGMA1(W[i-2]) + W[i-7] is accumulated in REGTMP4 and
// SIGMA0(W[i-15]) + W[i-16] in REGTMP5; the two instruction streams are
// interleaved. The new W[i] is stored to its slot and also left in REGTMP4,
// which is where SHA512T1 expects it.
// Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP4, REGTMP5.
28 #define LOAD1(index) \
29 MOVV (((index-2)&0xf)*8)(R3), REGTMP4; \
30 MOVV (((index-15)&0xf)*8)(R3), REGTMP1; \
31 MOVV (((index-7)&0xf)*8)(R3), REGTMP; \
32 MOVV REGTMP4, REGTMP2; \
33 MOVV REGTMP4, REGTMP3; \
34 ROTRV $19, REGTMP4; \
35 ROTRV $61, REGTMP2; \
36 SRLV $6, REGTMP3; \
37 XOR REGTMP2, REGTMP4; \
38 XOR REGTMP3, REGTMP4; \
39 ROTRV $1, REGTMP1, REGTMP5; \
40 SRLV $7, REGTMP1, REGTMP2; \
41 ROTRV $8, REGTMP1; \
42 ADDV REGTMP, REGTMP4; \
43 MOVV (((index-16)&0xf)*8)(R3), REGTMP; \
44 XOR REGTMP1, REGTMP5; \
45 XOR REGTMP2, REGTMP5; \
46 ADDV REGTMP, REGTMP5; \
47 ADDV REGTMP5, REGTMP4; \
48 MOVV REGTMP4, ((index&0xf)*8)(R3)
49
50 // h is also used as an accumulator. Wt is passed in REGTMP4.
51 // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
52 // BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
53 // Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
54 // = ((y XOR z) AND x) XOR z
55 // Calculate T1 in REGTMP4
// K[i] is read from byte offset index*8 off REG_KT. K[i], W[i] and
// BIGSIGMA1(e) are added into h in place, Ch(e, f, g) is formed in REGTMP2,
// and the final sum is written to REGTMP4 (overwriting W[i]). On exit h
// therefore holds T1 - Ch(e, f, g), not T1; e, f and g are only read.
// Clobbers: REGTMP, REGTMP2, REGTMP3, REGTMP4, REGTMP5, and h.
56 #define SHA512T1(index, e, f, g, h) \
57 MOVV (index*8)(REG_KT), REGTMP5; \
58 ADDV REGTMP5, h; \
59 ADDV REGTMP4, h; \
60 ROTRV $14, e, REGTMP5; \
61 ROTRV $18, e, REGTMP; \
62 ROTRV $41, e, REGTMP3; \
63 XOR f, g, REGTMP2; \
64 XOR REGTMP, REGTMP5; \
65 AND e, REGTMP2; \
66 XOR REGTMP5, REGTMP3; \
67 XOR g, REGTMP2; \
68 ADDV REGTMP3, h; \
69 ADDV h, REGTMP2, REGTMP4
70
71 // T2 = BIGSIGMA0(a) + Maj(a, b, c)
72 // BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
73 // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
74 // = ((y XOR z) AND x) XOR (y AND z)
75 // Calculate T2 in REGTMP1
// BIGSIGMA0(a) is built in REGTMP5 and Maj(a, b, c) in REGTMP1; the last
// instruction adds the former into the latter. a, b and c are only read.
// REGTMP4 is not touched, so the T1 left there by SHA512T1 is still
// available after this macro.
// Clobbers: REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP5.
76 #define SHA512T2(a, b, c) \
77 ROTRV $28, a, REGTMP5; \
78 ROTRV $34, a, REGTMP3; \
79 ROTRV $39, a, REGTMP2; \
80 XOR b, c, REGTMP; \
81 AND b, c, REGTMP1; \
82 XOR REGTMP3, REGTMP5; \
83 AND REGTMP, a, REGTMP; \
84 XOR REGTMP2, REGTMP5; \
85 XOR REGTMP, REGTMP1; \
86 ADDV REGTMP5, REGTMP1
87
88 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
89 // The values for e and a are stored in d and h, ready for rotation.
// Expects W[index] in REGTMP4 on entry (see SHA512T1). No values are moved
// between working registers: the next round is invoked with its register
// arguments shifted by one position, so the register passed here as d is
// passed next as e, and the register passed here as h is passed next as a.
// Clobbers every REGTMP* register via SHA512T1 and SHA512T2.
90 #define SHA512ROUND(index, a, b, c, d, e, f, g, h) \
91 SHA512T1(index, e, f, g, h); \
92 SHA512T2(a, b, c); \
93 ADDV REGTMP4, d; \
94 ADDV REGTMP1, REGTMP4, h
95
// SHA512ROUND0 is one round for 0 <= index <= 15: LOAD0 fetches W[index]
// from the input at R5 into REGTMP4, then SHA512ROUND consumes it.
96 #define SHA512ROUND0(index, a, b, c, d, e, f, g, h) \
97 LOAD0(index); \
98 SHA512ROUND(index, a, b, c, d, e, f, g, h)
99
// SHA512ROUND1 is one round for 16 <= index <= 79: LOAD1 derives W[index]
// from the 16-word ring at R3 into REGTMP4, then SHA512ROUND consumes it.
100 #define SHA512ROUND1(index, a, b, c, d, e, f, g, h) \
101 LOAD1(index); \
102 SHA512ROUND(index, a, b, c, d, e, f, g, h)
103
104 // A stack frame size of 128 bytes is required here, because
105 // the frame size used for data expansion is 128 bytes.
106 // See the definition of the macro LOAD1 above (8 bytes * 16 entries).
107 //
108 // func block(dig *Digest, p []byte)
//
// block processes every complete 128-byte chunk of p and updates, in place,
// the eight 64-bit words at the start of *dig. Bytes past the last multiple
// of 128 are ignored; if p holds fewer than 128 bytes the function returns
// without reading or writing *dig.
//
// Register use inside the loop:
//   R4       dig
//   R5       address of the current 128-byte chunk
//   R25      address one past the last complete chunk (loop bound)
//   R8-R15   working variables a-h
//   REG_KT   address of ·_K
//   R3       base of the 16-entry message schedule used by LOAD0/LOAD1
//
// NOTE(review): the schedule is addressed as 0..127(R3), directly off the
// hardware register, rather than through a name-N(SP) pseudo-register
// reference. Confirm against the loong64 frame layout that this range is
// exactly the 128 bytes of locals reserved by $128 and does not overlap a
// saved return-address slot.
109 TEXT ·block(SB),NOSPLIT,$128-32
// R6 = len(p) rounded down to a multiple of 128; nothing to do when zero.
110 MOVV p_len+16(FP), R6
111 MOVV p_base+8(FP), R5
112 AND $~127, R6
113 BEQ R6, end
114
115 MOVV $·_K(SB), REG_KT // const table
116
117 // p_len >= 128
118 MOVV dig+0(FP), R4
// R25 = p_base + rounded length. R6 is free after this and is reused as
// REGTMP5 by the round macros.
119 ADDV R5, R6, R25
120 MOVV (0*8)(R4), R8 // a = H0
121 MOVV (1*8)(R4), R9 // b = H1
122 MOVV (2*8)(R4), R10 // c = H2
123 MOVV (3*8)(R4), R11 // d = H3
124 MOVV (4*8)(R4), R12 // e = H4
125 MOVV (5*8)(R4), R13 // f = H5
126 MOVV (6*8)(R4), R14 // g = H6
127 MOVV (7*8)(R4), R15 // h = H7
128
129 loop:
// Rounds 0-15: W[i] comes straight from the input chunk at R5.
// Each invocation lists R8-R15 rotated one position further than the
// previous one, which is how the a-h variables are "shifted" without moves.
130 SHA512ROUND0( 0, R8, R9, R10, R11, R12, R13, R14, R15)
131 SHA512ROUND0( 1, R15, R8, R9, R10, R11, R12, R13, R14)
132 SHA512ROUND0( 2, R14, R15, R8, R9, R10, R11, R12, R13)
133 SHA512ROUND0( 3, R13, R14, R15, R8, R9, R10, R11, R12)
134 SHA512ROUND0( 4, R12, R13, R14, R15, R8, R9, R10, R11)
135 SHA512ROUND0( 5, R11, R12, R13, R14, R15, R8, R9, R10)
136 SHA512ROUND0( 6, R10, R11, R12, R13, R14, R15, R8, R9)
137 SHA512ROUND0( 7, R9, R10, R11, R12, R13, R14, R15, R8)
138 SHA512ROUND0( 8, R8, R9, R10, R11, R12, R13, R14, R15)
139 SHA512ROUND0( 9, R15, R8, R9, R10, R11, R12, R13, R14)
140 SHA512ROUND0(10, R14, R15, R8, R9, R10, R11, R12, R13)
141 SHA512ROUND0(11, R13, R14, R15, R8, R9, R10, R11, R12)
142 SHA512ROUND0(12, R12, R13, R14, R15, R8, R9, R10, R11)
143 SHA512ROUND0(13, R11, R12, R13, R14, R15, R8, R9, R10)
144 SHA512ROUND0(14, R10, R11, R12, R13, R14, R15, R8, R9)
145 SHA512ROUND0(15, R9, R10, R11, R12, R13, R14, R15, R8)
146
// Rounds 16-79: W[i] is derived from the 16-word ring at R3.
147 SHA512ROUND1(16, R8, R9, R10, R11, R12, R13, R14, R15)
148 SHA512ROUND1(17, R15, R8, R9, R10, R11, R12, R13, R14)
149 SHA512ROUND1(18, R14, R15, R8, R9, R10, R11, R12, R13)
150 SHA512ROUND1(19, R13, R14, R15, R8, R9, R10, R11, R12)
151 SHA512ROUND1(20, R12, R13, R14, R15, R8, R9, R10, R11)
152 SHA512ROUND1(21, R11, R12, R13, R14, R15, R8, R9, R10)
153 SHA512ROUND1(22, R10, R11, R12, R13, R14, R15, R8, R9)
154 SHA512ROUND1(23, R9, R10, R11, R12, R13, R14, R15, R8)
155 SHA512ROUND1(24, R8, R9, R10, R11, R12, R13, R14, R15)
156 SHA512ROUND1(25, R15, R8, R9, R10, R11, R12, R13, R14)
157 SHA512ROUND1(26, R14, R15, R8, R9, R10, R11, R12, R13)
158 SHA512ROUND1(27, R13, R14, R15, R8, R9, R10, R11, R12)
159 SHA512ROUND1(28, R12, R13, R14, R15, R8, R9, R10, R11)
160 SHA512ROUND1(29, R11, R12, R13, R14, R15, R8, R9, R10)
161 SHA512ROUND1(30, R10, R11, R12, R13, R14, R15, R8, R9)
162 SHA512ROUND1(31, R9, R10, R11, R12, R13, R14, R15, R8)
163 SHA512ROUND1(32, R8, R9, R10, R11, R12, R13, R14, R15)
164 SHA512ROUND1(33, R15, R8, R9, R10, R11, R12, R13, R14)
165 SHA512ROUND1(34, R14, R15, R8, R9, R10, R11, R12, R13)
166 SHA512ROUND1(35, R13, R14, R15, R8, R9, R10, R11, R12)
167 SHA512ROUND1(36, R12, R13, R14, R15, R8, R9, R10, R11)
168 SHA512ROUND1(37, R11, R12, R13, R14, R15, R8, R9, R10)
169 SHA512ROUND1(38, R10, R11, R12, R13, R14, R15, R8, R9)
170 SHA512ROUND1(39, R9, R10, R11, R12, R13, R14, R15, R8)
171 SHA512ROUND1(40, R8, R9, R10, R11, R12, R13, R14, R15)
172 SHA512ROUND1(41, R15, R8, R9, R10, R11, R12, R13, R14)
173 SHA512ROUND1(42, R14, R15, R8, R9, R10, R11, R12, R13)
174 SHA512ROUND1(43, R13, R14, R15, R8, R9, R10, R11, R12)
175 SHA512ROUND1(44, R12, R13, R14, R15, R8, R9, R10, R11)
176 SHA512ROUND1(45, R11, R12, R13, R14, R15, R8, R9, R10)
177 SHA512ROUND1(46, R10, R11, R12, R13, R14, R15, R8, R9)
178 SHA512ROUND1(47, R9, R10, R11, R12, R13, R14, R15, R8)
179 SHA512ROUND1(48, R8, R9, R10, R11, R12, R13, R14, R15)
180 SHA512ROUND1(49, R15, R8, R9, R10, R11, R12, R13, R14)
181 SHA512ROUND1(50, R14, R15, R8, R9, R10, R11, R12, R13)
182 SHA512ROUND1(51, R13, R14, R15, R8, R9, R10, R11, R12)
183 SHA512ROUND1(52, R12, R13, R14, R15, R8, R9, R10, R11)
184 SHA512ROUND1(53, R11, R12, R13, R14, R15, R8, R9, R10)
185 SHA512ROUND1(54, R10, R11, R12, R13, R14, R15, R8, R9)
186 SHA512ROUND1(55, R9, R10, R11, R12, R13, R14, R15, R8)
187 SHA512ROUND1(56, R8, R9, R10, R11, R12, R13, R14, R15)
188 SHA512ROUND1(57, R15, R8, R9, R10, R11, R12, R13, R14)
189 SHA512ROUND1(58, R14, R15, R8, R9, R10, R11, R12, R13)
190 SHA512ROUND1(59, R13, R14, R15, R8, R9, R10, R11, R12)
191 SHA512ROUND1(60, R12, R13, R14, R15, R8, R9, R10, R11)
192 SHA512ROUND1(61, R11, R12, R13, R14, R15, R8, R9, R10)
193 SHA512ROUND1(62, R10, R11, R12, R13, R14, R15, R8, R9)
194 SHA512ROUND1(63, R9, R10, R11, R12, R13, R14, R15, R8)
195 SHA512ROUND1(64, R8, R9, R10, R11, R12, R13, R14, R15)
196 SHA512ROUND1(65, R15, R8, R9, R10, R11, R12, R13, R14)
197 SHA512ROUND1(66, R14, R15, R8, R9, R10, R11, R12, R13)
198 SHA512ROUND1(67, R13, R14, R15, R8, R9, R10, R11, R12)
199 SHA512ROUND1(68, R12, R13, R14, R15, R8, R9, R10, R11)
200 SHA512ROUND1(69, R11, R12, R13, R14, R15, R8, R9, R10)
201 SHA512ROUND1(70, R10, R11, R12, R13, R14, R15, R8, R9)
202 SHA512ROUND1(71, R9, R10, R11, R12, R13, R14, R15, R8)
203 SHA512ROUND1(72, R8, R9, R10, R11, R12, R13, R14, R15)
204 SHA512ROUND1(73, R15, R8, R9, R10, R11, R12, R13, R14)
205 SHA512ROUND1(74, R14, R15, R8, R9, R10, R11, R12, R13)
206 SHA512ROUND1(75, R13, R14, R15, R8, R9, R10, R11, R12)
207 SHA512ROUND1(76, R12, R13, R14, R15, R8, R9, R10, R11)
208 SHA512ROUND1(77, R11, R12, R13, R14, R15, R8, R9, R10)
209 SHA512ROUND1(78, R10, R11, R12, R13, R14, R15, R8, R9)
210 SHA512ROUND1(79, R9, R10, R11, R12, R13, R14, R15, R8)
211
// 80 rounds is a multiple of 8, so the argument rotation has come full
// circle and R8-R15 again hold a-h in order. Add them to the words stored
// in *dig and write the sums back; R8-R15 keep the updated values and so
// already hold the starting a-h for the next chunk.
212 MOVV (0*8)(R4), REGTMP
213 MOVV (1*8)(R4), REGTMP1
214 MOVV (2*8)(R4), REGTMP2
215 MOVV (3*8)(R4), REGTMP3
216 ADDV REGTMP, R8 // H0 = a + H0
217 ADDV REGTMP1, R9 // H1 = b + H1
218 ADDV REGTMP2, R10 // H2 = c + H2
219 ADDV REGTMP3, R11 // H3 = d + H3
220 MOVV R8, (0*8)(R4)
221 MOVV R9, (1*8)(R4)
222 MOVV R10, (2*8)(R4)
223 MOVV R11, (3*8)(R4)
224 MOVV (4*8)(R4), REGTMP
225 MOVV (5*8)(R4), REGTMP1
226 MOVV (6*8)(R4), REGTMP2
227 MOVV (7*8)(R4), REGTMP3
228 ADDV REGTMP, R12 // H4 = e + H4
229 ADDV REGTMP1, R13 // H5 = f + H5
230 ADDV REGTMP2, R14 // H6 = g + H6
231 ADDV REGTMP3, R15 // H7 = h + H7
232 MOVV R12, (4*8)(R4)
233 MOVV R13, (5*8)(R4)
234 MOVV R14, (6*8)(R4)
235 MOVV R15, (7*8)(R4)
236
// Step to the next 128-byte chunk and repeat until the end address in R25
// is reached.
237 ADDV $128, R5
238 BNE R5, R25, loop
239
240 end:
241 RET
242
View as plain text