1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
9 // SHA256 block routine. See sha256block.go for Go equivalent.
10 //
11 // The algorithm is detailed in FIPS 180-4:
12 //
13 // https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
14 //
15 // W[i] = M[i]; for 0 <= i <= 15
16 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
17 //
18 // a = H0
19 // b = H1
20 // c = H2
21 // d = H3
22 // e = H4
23 // f = H5
24 // g = H6
25 // h = H7
26 //
27 // for i = 0 to 63 {
28 // T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + K[i] + W[i]
29 // T2 = BIGSIGMA0(a) + Maj(a,b,c)
30 // h = g
31 // g = f
32 // f = e
33 // e = d + T1
34 // d = c
35 // c = b
36 // b = a
37 // a = T1 + T2
38 // }
39 //
40 // H0 = a + H0
41 // H1 = b + H1
42 // H2 = c + H2
43 // H3 = d + H3
44 // H4 = e + H4
45 // H5 = f + H5
46 // H6 = g + H6
47 // H7 = h + H7
48
// Scratch registers shared by the macros below.
//
// REGTMP4 carries W[i] from LOAD0/LOAD1 into SHA256T1, which then leaves T1
// in it. REGTMP1 receives T2 from SHA256T2.
// REGTMP5 aliases R6, the register block uses for p_len on entry; block folds
// that length into its end pointer (R25) before the first round, so R6 is
// free by the time any macro runs.
49 #define REGTMP R30
50 #define REGTMP1 R16
51 #define REGTMP2 R17
52 #define REGTMP3 R18
53 #define REGTMP4 R7
54 #define REGTMP5 R6
// REG_KT holds the address of the ·_K constant table, from which SHA256T1
// loads K[i].
55 #define REG_KT R19
56
57 // W[i] = M[i]; for 0 <= i <= 15
//
// LOAD0 reads the 32-bit word at byte offset index*4 from the input pointer
// R5, reverses its byte order with REVB2W, and stores the result at byte
// offset index*4 from R3 (the 64-byte frame that block reserves for the
// message schedule). The converted word is also left in REGTMP4, which is
// where SHA256T1 expects W[i].
58 #define LOAD0(index) \
59 MOVW (index*4)(R5), REGTMP4; \
60 REVB2W REGTMP4, REGTMP4; \
61 MOVW REGTMP4, (index*4)(R3)
62
63 // W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
64 // SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
65 // SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
//
// LOAD1 keeps only the 16 most recent schedule words, in a 16-entry ring at
// R3: every slot number is reduced with &0xf, so W[index] is written to the
// slot that held W[index-16]. That old value is read before the final store.
//
// Register use:
//   REGTMP4  accumulates SIGMA1(W[i-2]) + W[i-7], then the complete W[i]
//   REGTMP5  accumulates SIGMA0(W[i-15]) + W[i-16]
//   REGTMP, REGTMP1, REGTMP2, REGTMP3 are clobbered as temporaries
//
// The new W[i] is both stored to the ring and left in REGTMP4, which is
// where SHA256T1 expects it.
66 #define LOAD1(index) \
67 MOVW (((index-2)&0xf)*4)(R3), REGTMP4; \
68 MOVW (((index-15)&0xf)*4)(R3), REGTMP1; \
69 MOVW (((index-7)&0xf)*4)(R3), REGTMP; \
70 MOVW REGTMP4, REGTMP2; \
71 MOVW REGTMP4, REGTMP3; \
72 ROTR $17, REGTMP4; \
73 ROTR $19, REGTMP2; \
74 SRL $10, REGTMP3; \
75 XOR REGTMP2, REGTMP4; \
76 XOR REGTMP3, REGTMP4; \
77 ROTR $7, REGTMP1, REGTMP5; \
78 SRL $3, REGTMP1, REGTMP3; \
79 ROTR $18, REGTMP1, REGTMP2; \
80 ADD REGTMP, REGTMP4; \
81 MOVW (((index-16)&0xf)*4)(R3), REGTMP; \
82 XOR REGTMP3, REGTMP5; \
83 XOR REGTMP2, REGTMP5; \
84 ADD REGTMP, REGTMP5; \
85 ADD REGTMP5, REGTMP4; \
86 MOVW REGTMP4, ((index&0xf)*4)(R3)
87
88 // T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
89 // BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
90 // Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
91 // = ((y XOR z) AND x) XOR z
92 // Calculate T1 in REGTMP4
//
// On entry REGTMP4 must hold W[i] (LOAD0 and LOAD1 leave it there). K[i] is
// loaded from byte offset index*4 of the table addressed by REG_KT.
//
// h is used as the accumulator and is overwritten: it ends up holding
// h + K[i] + W[i] + BIGSIGMA1(e). REGTMP2 ends up holding Ch(e, f, g), and
// the last instruction adds the two into REGTMP4. e, f and g are only read.
// REGTMP, REGTMP2, REGTMP3 and REGTMP5 are clobbered.
93 #define SHA256T1(index, e, f, g, h) \
94 MOVW (index*4)(REG_KT), REGTMP5; \
95 ADDV REGTMP5, h; \
96 ADD REGTMP4, h; \
97 ROTR $6, e, REGTMP5; \
98 ROTR $11, e, REGTMP; \
99 ROTR $25, e, REGTMP3; \
100 XOR f, g, REGTMP2; \
101 XOR REGTMP, REGTMP5; \
102 AND e, REGTMP2; \
103 XOR REGTMP5, REGTMP3; \
104 XOR g, REGTMP2; \
105 ADD REGTMP3, h; \
106 ADD h, REGTMP2, REGTMP4
107
108 // T2 = BIGSIGMA0(a) + Maj(a, b, c)
109 // BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
110 // Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
111 // = ((y XOR z) AND x) XOR (y AND z)
112 // Calculate T2 in REGTMP1
//
// a, b and c are only read. REGTMP5 accumulates BIGSIGMA0(a); REGTMP1
// accumulates Maj(a, b, c) and then receives the sum T2. REGTMP, REGTMP2,
// REGTMP3 and REGTMP5 are clobbered. REGTMP4 is not touched, so a T1 value
// computed by SHA256T1 beforehand is still available afterwards.
113 #define SHA256T2(a, b, c) \
114 ROTR $2, a, REGTMP5; \
115 ROTR $13, a, REGTMP3; \
116 ROTR $22, a, REGTMP2; \
117 XOR b, c, REGTMP; \
118 AND b, c, REGTMP1; \
119 XOR REGTMP3, REGTMP5; \
120 AND REGTMP, a, REGTMP; \
121 XOR REGTMP2, REGTMP5; \
122 XOR REGTMP, REGTMP1; \
123 ADD REGTMP5, REGTMP1
124
125 // Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
126 // The values for e and a are stored in d and h, ready for rotation.
//
// REGTMP4 must hold W[index] on entry (see SHA256T1). After SHA256T1 and
// SHA256T2 run, T1 is in REGTMP4 and T2 is in REGTMP1.
//
// No values are moved between registers to implement the h=g, g=f, ...
// shuffle of the algorithm. Instead, each successive invocation in block
// passes the same eight registers rotated by one position, so the register
// written here as d is named e by the next round, and the one written as h
// is named a.
127 #define SHA256ROUND(index, a, b, c, d, e, f, g, h) \
128 SHA256T1(index, e, f, g, h); \
129 SHA256T2(a, b, c); \
130 ADD REGTMP4, d; \
131 ADD REGTMP1, REGTMP4, h
132
// SHA256ROUND0 is one round whose schedule word comes straight from the
// message block: LOAD0 fetches W[index] into REGTMP4, then SHA256ROUND
// consumes it. block uses it for rounds 0 through 15.
133 #define SHA256ROUND0(index, a, b, c, d, e, f, g, h) \
134 LOAD0(index); \
135 SHA256ROUND(index, a, b, c, d, e, f, g, h)
136
// SHA256ROUND1 is one round whose schedule word is derived from earlier
// schedule words: LOAD1 computes W[index] into REGTMP4, then SHA256ROUND
// consumes it. block uses it for rounds 16 through 63.
137 #define SHA256ROUND1(index, a, b, c, d, e, f, g, h) \
138 LOAD1(index); \
139 SHA256ROUND(index, a, b, c, d, e, f, g, h)
140
141 // A stack frame size of 64 bytes is required here, because
142 // the frame size used for data expansion is 64 bytes.
143 // See the definition of the macro LOAD1 above (4 bytes * 16 entries).
144 //
// block folds every complete 64-byte chunk of p into the eight 32-bit words
// at the start of *dig. The length is rounded down to a multiple of 64, so
// trailing bytes are ignored; if fewer than 64 bytes are supplied the
// function returns without reading or writing *dig.
//
// Register use:
//   R4       dig
//   R5       address of the chunk being processed
//   R6       p_len rounded down to a multiple of 64 (setup only; R6 is
//            REGTMP5 and is reused as scratch inside the rounds)
//   R25      address just past the last complete chunk
//   R8-R15   working variables a..h
//   REG_KT   address of the ·_K constant table
//
145 //func block(dig *Digest, p []byte)
146 TEXT ·block(SB),NOSPLIT,$64-32
147 MOVV p_base+8(FP), R5
148 MOVV p_len+16(FP), R6
149 AND $~63, R6 // keep only whole 64-byte chunks
150 BEQ R6, end // no complete chunk: nothing to do
151
152 MOVV $·_K(SB), REG_KT // const table
153
154 // p_len >= 64
155 MOVV dig+0(FP), R4
156 ADDV R5, R6, R25 // R25 = end of the last complete chunk
157 MOVW (0*4)(R4), R8 // a = H0
158 MOVW (1*4)(R4), R9 // b = H1
159 MOVW (2*4)(R4), R10 // c = H2
160 MOVW (3*4)(R4), R11 // d = H3
161 MOVW (4*4)(R4), R12 // e = H4
162 MOVW (5*4)(R4), R13 // f = H5
163 MOVW (6*4)(R4), R14 // g = H6
164 MOVW (7*4)(R4), R15 // h = H7
165
166 loop:
// Rounds 0-15: W[i] is read from the message chunk at R5.
// Each round passes R8-R15 rotated by one position relative to the previous
// round (see SHA256ROUND); the pattern repeats every 8 rounds, so after
// round 63 the registers R8-R15 hold a..h in order again.
167 SHA256ROUND0(0, R8, R9, R10, R11, R12, R13, R14, R15)
168 SHA256ROUND0(1, R15, R8, R9, R10, R11, R12, R13, R14)
169 SHA256ROUND0(2, R14, R15, R8, R9, R10, R11, R12, R13)
170 SHA256ROUND0(3, R13, R14, R15, R8, R9, R10, R11, R12)
171 SHA256ROUND0(4, R12, R13, R14, R15, R8, R9, R10, R11)
172 SHA256ROUND0(5, R11, R12, R13, R14, R15, R8, R9, R10)
173 SHA256ROUND0(6, R10, R11, R12, R13, R14, R15, R8, R9)
174 SHA256ROUND0(7, R9, R10, R11, R12, R13, R14, R15, R8)
175 SHA256ROUND0(8, R8, R9, R10, R11, R12, R13, R14, R15)
176 SHA256ROUND0(9, R15, R8, R9, R10, R11, R12, R13, R14)
177 SHA256ROUND0(10, R14, R15, R8, R9, R10, R11, R12, R13)
178 SHA256ROUND0(11, R13, R14, R15, R8, R9, R10, R11, R12)
179 SHA256ROUND0(12, R12, R13, R14, R15, R8, R9, R10, R11)
180 SHA256ROUND0(13, R11, R12, R13, R14, R15, R8, R9, R10)
181 SHA256ROUND0(14, R10, R11, R12, R13, R14, R15, R8, R9)
182 SHA256ROUND0(15, R9, R10, R11, R12, R13, R14, R15, R8)
183
// Rounds 16-63: W[i] is computed from the 16-word schedule ring at R3.
184 SHA256ROUND1(16, R8, R9, R10, R11, R12, R13, R14, R15)
185 SHA256ROUND1(17, R15, R8, R9, R10, R11, R12, R13, R14)
186 SHA256ROUND1(18, R14, R15, R8, R9, R10, R11, R12, R13)
187 SHA256ROUND1(19, R13, R14, R15, R8, R9, R10, R11, R12)
188 SHA256ROUND1(20, R12, R13, R14, R15, R8, R9, R10, R11)
189 SHA256ROUND1(21, R11, R12, R13, R14, R15, R8, R9, R10)
190 SHA256ROUND1(22, R10, R11, R12, R13, R14, R15, R8, R9)
191 SHA256ROUND1(23, R9, R10, R11, R12, R13, R14, R15, R8)
192 SHA256ROUND1(24, R8, R9, R10, R11, R12, R13, R14, R15)
193 SHA256ROUND1(25, R15, R8, R9, R10, R11, R12, R13, R14)
194 SHA256ROUND1(26, R14, R15, R8, R9, R10, R11, R12, R13)
195 SHA256ROUND1(27, R13, R14, R15, R8, R9, R10, R11, R12)
196 SHA256ROUND1(28, R12, R13, R14, R15, R8, R9, R10, R11)
197 SHA256ROUND1(29, R11, R12, R13, R14, R15, R8, R9, R10)
198 SHA256ROUND1(30, R10, R11, R12, R13, R14, R15, R8, R9)
199 SHA256ROUND1(31, R9, R10, R11, R12, R13, R14, R15, R8)
200 SHA256ROUND1(32, R8, R9, R10, R11, R12, R13, R14, R15)
201 SHA256ROUND1(33, R15, R8, R9, R10, R11, R12, R13, R14)
202 SHA256ROUND1(34, R14, R15, R8, R9, R10, R11, R12, R13)
203 SHA256ROUND1(35, R13, R14, R15, R8, R9, R10, R11, R12)
204 SHA256ROUND1(36, R12, R13, R14, R15, R8, R9, R10, R11)
205 SHA256ROUND1(37, R11, R12, R13, R14, R15, R8, R9, R10)
206 SHA256ROUND1(38, R10, R11, R12, R13, R14, R15, R8, R9)
207 SHA256ROUND1(39, R9, R10, R11, R12, R13, R14, R15, R8)
208 SHA256ROUND1(40, R8, R9, R10, R11, R12, R13, R14, R15)
209 SHA256ROUND1(41, R15, R8, R9, R10, R11, R12, R13, R14)
210 SHA256ROUND1(42, R14, R15, R8, R9, R10, R11, R12, R13)
211 SHA256ROUND1(43, R13, R14, R15, R8, R9, R10, R11, R12)
212 SHA256ROUND1(44, R12, R13, R14, R15, R8, R9, R10, R11)
213 SHA256ROUND1(45, R11, R12, R13, R14, R15, R8, R9, R10)
214 SHA256ROUND1(46, R10, R11, R12, R13, R14, R15, R8, R9)
215 SHA256ROUND1(47, R9, R10, R11, R12, R13, R14, R15, R8)
216 SHA256ROUND1(48, R8, R9, R10, R11, R12, R13, R14, R15)
217 SHA256ROUND1(49, R15, R8, R9, R10, R11, R12, R13, R14)
218 SHA256ROUND1(50, R14, R15, R8, R9, R10, R11, R12, R13)
219 SHA256ROUND1(51, R13, R14, R15, R8, R9, R10, R11, R12)
220 SHA256ROUND1(52, R12, R13, R14, R15, R8, R9, R10, R11)
221 SHA256ROUND1(53, R11, R12, R13, R14, R15, R8, R9, R10)
222 SHA256ROUND1(54, R10, R11, R12, R13, R14, R15, R8, R9)
223 SHA256ROUND1(55, R9, R10, R11, R12, R13, R14, R15, R8)
224 SHA256ROUND1(56, R8, R9, R10, R11, R12, R13, R14, R15)
225 SHA256ROUND1(57, R15, R8, R9, R10, R11, R12, R13, R14)
226 SHA256ROUND1(58, R14, R15, R8, R9, R10, R11, R12, R13)
227 SHA256ROUND1(59, R13, R14, R15, R8, R9, R10, R11, R12)
228 SHA256ROUND1(60, R12, R13, R14, R15, R8, R9, R10, R11)
229 SHA256ROUND1(61, R11, R12, R13, R14, R15, R8, R9, R10)
230 SHA256ROUND1(62, R10, R11, R12, R13, R14, R15, R8, R9)
231 SHA256ROUND1(63, R9, R10, R11, R12, R13, R14, R15, R8)
232
// Add the previous state words from *dig into the working variables and
// write the sums back. The sums stay in R8-R15, so they are already in
// place as a..h if another chunk follows.
233 MOVW (0*4)(R4), REGTMP
234 MOVW (1*4)(R4), REGTMP1
235 MOVW (2*4)(R4), REGTMP2
236 MOVW (3*4)(R4), REGTMP3
237 ADD REGTMP, R8 // H0 = a + H0
238 ADD REGTMP1, R9 // H1 = b + H1
239 ADD REGTMP2, R10 // H2 = c + H2
240 ADD REGTMP3, R11 // H3 = d + H3
241 MOVW R8, (0*4)(R4)
242 MOVW R9, (1*4)(R4)
243 MOVW R10, (2*4)(R4)
244 MOVW R11, (3*4)(R4)
245 MOVW (4*4)(R4), REGTMP
246 MOVW (5*4)(R4), REGTMP1
247 MOVW (6*4)(R4), REGTMP2
248 MOVW (7*4)(R4), REGTMP3
249 ADD REGTMP, R12 // H4 = e + H4
250 ADD REGTMP1, R13 // H5 = f + H5
251 ADD REGTMP2, R14 // H6 = g + H6
252 ADD REGTMP3, R15 // H7 = h + H7
253 MOVW R12, (4*4)(R4)
254 MOVW R13, (5*4)(R4)
255 MOVW R14, (6*4)(R4)
256 MOVW R15, (7*4)(R4)
257
258 ADDV $64, R5 // advance to the next chunk
259 BNE R5, R25, loop // continue until the end pointer is reached
260
261 end:
262 RET
263
View as plain text