loveckiy.ivan
3 weeks ago
72 changed files with 20106 additions and 0 deletions
@ -0,0 +1,283 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Package argon2 implements the key derivation function Argon2.
|
|||
// Argon2 was selected as the winner of the Password Hashing Competition and can
|
|||
// be used to derive cryptographic keys from passwords.
|
|||
//
|
|||
// For a detailed specification of Argon2 see [1].
|
|||
//
|
|||
// If you aren't sure which function you need, use Argon2id (IDKey) and
|
|||
// the parameter recommendations for your scenario.
|
|||
//
|
|||
// # Argon2i
|
|||
//
|
|||
// Argon2i (implemented by Key) is the side-channel resistant version of Argon2.
|
|||
// It uses data-independent memory access, which is preferred for password
|
|||
// hashing and password-based key derivation. Argon2i requires more passes over
|
|||
// memory than Argon2id to protect from trade-off attacks. The recommended
|
|||
// parameters (taken from [2]) for non-interactive operations are time=3 and to
|
|||
// use the maximum available memory.
|
|||
//
|
|||
// # Argon2id
|
|||
//
|
|||
// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining
|
|||
// Argon2i and Argon2d. It uses data-independent memory access for the first
|
|||
// half of the first iteration over the memory and data-dependent memory access
|
|||
// for the rest. Argon2id is side-channel resistant and provides better brute-
|
|||
// force cost savings due to time-memory tradeoffs than Argon2i. The recommended
|
|||
// parameters for non-interactive operations (taken from [2]) are time=1 and to
|
|||
// use the maximum available memory.
|
|||
//
|
|||
// [1] https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf
|
|||
// [2] https://tools.ietf.org/html/draft-irtf-cfrg-argon2-03#section-9.3
|
|||
package argon2 |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"sync" |
|||
|
|||
"golang.org/x/crypto/blake2b" |
|||
) |
|||
|
|||
// Version is the Argon2 version number implemented by this package. initHash
|
|||
// mixes it into the initial hash.
|
|||
const Version = 0x13 |
|||
|
|||
// Argon2 variants, passed as the mode argument of deriveKey. initHash hashes
|
|||
// the numeric value into the initial state, so the values must stay as they are.
|
|||
const ( |
|||
argon2d = iota |
|||
argon2i |
|||
argon2id |
|||
) |
|||
|
|||
// Key derives a key from the password, salt, and cost parameters using Argon2i
|
|||
// returning a byte slice of length keyLen that can be used as cryptographic
|
|||
// key. The CPU cost and parallelism degree must be greater than zero.
|
|||
//
|
|||
// For example, you can get a derived key for e.g. AES-256 (which needs a
|
|||
// 32-byte key) by doing:
|
|||
//
|
|||
// key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
|
|||
//
|
|||
// The draft RFC recommends[2] time=3, and memory=32*1024 is a sensible number.
|
|||
// If using that amount of memory (32 MB) is not possible in some contexts then
|
|||
// the time parameter can be increased to compensate.
|
|||
//
|
|||
// The time parameter specifies the number of passes over the memory and the
|
|||
// memory parameter specifies the size of the memory in KiB. For example
|
|||
// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
|
|||
// adjusted to the number of available CPUs. The cost parameters should be
|
|||
// increased as memory latency and CPU parallelism increases. Remember to get a
|
|||
// good random salt.
|
|||
func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { |
|||
return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen) |
|||
} |
|||
|
|||
// IDKey derives a key from the password, salt, and cost parameters using
|
|||
// Argon2id returning a byte slice of length keyLen that can be used as
|
|||
// cryptographic key. The CPU cost and parallelism degree must be greater than
|
|||
// zero.
|
|||
//
|
|||
// For example, you can get a derived key for e.g. AES-256 (which needs a
|
|||
// 32-byte key) by doing:
|
|||
//
|
|||
// key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
|
|||
//
|
|||
// The draft RFC recommends[2] time=1, and memory=64*1024 is a sensible number.
|
|||
// If using that amount of memory (64 MB) is not possible in some contexts then
|
|||
// the time parameter can be increased to compensate.
|
|||
//
|
|||
// The time parameter specifies the number of passes over the memory and the
|
|||
// memory parameter specifies the size of the memory in KiB. For example
|
|||
// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
|
|||
// adjusted to the numbers of available CPUs. The cost parameters should be
|
|||
// increased as memory latency and CPU parallelism increases. Remember to get a
|
|||
// good random salt.
|
|||
func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { |
|||
return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen) |
|||
} |
|||
|
|||
func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { |
|||
if time < 1 { |
|||
panic("argon2: number of rounds too small") |
|||
} |
|||
if threads < 1 { |
|||
panic("argon2: parallelism degree too low") |
|||
} |
|||
h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode) |
|||
|
|||
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads)) |
|||
if memory < 2*syncPoints*uint32(threads) { |
|||
memory = 2 * syncPoints * uint32(threads) |
|||
} |
|||
B := initBlocks(&h0, memory, uint32(threads)) |
|||
processBlocks(B, time, memory, uint32(threads), mode) |
|||
return extractKey(B, memory, uint32(threads), keyLen) |
|||
} |
|||
|
|||
const ( |
|||
// blockLength is the number of 64-bit words in one memory block.
|
|||
blockLength = 128 |
|||
// syncPoints is the number of slices per pass; processBlocks waits for all
|
|||
// lanes to finish a slice before it starts the next one.
|
|||
syncPoints = 4 |
|||
) |
|||
|
|||
// block is one block of the Argon2 memory matrix (blockLength 64-bit words).
|
|||
type block [blockLength]uint64 |
|||
|
|||
func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte { |
|||
var ( |
|||
h0 [blake2b.Size + 8]byte |
|||
params [24]byte |
|||
tmp [4]byte |
|||
) |
|||
|
|||
b2, _ := blake2b.New512(nil) |
|||
binary.LittleEndian.PutUint32(params[0:4], threads) |
|||
binary.LittleEndian.PutUint32(params[4:8], keyLen) |
|||
binary.LittleEndian.PutUint32(params[8:12], memory) |
|||
binary.LittleEndian.PutUint32(params[12:16], time) |
|||
binary.LittleEndian.PutUint32(params[16:20], uint32(Version)) |
|||
binary.LittleEndian.PutUint32(params[20:24], uint32(mode)) |
|||
b2.Write(params[:]) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(password))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(password) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(salt) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(key))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(key) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(data))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(data) |
|||
b2.Sum(h0[:0]) |
|||
return h0 |
|||
} |
|||
|
|||
func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block { |
|||
var block0 [1024]byte |
|||
B := make([]block, memory) |
|||
for lane := uint32(0); lane < threads; lane++ { |
|||
j := lane * (memory / threads) |
|||
binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane) |
|||
|
|||
binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0) |
|||
blake2bHash(block0[:], h0[:]) |
|||
for i := range B[j+0] { |
|||
B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:]) |
|||
} |
|||
|
|||
binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1) |
|||
blake2bHash(block0[:], h0[:]) |
|||
for i := range B[j+1] { |
|||
B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:]) |
|||
} |
|||
} |
|||
return B |
|||
} |
|||
|
|||
func processBlocks(B []block, time, memory, threads uint32, mode int) { |
|||
lanes := memory / threads |
|||
segments := lanes / syncPoints |
|||
|
|||
processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) { |
|||
var addresses, in, zero block |
|||
if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { |
|||
in[0] = uint64(n) |
|||
in[1] = uint64(lane) |
|||
in[2] = uint64(slice) |
|||
in[3] = uint64(memory) |
|||
in[4] = uint64(time) |
|||
in[5] = uint64(mode) |
|||
} |
|||
|
|||
index := uint32(0) |
|||
if n == 0 && slice == 0 { |
|||
index = 2 // we have already generated the first two blocks
|
|||
if mode == argon2i || mode == argon2id { |
|||
in[6]++ |
|||
processBlock(&addresses, &in, &zero) |
|||
processBlock(&addresses, &addresses, &zero) |
|||
} |
|||
} |
|||
|
|||
offset := lane*lanes + slice*segments + index |
|||
var random uint64 |
|||
for index < segments { |
|||
prev := offset - 1 |
|||
if index == 0 && slice == 0 { |
|||
prev += lanes // last block in lane
|
|||
} |
|||
if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { |
|||
if index%blockLength == 0 { |
|||
in[6]++ |
|||
processBlock(&addresses, &in, &zero) |
|||
processBlock(&addresses, &addresses, &zero) |
|||
} |
|||
random = addresses[index%blockLength] |
|||
} else { |
|||
random = B[prev][0] |
|||
} |
|||
newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index) |
|||
processBlockXOR(&B[offset], &B[prev], &B[newOffset]) |
|||
index, offset = index+1, offset+1 |
|||
} |
|||
wg.Done() |
|||
} |
|||
|
|||
for n := uint32(0); n < time; n++ { |
|||
for slice := uint32(0); slice < syncPoints; slice++ { |
|||
var wg sync.WaitGroup |
|||
for lane := uint32(0); lane < threads; lane++ { |
|||
wg.Add(1) |
|||
go processSegment(n, slice, lane, &wg) |
|||
} |
|||
wg.Wait() |
|||
} |
|||
} |
|||
|
|||
} |
|||
|
|||
func extractKey(B []block, memory, threads, keyLen uint32) []byte { |
|||
lanes := memory / threads |
|||
for lane := uint32(0); lane < threads-1; lane++ { |
|||
for i, v := range B[(lane*lanes)+lanes-1] { |
|||
B[memory-1][i] ^= v |
|||
} |
|||
} |
|||
|
|||
var block [1024]byte |
|||
for i, v := range B[memory-1] { |
|||
binary.LittleEndian.PutUint64(block[i*8:], v) |
|||
} |
|||
key := make([]byte, keyLen) |
|||
blake2bHash(key, block[:]) |
|||
return key |
|||
} |
|||
|
|||
func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 { |
|||
refLane := uint32(rand>>32) % threads |
|||
if n == 0 && slice == 0 { |
|||
refLane = lane |
|||
} |
|||
m, s := 3*segments, ((slice+1)%syncPoints)*segments |
|||
if lane == refLane { |
|||
m += index |
|||
} |
|||
if n == 0 { |
|||
m, s = slice*segments, 0 |
|||
if slice == 0 || lane == refLane { |
|||
m += index |
|||
} |
|||
} |
|||
if index == 0 || lane == refLane { |
|||
m-- |
|||
} |
|||
return phi(rand, uint64(m), uint64(s), refLane, lanes) |
|||
} |
|||
|
|||
// phi maps the lower 32 bits of rand onto a block index in the given lane.
// The value is squared and scaled by m (keeping the upper 32 bits of each
// product), then subtracted from the end of the window that starts at s and
// spans m blocks. The position is reduced modulo lanes, which is the number
// of blocks per lane, and offset by the start of the lane.
func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
	x := rand & 0xFFFFFFFF
	x = (x * x) >> 32
	y := (x * m) >> 32
	rel := (s + m - (y + 1)) % uint64(lanes)
	return lane*lanes + uint32(rel)
}
@ -0,0 +1,53 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package argon2 |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"hash" |
|||
|
|||
"golang.org/x/crypto/blake2b" |
|||
) |
|||
|
|||
// blake2bHash computes an arbitrary long hash value of in
|
|||
// and writes the hash to out.
|
|||
func blake2bHash(out []byte, in []byte) { |
|||
var b2 hash.Hash |
|||
if n := len(out); n < blake2b.Size { |
|||
b2, _ = blake2b.New(n, nil) |
|||
} else { |
|||
b2, _ = blake2b.New512(nil) |
|||
} |
|||
|
|||
var buffer [blake2b.Size]byte |
|||
binary.LittleEndian.PutUint32(buffer[:4], uint32(len(out))) |
|||
b2.Write(buffer[:4]) |
|||
b2.Write(in) |
|||
|
|||
if len(out) <= blake2b.Size { |
|||
b2.Sum(out[:0]) |
|||
return |
|||
} |
|||
|
|||
outLen := len(out) |
|||
b2.Sum(buffer[:0]) |
|||
b2.Reset() |
|||
copy(out, buffer[:32]) |
|||
out = out[32:] |
|||
for len(out) > blake2b.Size { |
|||
b2.Write(buffer[:]) |
|||
b2.Sum(buffer[:0]) |
|||
copy(out, buffer[:32]) |
|||
out = out[32:] |
|||
b2.Reset() |
|||
} |
|||
|
|||
if outLen%blake2b.Size > 0 { // outLen > 64
|
|||
r := ((outLen + 31) / 32) - 2 // ⌈τ /32⌉-2
|
|||
b2, _ = blake2b.New(outLen-32*r, nil) |
|||
} |
|||
b2.Write(buffer[:]) |
|||
b2.Sum(out[:0]) |
|||
} |
@ -0,0 +1,60 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build amd64 && gc && !purego
|
|||
|
|||
package argon2 |
|||
|
|||
import "golang.org/x/sys/cpu" |
|||
|
|||
// init records whether the CPU supports SSE4.1. processBlockSSE calls the
|
|||
// blamkaSSE4 assembly routine only when useSSE4 is set.
|
|||
func init() { |
|||
useSSE4 = cpu.X86.HasSSE41 |
|||
} |
|||
|
|||
// mixBlocksSSE2 sets out = a ^ b ^ c, word by word. It is implemented in
|
|||
// assembly.
|
|||
//
|
|||
//go:noescape
|
|||
func mixBlocksSSE2(out, a, b, c *block) |
|||
|
|||
// xorBlocksSSE2 sets out ^= a ^ b ^ c, word by word. It is implemented in
|
|||
// assembly.
|
|||
//
|
|||
//go:noescape
|
|||
func xorBlocksSSE2(out, a, b, c *block) |
|||
|
|||
// blamkaSSE4 applies the BlaMka rounds to b in place. It is the assembly
|
|||
// counterpart of the two blamkaGeneric loops in processBlockSSE.
|
|||
//
|
|||
//go:noescape
|
|||
func blamkaSSE4(b *block) |
|||
|
|||
func processBlockSSE(out, in1, in2 *block, xor bool) { |
|||
var t block |
|||
mixBlocksSSE2(&t, in1, in2, &t) |
|||
if useSSE4 { |
|||
blamkaSSE4(&t) |
|||
} else { |
|||
for i := 0; i < blockLength; i += 16 { |
|||
blamkaGeneric( |
|||
&t[i+0], &t[i+1], &t[i+2], &t[i+3], |
|||
&t[i+4], &t[i+5], &t[i+6], &t[i+7], |
|||
&t[i+8], &t[i+9], &t[i+10], &t[i+11], |
|||
&t[i+12], &t[i+13], &t[i+14], &t[i+15], |
|||
) |
|||
} |
|||
for i := 0; i < blockLength/8; i += 2 { |
|||
blamkaGeneric( |
|||
&t[i], &t[i+1], &t[16+i], &t[16+i+1], |
|||
&t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1], |
|||
&t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1], |
|||
&t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1], |
|||
) |
|||
} |
|||
} |
|||
if xor { |
|||
xorBlocksSSE2(out, in1, in2, &t) |
|||
} else { |
|||
mixBlocksSSE2(out, in1, in2, &t) |
|||
} |
|||
} |
|||
|
|||
func processBlock(out, in1, in2 *block) { |
|||
processBlockSSE(out, in1, in2, false) |
|||
} |
|||
|
|||
func processBlockXOR(out, in1, in2 *block) { |
|||
processBlockSSE(out, in1, in2, true) |
|||
} |
@ -0,0 +1,243 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build amd64 && gc && !purego |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// c40 is a PSHUFB mask: result byte i of each 64-bit lane is taken from |
|||
// source byte (i+3) mod 8, i.e. each lane is rotated right by 24 bits. |
|||
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 |
|||
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b |
|||
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// c48 is a PSHUFB mask: result byte i of each 64-bit lane is taken from |
|||
// source byte (i+2) mod 8, i.e. each lane is rotated right by 16 bits. |
|||
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 |
|||
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a |
|||
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// SHUFFLE swaps v4 with v5 and redistributes the 64-bit halves of v2/v3 and |
|||
// of v6/v7 between each other, using t1 and t2 as scratch. BLAMKA_ROUND_* |
|||
// runs it between the two HALF_ROUNDs. |
|||
// NOTE(review): presumably this moves the diagonals of the 4x4 word matrix |
|||
// into the positions HALF_ROUND works on, mirroring the second half of |
|||
// blamkaGeneric - confirm. |
|||
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ |
|||
MOVO v4, t1; \ |
|||
MOVO v5, v4; \ |
|||
MOVO t1, v5; \ |
|||
MOVO v6, t1; \ |
|||
PUNPCKLQDQ v6, t2; \ |
|||
PUNPCKHQDQ v7, v6; \ |
|||
PUNPCKHQDQ t2, v6; \ |
|||
PUNPCKLQDQ v7, t2; \ |
|||
MOVO t1, v7; \ |
|||
MOVO v2, t1; \ |
|||
PUNPCKHQDQ t2, v7; \ |
|||
PUNPCKLQDQ v3, t2; \ |
|||
PUNPCKHQDQ t2, v2; \ |
|||
PUNPCKLQDQ t1, t2; \ |
|||
PUNPCKHQDQ t2, v3 |
|||
|
|||
// SHUFFLE_INV is the same instruction sequence as SHUFFLE with the roles of |
|||
// the v2/v3 and v6/v7 pairs exchanged. BLAMKA_ROUND_* runs it after the |
|||
// second HALF_ROUND. |
|||
// NOTE(review): presumably it restores the register layout that SHUFFLE |
|||
// changed - confirm. |
|||
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ |
|||
MOVO v4, t1; \ |
|||
MOVO v5, v4; \ |
|||
MOVO t1, v5; \ |
|||
MOVO v2, t1; \ |
|||
PUNPCKLQDQ v2, t2; \ |
|||
PUNPCKHQDQ v3, v2; \ |
|||
PUNPCKHQDQ t2, v2; \ |
|||
PUNPCKLQDQ v3, t2; \ |
|||
MOVO t1, v3; \ |
|||
MOVO v6, t1; \ |
|||
PUNPCKHQDQ t2, v3; \ |
|||
PUNPCKLQDQ v7, t2; \ |
|||
PUNPCKHQDQ t2, v6; \ |
|||
PUNPCKLQDQ t1, t2; \ |
|||
PUNPCKHQDQ t2, v7 |
|||
|
|||
// HALF_ROUND runs the BlaMka mixing sequence on the register group |
|||
// (v0, v2, v4, v6) and then on (v1, v3, v5, v7); every register holds two |
|||
// 64-bit words. Each step computes a += b + 2*lo32(a)*lo32(b) (PMULULQ into |
|||
// t0, then one PADDQ of b and two of t0), XORs the sum into the partner |
|||
// register and rotates that register right by 32 (PSHUFD $0xB1), 24 |
|||
// (PSHUFB c40), 16 (PSHUFB c48) or 63 bits (PADDQ/PSRLQ/PXOR). t0 is scratch. |
|||
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \ |
|||
MOVO v0, t0; \ |
|||
PMULULQ v2, t0; \ |
|||
PADDQ v2, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PXOR v0, v6; \ |
|||
PSHUFD $0xB1, v6, v6; \ |
|||
MOVO v4, t0; \ |
|||
PMULULQ v6, t0; \ |
|||
PADDQ v6, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PXOR v4, v2; \ |
|||
PSHUFB c40, v2; \ |
|||
MOVO v0, t0; \ |
|||
PMULULQ v2, t0; \ |
|||
PADDQ v2, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PXOR v0, v6; \ |
|||
PSHUFB c48, v6; \ |
|||
MOVO v4, t0; \ |
|||
PMULULQ v6, t0; \ |
|||
PADDQ v6, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PXOR v4, v2; \ |
|||
MOVO v2, t0; \ |
|||
PADDQ v2, t0; \ |
|||
PSRLQ $63, v2; \ |
|||
PXOR t0, v2; \ |
|||
MOVO v1, t0; \ |
|||
PMULULQ v3, t0; \ |
|||
PADDQ v3, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PXOR v1, v7; \ |
|||
PSHUFD $0xB1, v7, v7; \ |
|||
MOVO v5, t0; \ |
|||
PMULULQ v7, t0; \ |
|||
PADDQ v7, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PXOR v5, v3; \ |
|||
PSHUFB c40, v3; \ |
|||
MOVO v1, t0; \ |
|||
PMULULQ v3, t0; \ |
|||
PADDQ v3, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PXOR v1, v7; \ |
|||
PSHUFB c48, v7; \ |
|||
MOVO v5, t0; \ |
|||
PMULULQ v7, t0; \ |
|||
PADDQ v7, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PXOR v5, v3; \ |
|||
MOVO v3, t0; \ |
|||
PADDQ v3, t0; \ |
|||
PSRLQ $63, v3; \ |
|||
PXOR t0, v3 |
|||
|
|||
// LOAD_MSG_0 loads the 16 consecutive 64-bit words starting at word offset |
|||
// off of block into X0-X7, two words per register. |
|||
#define LOAD_MSG_0(block, off) \ |
|||
MOVOU 8*(off+0)(block), X0; \ |
|||
MOVOU 8*(off+2)(block), X1; \ |
|||
MOVOU 8*(off+4)(block), X2; \ |
|||
MOVOU 8*(off+6)(block), X3; \ |
|||
MOVOU 8*(off+8)(block), X4; \ |
|||
MOVOU 8*(off+10)(block), X5; \ |
|||
MOVOU 8*(off+12)(block), X6; \ |
|||
MOVOU 8*(off+14)(block), X7 |
|||
|
|||
// STORE_MSG_0 writes X0-X7 back to the locations LOAD_MSG_0 reads. |
|||
#define STORE_MSG_0(block, off) \ |
|||
MOVOU X0, 8*(off+0)(block); \ |
|||
MOVOU X1, 8*(off+2)(block); \ |
|||
MOVOU X2, 8*(off+4)(block); \ |
|||
MOVOU X3, 8*(off+6)(block); \ |
|||
MOVOU X4, 8*(off+8)(block); \ |
|||
MOVOU X5, 8*(off+10)(block); \ |
|||
MOVOU X6, 8*(off+12)(block); \ |
|||
MOVOU X7, 8*(off+14)(block) |
|||
|
|||
// LOAD_MSG_1 loads eight word pairs into X0-X7: the pair at word offset off |
|||
// and the pairs 16, 32, ..., 112 words further on. |
|||
#define LOAD_MSG_1(block, off) \ |
|||
MOVOU 8*off+0*8(block), X0; \ |
|||
MOVOU 8*off+16*8(block), X1; \ |
|||
MOVOU 8*off+32*8(block), X2; \ |
|||
MOVOU 8*off+48*8(block), X3; \ |
|||
MOVOU 8*off+64*8(block), X4; \ |
|||
MOVOU 8*off+80*8(block), X5; \ |
|||
MOVOU 8*off+96*8(block), X6; \ |
|||
MOVOU 8*off+112*8(block), X7 |
|||
|
|||
// STORE_MSG_1 writes X0-X7 back to the locations LOAD_MSG_1 reads. |
|||
#define STORE_MSG_1(block, off) \ |
|||
MOVOU X0, 8*off+0*8(block); \ |
|||
MOVOU X1, 8*off+16*8(block); \ |
|||
MOVOU X2, 8*off+32*8(block); \ |
|||
MOVOU X3, 8*off+48*8(block); \ |
|||
MOVOU X4, 8*off+64*8(block); \ |
|||
MOVOU X5, 8*off+80*8(block); \ |
|||
MOVOU X6, 8*off+96*8(block); \ |
|||
MOVOU X7, 8*off+112*8(block) |
|||
|
|||
// BLAMKA_ROUND_0 applies one full round (HALF_ROUND, SHUFFLE, HALF_ROUND, |
|||
// SHUFFLE_INV) in place to the 16 consecutive words selected by LOAD_MSG_0. |
|||
// It uses X0-X7 for the data and t0/t1 as scratch. |
|||
#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \ |
|||
LOAD_MSG_0(block, off); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
STORE_MSG_0(block, off) |
|||
|
|||
// BLAMKA_ROUND_1 is BLAMKA_ROUND_0 for the eight word pairs selected by |
|||
// LOAD_MSG_1 (pairs taken 16 words apart). |
|||
#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \ |
|||
LOAD_MSG_1(block, off); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
STORE_MSG_1(block, off) |
|||
|
|||
// func blamkaSSE4(b *block) |
|||
// |
|||
// blamkaSSE4 applies the BlaMka rounds to the block at b in place. AX holds |
|||
// the block pointer, X10/X11 the c40/c48 shuffle masks and X8/X9 are scratch. |
|||
TEXT ·blamkaSSE4(SB), 4, $0-8 |
|||
MOVQ b+0(FP), AX |
|||
|
|||
MOVOU ·c40<>(SB), X10 |
|||
MOVOU ·c48<>(SB), X11 |
|||
|
|||
// First pass: one round per group of 16 consecutive words. |
|||
BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) |
|||
|
|||
// Second pass: one round per group of word pairs taken 16 words apart. |
|||
BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) |
|||
RET |
|||
|
|||
// func mixBlocksSSE2(out, a, b, c *block) |
|||
// |
|||
// mixBlocksSSE2 stores a ^ b ^ c into out. DI counts the remaining 64-bit |
|||
// words (128 per block); every loop iteration handles 16 bytes, i.e. 2 words. |
|||
TEXT ·mixBlocksSSE2(SB), 4, $0-32 |
|||
MOVQ out+0(FP), DX |
|||
MOVQ a+8(FP), AX |
|||
MOVQ b+16(FP), BX |
|||
MOVQ c+24(FP), CX |
|||
MOVQ $128, DI |
|||
|
|||
loop: |
|||
MOVOU 0(AX), X0 |
|||
MOVOU 0(BX), X1 |
|||
MOVOU 0(CX), X2 |
|||
PXOR X1, X0 |
|||
PXOR X2, X0 |
|||
MOVOU X0, 0(DX) |
|||
ADDQ $16, AX |
|||
ADDQ $16, BX |
|||
ADDQ $16, CX |
|||
ADDQ $16, DX |
|||
SUBQ $2, DI |
|||
JA loop |
|||
RET |
|||
|
|||
// func xorBlocksSSE2(out, a, b, c *block) |
|||
// |
|||
// xorBlocksSSE2 XORs a ^ b ^ c into out (out ^= a ^ b ^ c). DI counts the |
|||
// remaining 64-bit words (128 per block); every loop iteration handles 16 |
|||
// bytes, i.e. 2 words. |
|||
TEXT ·xorBlocksSSE2(SB), 4, $0-32 |
|||
MOVQ out+0(FP), DX |
|||
MOVQ a+8(FP), AX |
|||
MOVQ b+16(FP), BX |
|||
MOVQ c+24(FP), CX |
|||
MOVQ $128, DI |
|||
|
|||
loop: |
|||
MOVOU 0(AX), X0 |
|||
MOVOU 0(BX), X1 |
|||
MOVOU 0(CX), X2 |
|||
MOVOU 0(DX), X3 |
|||
PXOR X1, X0 |
|||
PXOR X2, X0 |
|||
PXOR X3, X0 |
|||
MOVOU X0, 0(DX) |
|||
ADDQ $16, AX |
|||
ADDQ $16, BX |
|||
ADDQ $16, CX |
|||
ADDQ $16, DX |
|||
SUBQ $2, DI |
|||
JA loop |
|||
RET |
@ -0,0 +1,163 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package argon2 |
|||
|
|||
// useSSE4 selects the SSE4.1 assembly rounds in the amd64 implementation. It
|
|||
// is set by that build's init function and stays false everywhere else.
|
|||
var useSSE4 bool |
|||
|
|||
func processBlockGeneric(out, in1, in2 *block, xor bool) { |
|||
var t block |
|||
for i := range t { |
|||
t[i] = in1[i] ^ in2[i] |
|||
} |
|||
for i := 0; i < blockLength; i += 16 { |
|||
blamkaGeneric( |
|||
&t[i+0], &t[i+1], &t[i+2], &t[i+3], |
|||
&t[i+4], &t[i+5], &t[i+6], &t[i+7], |
|||
&t[i+8], &t[i+9], &t[i+10], &t[i+11], |
|||
&t[i+12], &t[i+13], &t[i+14], &t[i+15], |
|||
) |
|||
} |
|||
for i := 0; i < blockLength/8; i += 2 { |
|||
blamkaGeneric( |
|||
&t[i], &t[i+1], &t[16+i], &t[16+i+1], |
|||
&t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1], |
|||
&t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1], |
|||
&t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1], |
|||
) |
|||
} |
|||
if xor { |
|||
for i := range t { |
|||
out[i] ^= in1[i] ^ in2[i] ^ t[i] |
|||
} |
|||
} else { |
|||
for i := range t { |
|||
out[i] = in1[i] ^ in2[i] ^ t[i] |
|||
} |
|||
} |
|||
} |
|||
|
|||
// blamkaGeneric applies one BlaMka round to the sixteen words behind the
// given pointers, in place. Viewing the words as a 4x4 matrix in row-major
// order, the round mixes the four columns first and the four diagonals
// second, using blamkaMix for each group of four words.
func blamkaGeneric(t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15 *uint64) {
	v00, v01, v02, v03 := *t00, *t01, *t02, *t03
	v04, v05, v06, v07 := *t04, *t05, *t06, *t07
	v08, v09, v10, v11 := *t08, *t09, *t10, *t11
	v12, v13, v14, v15 := *t12, *t13, *t14, *t15

	// Columns.
	v00, v04, v08, v12 = blamkaMix(v00, v04, v08, v12)
	v01, v05, v09, v13 = blamkaMix(v01, v05, v09, v13)
	v02, v06, v10, v14 = blamkaMix(v02, v06, v10, v14)
	v03, v07, v11, v15 = blamkaMix(v03, v07, v11, v15)

	// Diagonals.
	v00, v05, v10, v15 = blamkaMix(v00, v05, v10, v15)
	v01, v06, v11, v12 = blamkaMix(v01, v06, v11, v12)
	v02, v07, v08, v13 = blamkaMix(v02, v07, v08, v13)
	v03, v04, v09, v14 = blamkaMix(v03, v04, v09, v14)

	*t00, *t01, *t02, *t03 = v00, v01, v02, v03
	*t04, *t05, *t06, *t07 = v04, v05, v06, v07
	*t08, *t09, *t10, *t11 = v08, v09, v10, v11
	*t12, *t13, *t14, *t15 = v12, v13, v14, v15
}

// blamkaMix mixes four words. Each of its four steps adds
// y + 2*lo32(x)*lo32(y) to a word x, XORs the sum into a partner word and
// rotates the partner right by 32, 24, 16 and 63 bits respectively.
func blamkaMix(a, b, c, d uint64) (uint64, uint64, uint64, uint64) {
	a += b + 2*uint64(uint32(a))*uint64(uint32(b))
	d ^= a
	d = d>>32 | d<<32
	c += d + 2*uint64(uint32(c))*uint64(uint32(d))
	b ^= c
	b = b>>24 | b<<40

	a += b + 2*uint64(uint32(a))*uint64(uint32(b))
	d ^= a
	d = d>>16 | d<<48
	c += d + 2*uint64(uint32(c))*uint64(uint32(d))
	b ^= c
	b = b>>63 | b<<1

	return a, b, c, d
}
@ -0,0 +1,15 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !amd64 || purego || !gc
|
|||
|
|||
package argon2 |
|||
|
|||
func processBlock(out, in1, in2 *block) { |
|||
processBlockGeneric(out, in1, in2, false) |
|||
} |
|||
|
|||
func processBlockXOR(out, in1, in2 *block) { |
|||
processBlockGeneric(out, in1, in2, true) |
|||
} |
@ -0,0 +1,291 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Package blake2b implements the BLAKE2b hash algorithm defined by RFC 7693
|
|||
// and the extendable output function (XOF) BLAKE2Xb.
|
|||
//
|
|||
// BLAKE2b is optimized for 64-bit platforms—including NEON-enabled ARMs—and
|
|||
// produces digests of any size between 1 and 64 bytes.
|
|||
// For a detailed specification of BLAKE2b see https://blake2.net/blake2.pdf
|
|||
// and for BLAKE2Xb see https://blake2.net/blake2x.pdf
|
|||
//
|
|||
// If you aren't sure which function you need, use BLAKE2b (Sum512 or New512).
|
|||
// If you need a secret-key MAC (message authentication code), use the New512
|
|||
// function with a non-nil key.
|
|||
//
|
|||
// BLAKE2X is a construction to compute hash values larger than 64 bytes. It
|
|||
// can produce hash values between 0 and 4 GiB.
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"errors" |
|||
"hash" |
|||
) |
|||
|
|||
const ( |
|||
// BlockSize is the block size of BLAKE2b in bytes.
|
|||
BlockSize = 128 |
|||
// Size is the hash size of BLAKE2b-512 in bytes.
|
|||
Size = 64 |
|||
// Size384 is the hash size of BLAKE2b-384 in bytes.
|
|||
Size384 = 48 |
|||
// Size256 is the hash size of BLAKE2b-256 in bytes.
|
|||
Size256 = 32 |
|||
) |
|||
|
|||
// CPU feature flags. Nothing in the visible part of this file sets or reads
|
|||
// them.
|
|||
// NOTE(review): presumably platform-specific init code sets them to choose a
|
|||
// hashBlocks implementation - confirm.
|
|||
var ( |
|||
useAVX2 bool |
|||
useAVX bool |
|||
useSSE4 bool |
|||
) |
|||
|
|||
// Errors returned by newDigest, and thereby by New, New512, New384 and New256.
|
|||
var ( |
|||
// errKeySize reports a key longer than Size bytes.
|
|||
errKeySize = errors.New("blake2b: invalid key size") |
|||
// errHashSize reports a hash size outside 1..Size.
|
|||
errHashSize = errors.New("blake2b: invalid hash size") |
|||
) |
|||
|
|||
// iv is the initial chaining value. checkSum and digest.Reset start from a
|
|||
// copy of it and XOR the parameter word into element 0.
|
|||
var iv = [8]uint64{ |
|||
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, |
|||
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, |
|||
} |
|||
|
|||
// Sum512 returns the BLAKE2b-512 checksum of the data.
|
|||
func Sum512(data []byte) [Size]byte { |
|||
var sum [Size]byte |
|||
checkSum(&sum, Size, data) |
|||
return sum |
|||
} |
|||
|
|||
// Sum384 returns the BLAKE2b-384 checksum of the data.
|
|||
func Sum384(data []byte) [Size384]byte { |
|||
var sum [Size]byte |
|||
var sum384 [Size384]byte |
|||
checkSum(&sum, Size384, data) |
|||
copy(sum384[:], sum[:Size384]) |
|||
return sum384 |
|||
} |
|||
|
|||
// Sum256 returns the BLAKE2b-256 checksum of the data.
|
|||
func Sum256(data []byte) [Size256]byte { |
|||
var sum [Size]byte |
|||
var sum256 [Size256]byte |
|||
checkSum(&sum, Size256, data) |
|||
copy(sum256[:], sum[:Size256]) |
|||
return sum256 |
|||
} |
|||
|
|||
// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
|
|||
// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) } |
|||
|
|||
// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
|
|||
// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) } |
|||
|
|||
// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
|
|||
// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) } |
|||
|
|||
// New returns a new hash.Hash computing the BLAKE2b checksum with a custom length.
|
|||
// A non-nil key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
// The hash size can be a value between 1 and 64 but it is highly recommended to use
|
|||
// values equal or greater than:
|
|||
// - 32 if BLAKE2b is used as a hash function (The key is zero bytes long).
|
|||
// - 16 if BLAKE2b is used as a MAC function (The key is at least 16 bytes long).
|
|||
// When the key is nil, the returned hash.Hash implements BinaryMarshaler
|
|||
// and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash.
|
|||
func New(size int, key []byte) (hash.Hash, error) { return newDigest(size, key) } |
|||
|
|||
func newDigest(hashSize int, key []byte) (*digest, error) { |
|||
if hashSize < 1 || hashSize > Size { |
|||
return nil, errHashSize |
|||
} |
|||
if len(key) > Size { |
|||
return nil, errKeySize |
|||
} |
|||
d := &digest{ |
|||
size: hashSize, |
|||
keyLen: len(key), |
|||
} |
|||
copy(d.key[:], key) |
|||
d.Reset() |
|||
return d, nil |
|||
} |
|||
|
|||
func checkSum(sum *[Size]byte, hashSize int, data []byte) { |
|||
h := iv |
|||
h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24) |
|||
var c [2]uint64 |
|||
|
|||
if length := len(data); length > BlockSize { |
|||
n := length &^ (BlockSize - 1) |
|||
if length == n { |
|||
n -= BlockSize |
|||
} |
|||
hashBlocks(&h, &c, 0, data[:n]) |
|||
data = data[n:] |
|||
} |
|||
|
|||
var block [BlockSize]byte |
|||
offset := copy(block[:], data) |
|||
remaining := uint64(BlockSize - offset) |
|||
if c[0] < remaining { |
|||
c[1]-- |
|||
} |
|||
c[0] -= remaining |
|||
|
|||
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) |
|||
|
|||
for i, v := range h[:(hashSize+7)/8] { |
|||
binary.LittleEndian.PutUint64(sum[8*i:], v) |
|||
} |
|||
} |
|||
|
|||
// digest is the streaming BLAKE2b state behind the hash.Hash values returned
|
|||
// by New, New512, New384 and New256.
|
|||
type digest struct { |
|||
// h is the chaining value; Reset initialises it from iv.
|
|||
h [8]uint64 |
|||
// c is the two-word counter handed to hashBlocks.
|
|||
c [2]uint64 |
|||
// size is the digest length in bytes (1..Size, enforced by newDigest).
|
|||
size int |
|||
// block buffers input that has not been compressed yet and offset is the
|
|||
// number of bytes currently buffered in it.
|
|||
block [BlockSize]byte |
|||
offset int |
|||
|
|||
// key holds the MAC key, zero-padded to a full block, and keyLen its
|
|||
// length in bytes. keyLen == 0 means the digest is unkeyed.
|
|||
key [BlockSize]byte |
|||
keyLen int |
|||
} |
|||
|
|||
const ( |
|||
magic = "b2b" |
|||
marshaledSize = len(magic) + 8*8 + 2*8 + 1 + BlockSize + 1 |
|||
) |
|||
|
|||
func (d *digest) MarshalBinary() ([]byte, error) { |
|||
if d.keyLen != 0 { |
|||
return nil, errors.New("crypto/blake2b: cannot marshal MACs") |
|||
} |
|||
b := make([]byte, 0, marshaledSize) |
|||
b = append(b, magic...) |
|||
for i := 0; i < 8; i++ { |
|||
b = appendUint64(b, d.h[i]) |
|||
} |
|||
b = appendUint64(b, d.c[0]) |
|||
b = appendUint64(b, d.c[1]) |
|||
// Maximum value for size is 64
|
|||
b = append(b, byte(d.size)) |
|||
b = append(b, d.block[:]...) |
|||
b = append(b, byte(d.offset)) |
|||
return b, nil |
|||
} |
|||
|
|||
func (d *digest) UnmarshalBinary(b []byte) error { |
|||
if len(b) < len(magic) || string(b[:len(magic)]) != magic { |
|||
return errors.New("crypto/blake2b: invalid hash state identifier") |
|||
} |
|||
if len(b) != marshaledSize { |
|||
return errors.New("crypto/blake2b: invalid hash state size") |
|||
} |
|||
b = b[len(magic):] |
|||
for i := 0; i < 8; i++ { |
|||
b, d.h[i] = consumeUint64(b) |
|||
} |
|||
b, d.c[0] = consumeUint64(b) |
|||
b, d.c[1] = consumeUint64(b) |
|||
d.size = int(b[0]) |
|||
b = b[1:] |
|||
copy(d.block[:], b[:BlockSize]) |
|||
b = b[BlockSize:] |
|||
d.offset = int(b[0]) |
|||
return nil |
|||
} |
|||
|
|||
func (d *digest) BlockSize() int { return BlockSize } |
|||
|
|||
func (d *digest) Size() int { return d.size } |
|||
|
|||
func (d *digest) Reset() { |
|||
d.h = iv |
|||
d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24) |
|||
d.offset, d.c[0], d.c[1] = 0, 0, 0 |
|||
if d.keyLen > 0 { |
|||
d.block = d.key |
|||
d.offset = BlockSize |
|||
} |
|||
} |
|||
|
|||
func (d *digest) Write(p []byte) (n int, err error) { |
|||
n = len(p) |
|||
|
|||
if d.offset > 0 { |
|||
remaining := BlockSize - d.offset |
|||
if n <= remaining { |
|||
d.offset += copy(d.block[d.offset:], p) |
|||
return |
|||
} |
|||
copy(d.block[d.offset:], p[:remaining]) |
|||
hashBlocks(&d.h, &d.c, 0, d.block[:]) |
|||
d.offset = 0 |
|||
p = p[remaining:] |
|||
} |
|||
|
|||
if length := len(p); length > BlockSize { |
|||
nn := length &^ (BlockSize - 1) |
|||
if length == nn { |
|||
nn -= BlockSize |
|||
} |
|||
hashBlocks(&d.h, &d.c, 0, p[:nn]) |
|||
p = p[nn:] |
|||
} |
|||
|
|||
if len(p) > 0 { |
|||
d.offset += copy(d.block[:], p) |
|||
} |
|||
|
|||
return |
|||
} |
|||
|
|||
func (d *digest) Sum(sum []byte) []byte { |
|||
var hash [Size]byte |
|||
d.finalize(&hash) |
|||
return append(sum, hash[:d.size]...) |
|||
} |
|||
|
|||
func (d *digest) finalize(hash *[Size]byte) { |
|||
var block [BlockSize]byte |
|||
copy(block[:], d.block[:d.offset]) |
|||
remaining := uint64(BlockSize - d.offset) |
|||
|
|||
c := d.c |
|||
if c[0] < remaining { |
|||
c[1]-- |
|||
} |
|||
c[0] -= remaining |
|||
|
|||
h := d.h |
|||
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) |
|||
|
|||
for i, v := range h { |
|||
binary.LittleEndian.PutUint64(hash[8*i:], v) |
|||
} |
|||
} |
|||
|
|||
// appendUint64 appends x to b as eight big-endian bytes and returns the
// extended slice.
func appendUint64(b []byte, x uint64) []byte {
	start := len(b)
	b = append(b, 0, 0, 0, 0, 0, 0, 0, 0)
	binary.BigEndian.PutUint64(b[start:], x)
	return b
}
|||
|
|||
// appendUint32 appends x to b as four big-endian bytes and returns the
// extended slice.
func appendUint32(b []byte, x uint32) []byte {
	start := len(b)
	b = append(b, 0, 0, 0, 0)
	binary.BigEndian.PutUint32(b[start:], x)
	return b
}
|||
|
|||
// consumeUint64 decodes a big-endian uint64 from the front of b and returns
// the remaining bytes together with the value. It panics if b holds fewer
// than eight bytes.
func consumeUint64(b []byte) ([]byte, uint64) {
	value := binary.BigEndian.Uint64(b)
	b = b[8:]
	return b, value
}
|||
|
|||
// consumeUint32 decodes a big-endian uint32 from the front of b and returns
// the remaining bytes together with the value. It panics if b holds fewer
// than four bytes.
func consumeUint32(b []byte) ([]byte, uint32) {
	value := binary.BigEndian.Uint32(b)
	b = b[4:]
	return b, value
}
@ -0,0 +1,37 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build amd64 && gc && !purego
|
|||
|
|||
package blake2b |
|||
|
|||
import "golang.org/x/sys/cpu" |
|||
|
|||
func init() { |
|||
useAVX2 = cpu.X86.HasAVX2 |
|||
useAVX = cpu.X86.HasAVX |
|||
useSSE4 = cpu.X86.HasSSE41 |
|||
} |
|||
|
|||
// hashBlocksAVX2 is the AVX2 assembly implementation of the block function.
// It processes blocks in 128-byte steps, adding 128 to the counter c for each
// step, and writes the updated state and counter back to h and c. The
// assembly loop only terminates when the remaining length reaches exactly
// zero, so len(blocks) must be a non-zero multiple of 128.
//go:noescape
|
|||
func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
|
|||
// hashBlocksAVX is the AVX (128-bit register) assembly implementation of the
// block function. It processes blocks in 128-byte steps, adding 128 to the
// counter c for each step, and writes the updated state and counter back to
// h and c. The assembly loop only terminates when the remaining length
// reaches exactly zero, so len(blocks) must be a non-zero multiple of 128.
//go:noescape
|
|||
func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
|
|||
// hashBlocksSSE4 is the assembly implementation hashBlocks selects when
// useSSE4 is set and neither AVX variant is available. It takes the same
// arguments as hashBlocksAVX2 and hashBlocksAVX.
//go:noescape
|
|||
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
|
|||
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { |
|||
switch { |
|||
case useAVX2: |
|||
hashBlocksAVX2(h, c, flag, blocks) |
|||
case useAVX: |
|||
hashBlocksAVX(h, c, flag, blocks) |
|||
case useSSE4: |
|||
hashBlocksSSE4(h, c, flag, blocks) |
|||
default: |
|||
hashBlocksGeneric(h, c, flag, blocks) |
|||
} |
|||
} |
@ -0,0 +1,744 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build amd64 && gc && !purego |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// AVX2_iv0 and AVX2_iv1 are two read-only 32-byte tables of four 64-bit
// constants each. hashBlocksAVX2 loads them into Y6 and Y7 and uses them as
// the initial values of the lower half of the working state (Y2 and, after
// XOR with counter and flag, Y3).
DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 |
|||
DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b |
|||
DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b |
|||
DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 |
|||
GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 |
|||
DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f |
|||
DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b |
|||
DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 |
|||
GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
// AVX2_c40 and AVX2_c48 are VPSHUFB byte-shuffle masks, repeated for both
// 128-bit lanes. Within each 64-bit word, c40 moves source byte (i+3) mod 8
// to position i (a rotate right by 24 bits, i.e. left by 40) and c48 moves
// source byte (i+2) mod 8 to position i (a rotate right by 16 bits, i.e.
// left by 48).
DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 |
|||
DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b |
|||
DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 |
|||
DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b |
|||
GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 |
|||
DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a |
|||
DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 |
|||
DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a |
|||
GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
// AVX_iv0..AVX_iv3 hold the same eight 64-bit constants as the AVX2 tables,
// split into four read-only 16-byte tables (two words each) for the 128-bit
// code path. hashBlocksAVX loads iv0..iv2 into X4..X6 on every block and
// keeps iv3, XORed with the flag, in its stack scratch area.
DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 |
|||
DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b |
|||
GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b |
|||
DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 |
|||
GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 |
|||
DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f |
|||
GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b |
|||
DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 |
|||
GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// AVX_c40 and AVX_c48 are the 16-byte VPSHUFB masks for the 128-bit code
// path. Per 64-bit word, c40 rotates right by 24 bits (left by 40) and c48
// rotates right by 16 bits (left by 48).
DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 |
|||
DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b |
|||
GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 |
|||
DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a |
|||
GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// Hand-encoded VPERMQ instructions, emitted as raw bytes. Each macro name
// gives the immediate and the (identical) source and destination register.
// The immediates permute the four 64-bit lanes: 0x39 selects lanes
// (1, 2, 3, 0), 0x4E selects (2, 3, 0, 1) and 0x93 selects (3, 0, 1, 2).
#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 |
|||
#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 |
|||
#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e |
|||
#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 |
|||
#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 |
|||
|
|||
// ROUND_AVX2 performs one full round on the working state held in Y0..Y3
// (four 64-bit words per register).
//   m0..m3   message operands (registers or memory), added in that order
//   t        scratch register
//   c40, c48 registers holding the VPSHUFB rotate masks
// The first half mixes Y0..Y3 using m0 and m1, then the VPERMQ macros rotate
// the lanes of Y1, Y2 and Y3 by one, two and three positions. The second
// half mixes again using m2 and m3, and the final VPERMQ macros apply the
// inverse lane rotation. The four word rotations are done with
// VPSHUFD $-79 (swap 32-bit halves), VPSHUFB c40, VPSHUFB c48 and the
// VPADDQ/VPSRLQ $63/VPXOR sequence (rotate left by one bit).
#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ |
|||
VPADDQ m0, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFD $-79, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPSHUFB c40, Y1, Y1; \ |
|||
VPADDQ m1, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFB c48, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPADDQ Y1, Y1, t; \ |
|||
VPSRLQ $63, Y1, Y1; \ |
|||
VPXOR t, Y1, Y1; \ |
|||
VPERMQ_0x39_Y1_Y1; \ |
|||
VPERMQ_0x4E_Y2_Y2; \ |
|||
VPERMQ_0x93_Y3_Y3; \ |
|||
VPADDQ m2, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFD $-79, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPSHUFB c40, Y1, Y1; \ |
|||
VPADDQ m3, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFB c48, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPADDQ Y1, Y1, t; \ |
|||
VPSRLQ $63, Y1, Y1; \ |
|||
VPXOR t, Y1, Y1; \ |
|||
VPERMQ_0x39_Y3_Y3; \ |
|||
VPERMQ_0x4E_Y2_Y2; \ |
|||
VPERMQ_0x93_Y1_Y1 |
|||
|
|||
// Hand-encoded VMOVQ and VPINSRQ instructions, emitted as raw bytes.
// The macro names describe the operands:
//   VMOVQ_SI_Xnn_0      load the 64-bit word at 0(SI) into Xnn
//   VMOVQ_SI_Xnn(n)     load the 64-bit word at n(SI) into Xnn
//   VPINSRQ_1_SI_Xnn_0  insert the word at 0(SI) into the upper half of Xnn
//   VPINSRQ_1_SI_Xnn(n) insert the word at n(SI) into the upper half of Xnn
//   VMOVQ_R8_X15        move R8 into X15
//   VPINSRQ_1_R9_X15    insert R9 into the upper half of X15
// In the (n) forms the offset is emitted as a single BYTE, so it has to fit
// in one byte; the largest offset used in this file is 15*8.
#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E |
|||
#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 |
|||
#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E |
|||
#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 |
|||
#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E |
|||
|
|||
#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n |
|||
#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n |
|||
#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n |
|||
#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n |
|||
#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n |
|||
|
|||
#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 |
|||
|
|||
#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 |
|||
|
|||
#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 |
|||
#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 |
|||
|
|||
// The four LOAD_MSG_AVX2_Y1n macros below gather four 64-bit message words,
// addressed as index*8 from SI, into one 256-bit register. The low 128 bits
// are built directly in X1n from words i0 and i1; words i2 and i3 are built
// in the scratch register X11 and then inserted as the upper 128 bits.
// Index 0 is excluded because the (n) opcode macros always emit an offset
// byte; callers use the dedicated *_0 macros for word 0 instead.

// load msg: Y12 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
|||
#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X12(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X12(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12 |
|||
|
|||
// load msg: Y13 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
|||
#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X13(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X13(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13 |
|||
|
|||
// load msg: Y14 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
|||
#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X14(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X14(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14 |
|||
|
|||
// load msg: Y15 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
|||
#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X15(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X15(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
// Per-round message loaders for the AVX2 code path. The sixteen numbers in
// each macro name are the message word indices, in the order they are placed
// into Y12, Y13, Y14 and Y15 (four words per register). X11 is used as
// scratch for the upper 128 bits of each register.
// Groups that contain word 0 are spelled out with the *_0 opcode macros
// instead of LOAD_MSG_AVX2_Y1n. Where two words are adjacent in memory a
// single 128-bit load is used: VMOVDQU when they are wanted in memory order,
// VPSHUFD $0x4E when they are wanted swapped.
#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ |
|||
VMOVQ_SI_X12_0; \ |
|||
VMOVQ_SI_X11(4*8); \ |
|||
VPINSRQ_1_SI_X12(2*8); \ |
|||
VPINSRQ_1_SI_X11(6*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ |
|||
LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ |
|||
LOAD_MSG_AVX2_Y15(9, 11, 13, 15) |
|||
|
|||
#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ |
|||
LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ |
|||
LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ |
|||
VMOVQ_SI_X11(11*8); \ |
|||
VPSHUFD $0x4E, 0*8(SI), X14; \ |
|||
VPINSRQ_1_SI_X11(5*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14; \ |
|||
LOAD_MSG_AVX2_Y15(12, 2, 7, 3) |
|||
|
|||
#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ |
|||
VMOVQ_SI_X11(5*8); \ |
|||
VMOVDQU 11*8(SI), X12; \ |
|||
VPINSRQ_1_SI_X11(15*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
VMOVQ_SI_X13(8*8); \ |
|||
VMOVQ_SI_X11(2*8); \ |
|||
VPINSRQ_1_SI_X13_0; \ |
|||
VPINSRQ_1_SI_X11(13*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13; \ |
|||
LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ |
|||
LOAD_MSG_AVX2_Y15(14, 6, 1, 4) |
|||
|
|||
#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ |
|||
LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ |
|||
LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ |
|||
LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ |
|||
VMOVQ_SI_X15(6*8); \ |
|||
VMOVQ_SI_X11_0; \ |
|||
VPINSRQ_1_SI_X15(10*8); \ |
|||
VPINSRQ_1_SI_X11(8*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ |
|||
LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ |
|||
VMOVQ_SI_X13_0; \ |
|||
VMOVQ_SI_X11(4*8); \ |
|||
VPINSRQ_1_SI_X13(7*8); \ |
|||
VPINSRQ_1_SI_X11(15*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13; \ |
|||
LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ |
|||
LOAD_MSG_AVX2_Y15(1, 12, 8, 13) |
|||
|
|||
#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ |
|||
VMOVQ_SI_X12(2*8); \ |
|||
VMOVQ_SI_X11_0; \ |
|||
VPINSRQ_1_SI_X12(6*8); \ |
|||
VPINSRQ_1_SI_X11(8*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ |
|||
LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ |
|||
LOAD_MSG_AVX2_Y15(13, 5, 14, 9) |
|||
|
|||
#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ |
|||
LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ |
|||
LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ |
|||
VMOVQ_SI_X14_0; \ |
|||
VPSHUFD $0x4E, 8*8(SI), X11; \ |
|||
VPINSRQ_1_SI_X14(6*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14; \ |
|||
LOAD_MSG_AVX2_Y15(7, 3, 2, 11) |
|||
|
|||
#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ |
|||
LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ |
|||
LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ |
|||
LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ |
|||
VMOVQ_SI_X15_0; \ |
|||
VMOVQ_SI_X11(6*8); \ |
|||
VPINSRQ_1_SI_X15(4*8); \ |
|||
VPINSRQ_1_SI_X11(10*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ |
|||
VMOVQ_SI_X12(6*8); \ |
|||
VMOVQ_SI_X11(11*8); \ |
|||
VPINSRQ_1_SI_X12(14*8); \ |
|||
VPINSRQ_1_SI_X11_0; \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ |
|||
VMOVQ_SI_X11(1*8); \ |
|||
VMOVDQU 12*8(SI), X14; \ |
|||
VPINSRQ_1_SI_X11(10*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14; \ |
|||
VMOVQ_SI_X15(2*8); \ |
|||
VMOVDQU 4*8(SI), X11; \ |
|||
VPINSRQ_1_SI_X15(7*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ |
|||
LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ |
|||
VMOVQ_SI_X13(2*8); \ |
|||
VPSHUFD $0x4E, 5*8(SI), X11; \ |
|||
VPINSRQ_1_SI_X13(4*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13; \ |
|||
LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ |
|||
VMOVQ_SI_X15(11*8); \ |
|||
VMOVQ_SI_X11(12*8); \ |
|||
VPINSRQ_1_SI_X15(14*8); \ |
|||
VPINSRQ_1_SI_X11_0; \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
// hashBlocksAVX2 compresses len(blocks)/128 message blocks into the state h
// using 256-bit AVX2 registers, updating the 128-bit counter c by 128 per
// block. The loop runs until the remaining length is exactly zero, so the
// caller must pass a non-zero multiple of 128 bytes.
//
// Register use: AX = h, BX = c, SI = current block, DI = bytes remaining,
// R8/R9 = low/high counter word, DX = 32-byte-aligned scratch on the stack,
// Y8/Y9 = h[0..3]/h[4..7], Y6/Y7 = IV tables, Y4/Y5 = rotate masks,
// Y0..Y3 = working state, Y12..Y15 = message words, Y10 = temporary.
//
// Scratch layout: 0(DX) holds (c[0], c[1], flag, 0); 32..128(DX) and
// 160..256(DX) cache the message vectors of the first and second round.
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment |
|||
MOVQ h+0(FP), AX |
|||
MOVQ c+8(FP), BX |
|||
MOVQ flag+16(FP), CX |
|||
MOVQ blocks_base+24(FP), SI |
|||
MOVQ blocks_len+32(FP), DI |
|||
|
|||
// DX = SP rounded up to the next multiple of 32, as required by VMOVDQA.
MOVQ SP, DX |
|||
ADDQ $31, DX |
|||
ANDQ $~31, DX |
|||
|
|||
// Upper half of the 32-byte word at 0(DX): the flag followed by zero.
MOVQ CX, 16(DX) |
|||
XORQ CX, CX |
|||
MOVQ CX, 24(DX) |
|||
|
|||
VMOVDQU ·AVX2_c40<>(SB), Y4 |
|||
VMOVDQU ·AVX2_c48<>(SB), Y5 |
|||
|
|||
VMOVDQU 0(AX), Y8 |
|||
VMOVDQU 32(AX), Y9 |
|||
VMOVDQU ·AVX2_iv0<>(SB), Y6 |
|||
VMOVDQU ·AVX2_iv1<>(SB), Y7 |
|||
|
|||
MOVQ 0(BX), R8 |
|||
MOVQ 8(BX), R9 |
|||
MOVQ R9, 8(DX) |
|||
|
|||
loop: |
|||
// Advance the counter by one block; if the low word ended up below 128
// the addition wrapped, so carry into the high word.
// NOTE(review): JGE is a signed comparison, so this matches an unsigned
// carry check only while the low counter word stays below 2^63.
ADDQ $128, R8 |
|||
MOVQ R8, 0(DX) |
|||
CMPQ R8, $128 |
|||
JGE noinc |
|||
INCQ R9 |
|||
MOVQ R9, 8(DX) |
|||
|
|||
noinc: |
|||
// Working state: Y0/Y1 = h, Y2 = iv0, Y3 = iv1 ^ (c[0], c[1], flag, 0).
VMOVDQA Y8, Y0 |
|||
VMOVDQA Y9, Y1 |
|||
VMOVDQA Y6, Y2 |
|||
VPXOR 0(DX), Y7, Y3 |
|||
|
|||
// Rounds 1 and 2: the loaded message vectors are also saved to the scratch
// area so the last two rounds can reuse them without reloading.
LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() |
|||
VMOVDQA Y12, 32(DX) |
|||
VMOVDQA Y13, 64(DX) |
|||
VMOVDQA Y14, 96(DX) |
|||
VMOVDQA Y15, 128(DX) |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() |
|||
VMOVDQA Y12, 160(DX) |
|||
VMOVDQA Y13, 192(DX) |
|||
VMOVDQA Y14, 224(DX) |
|||
VMOVDQA Y15, 256(DX) |
|||
|
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
|
|||
// Rounds 11 and 12 reuse the message vectors cached from rounds 1 and 2.
ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5) |
|||
ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5) |
|||
|
|||
// Feed-forward: h[0..3] ^= Y0 ^ Y2 and h[4..7] ^= Y1 ^ Y3.
VPXOR Y0, Y8, Y8 |
|||
VPXOR Y1, Y9, Y9 |
|||
VPXOR Y2, Y8, Y8 |
|||
VPXOR Y3, Y9, Y9 |
|||
|
|||
// Next block; stop when no bytes remain.
LEAQ 128(SI), SI |
|||
SUBQ $128, DI |
|||
JNE loop |
|||
|
|||
// Write the counter and the state back to the caller.
MOVQ R8, 0(BX) |
|||
MOVQ R9, 8(BX) |
|||
|
|||
VMOVDQU Y8, 0(AX) |
|||
VMOVDQU Y9, 32(AX) |
|||
VZEROUPPER |
|||
|
|||
RET |
|||
|
|||
// Hand-encoded VPUNPCKLQDQ / VPUNPCKHQDQ instructions, emitted as raw bytes
// and named after their three register operands. SHUFFLE_AVX and
// SHUFFLE_AVX_INV use them with X15 as the intermediate register.
#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA |
|||
#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB |
|||
#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF |
|||
#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD |
|||
#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE |
|||
|
|||
#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 |
|||
#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF |
|||
#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 |
|||
#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF |
|||
#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 |
|||
#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 |
|||
#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF |
|||
#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF |
|||
|
|||
// SHUFFLE_AVX rearranges the 64-bit words held in X2..X7 between the first
// and second HALF_ROUND_AVX of a round: X4 and X5 are swapped, and the words
// of X2/X3 and X6/X7 are recombined through the unpack macros.
// X13, X14 and X15 are clobbered as temporaries.
// NOTE(review): presumably this is the BLAKE2b "diagonalize" step for the
// two-words-per-register layout — confirm against the reference code.
#define SHUFFLE_AVX() \ |
|||
VMOVDQA X6, X13; \ |
|||
VMOVDQA X2, X14; \ |
|||
VMOVDQA X4, X6; \ |
|||
VPUNPCKLQDQ_X13_X13_X15; \ |
|||
VMOVDQA X5, X4; \ |
|||
VMOVDQA X6, X5; \ |
|||
VPUNPCKHQDQ_X15_X7_X6; \ |
|||
VPUNPCKLQDQ_X7_X7_X15; \ |
|||
VPUNPCKHQDQ_X15_X13_X7; \ |
|||
VPUNPCKLQDQ_X3_X3_X15; \ |
|||
VPUNPCKHQDQ_X15_X2_X2; \ |
|||
VPUNPCKLQDQ_X14_X14_X15; \ |
|||
VPUNPCKHQDQ_X15_X3_X3; \ |
|||
|
|||
// SHUFFLE_AVX_INV is applied after the second HALF_ROUND_AVX of each round.
// It swaps X4 and X5 back and recombines the words of X2/X3 and X6/X7
// through the unpack macros. X13, X14 and X15 are clobbered as temporaries.
// NOTE(review): presumably the exact inverse of SHUFFLE_AVX — confirm.
#define SHUFFLE_AVX_INV() \ |
|||
VMOVDQA X2, X13; \ |
|||
VMOVDQA X4, X14; \ |
|||
VPUNPCKLQDQ_X2_X2_X15; \ |
|||
VMOVDQA X5, X4; \ |
|||
VPUNPCKHQDQ_X15_X3_X2; \ |
|||
VMOVDQA X14, X5; \ |
|||
VPUNPCKLQDQ_X3_X3_X15; \ |
|||
VMOVDQA X6, X14; \ |
|||
VPUNPCKHQDQ_X15_X13_X3; \ |
|||
VPUNPCKLQDQ_X7_X7_X15; \ |
|||
VPUNPCKHQDQ_X15_X6_X6; \ |
|||
VPUNPCKLQDQ_X14_X14_X15; \ |
|||
VPUNPCKHQDQ_X15_X7_X7; \ |
|||
|
|||
// HALF_ROUND_AVX performs half of a round on a working state spread over
// eight 128-bit registers (two 64-bit words each).
//   v0..v7   working state; the pairs (v0,v1), (v2,v3), (v4,v5), (v6,v7)
//            are processed in parallel
//   m0..m3   message operands (registers or memory); m0/m1 feed the first
//            addition into v0/v1, m2/m3 the second
//   t0       scratch register; callers may pass the register that holds m3,
//            because m3 is consumed before t0 is first written
//   c40, c48 registers holding the VPSHUFB rotate masks
// The four word rotations are done with VPSHUFD $-79 (swap 32-bit halves),
// VPSHUFB c40, VPSHUFB c48 and the VPADDQ/VPSRLQ $63/VPXOR sequence
// (rotate left by one bit).
#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ |
|||
VPADDQ m0, v0, v0; \ |
|||
VPADDQ v2, v0, v0; \ |
|||
VPADDQ m1, v1, v1; \ |
|||
VPADDQ v3, v1, v1; \ |
|||
VPXOR v0, v6, v6; \ |
|||
VPXOR v1, v7, v7; \ |
|||
VPSHUFD $-79, v6, v6; \ |
|||
VPSHUFD $-79, v7, v7; \ |
|||
VPADDQ v6, v4, v4; \ |
|||
VPADDQ v7, v5, v5; \ |
|||
VPXOR v4, v2, v2; \ |
|||
VPXOR v5, v3, v3; \ |
|||
VPSHUFB c40, v2, v2; \ |
|||
VPSHUFB c40, v3, v3; \ |
|||
VPADDQ m2, v0, v0; \ |
|||
VPADDQ v2, v0, v0; \ |
|||
VPADDQ m3, v1, v1; \ |
|||
VPADDQ v3, v1, v1; \ |
|||
VPXOR v0, v6, v6; \ |
|||
VPXOR v1, v7, v7; \ |
|||
VPSHUFB c48, v6, v6; \ |
|||
VPSHUFB c48, v7, v7; \ |
|||
VPADDQ v6, v4, v4; \ |
|||
VPADDQ v7, v5, v5; \ |
|||
VPXOR v4, v2, v2; \ |
|||
VPXOR v5, v3, v3; \ |
|||
VPADDQ v2, v2, t0; \ |
|||
VPSRLQ $63, v2, v2; \ |
|||
VPXOR t0, v2, v2; \ |
|||
VPADDQ v3, v3, t0; \ |
|||
VPSRLQ $63, v3, v3; \ |
|||
VPXOR t0, v3, v3 |
|||
|
|||
// LOAD_MSG_AVX gathers eight 64-bit message words, addressed as index*8 from
// SI, into X12..X15. The low half of each register is loaded first and the
// high half inserted afterwards. Index 0 is excluded because the (n) opcode
// macros always emit an offset byte; permutations containing word 0 use the
// dedicated LOAD_MSG_AVX_* macros instead.
// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) |
|||
// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 |
|||
#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ |
|||
VMOVQ_SI_X12(i0*8); \ |
|||
VMOVQ_SI_X13(i2*8); \ |
|||
VMOVQ_SI_X14(i4*8); \ |
|||
VMOVQ_SI_X15(i6*8); \ |
|||
VPINSRQ_1_SI_X12(i1*8); \ |
|||
VPINSRQ_1_SI_X13(i3*8); \ |
|||
VPINSRQ_1_SI_X14(i5*8); \ |
|||
VPINSRQ_1_SI_X15(i7*8) |
|||
|
|||
// Fixed message loaders for the AVX code path. Each macro fills X12..X15
// with the eight message words listed in its name, two per register. These
// cover the half-rounds that need word 0 (loaded through the *_0 opcode
// macros or a plain MOVQ) or that can fetch two adjacent words with a single
// 128-bit load: VMOVDQU when they are wanted in memory order, VPSHUFD $0x4E
// when they are wanted swapped.

// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) |
|||
#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ |
|||
VMOVQ_SI_X12_0; \ |
|||
VMOVQ_SI_X13(4*8); \ |
|||
VMOVQ_SI_X14(1*8); \ |
|||
VMOVQ_SI_X15(5*8); \ |
|||
VPINSRQ_1_SI_X12(2*8); \ |
|||
VPINSRQ_1_SI_X13(6*8); \ |
|||
VPINSRQ_1_SI_X14(3*8); \ |
|||
VPINSRQ_1_SI_X15(7*8) |
|||
|
|||
// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) |
|||
#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ |
|||
VPSHUFD $0x4E, 0*8(SI), X12; \ |
|||
VMOVQ_SI_X13(11*8); \ |
|||
VMOVQ_SI_X14(12*8); \ |
|||
VMOVQ_SI_X15(7*8); \ |
|||
VPINSRQ_1_SI_X13(5*8); \ |
|||
VPINSRQ_1_SI_X14(2*8); \ |
|||
VPINSRQ_1_SI_X15(3*8) |
|||
|
|||
// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) |
|||
#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ |
|||
VMOVDQU 11*8(SI), X12; \ |
|||
VMOVQ_SI_X13(5*8); \ |
|||
VMOVQ_SI_X14(8*8); \ |
|||
VMOVQ_SI_X15(2*8); \ |
|||
VPINSRQ_1_SI_X13(15*8); \ |
|||
VPINSRQ_1_SI_X14_0; \ |
|||
VPINSRQ_1_SI_X15(13*8) |
|||
|
|||
// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) |
|||
#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ |
|||
VMOVQ_SI_X12(2*8); \ |
|||
VMOVQ_SI_X13(4*8); \ |
|||
VMOVQ_SI_X14(6*8); \ |
|||
VMOVQ_SI_X15_0; \ |
|||
VPINSRQ_1_SI_X12(5*8); \ |
|||
VPINSRQ_1_SI_X13(15*8); \ |
|||
VPINSRQ_1_SI_X14(10*8); \ |
|||
VPINSRQ_1_SI_X15(8*8) |
|||
|
|||
// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) |
|||
#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ |
|||
VMOVQ_SI_X12(9*8); \ |
|||
VMOVQ_SI_X13(2*8); \ |
|||
VMOVQ_SI_X14_0; \ |
|||
VMOVQ_SI_X15(4*8); \ |
|||
VPINSRQ_1_SI_X12(5*8); \ |
|||
VPINSRQ_1_SI_X13(10*8); \ |
|||
VPINSRQ_1_SI_X14(7*8); \ |
|||
VPINSRQ_1_SI_X15(15*8) |
|||
|
|||
// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) |
|||
#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ |
|||
VMOVQ_SI_X12(2*8); \ |
|||
VMOVQ_SI_X13_0; \ |
|||
VMOVQ_SI_X14(12*8); \ |
|||
VMOVQ_SI_X15(11*8); \ |
|||
VPINSRQ_1_SI_X12(6*8); \ |
|||
VPINSRQ_1_SI_X13(8*8); \ |
|||
VPINSRQ_1_SI_X14(10*8); \ |
|||
VPINSRQ_1_SI_X15(3*8) |
|||
|
|||
// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) |
|||
#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ |
|||
MOVQ 0*8(SI), X12; \ |
|||
VPSHUFD $0x4E, 8*8(SI), X13; \ |
|||
MOVQ 7*8(SI), X14; \ |
|||
MOVQ 2*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(6*8); \ |
|||
VPINSRQ_1_SI_X14(3*8); \ |
|||
VPINSRQ_1_SI_X15(11*8) |
|||
|
|||
// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) |
|||
#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ |
|||
MOVQ 6*8(SI), X12; \ |
|||
MOVQ 11*8(SI), X13; \ |
|||
MOVQ 15*8(SI), X14; \ |
|||
MOVQ 3*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(14*8); \ |
|||
VPINSRQ_1_SI_X13_0; \ |
|||
VPINSRQ_1_SI_X14(9*8); \ |
|||
VPINSRQ_1_SI_X15(8*8) |
|||
|
|||
// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) |
|||
#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ |
|||
MOVQ 5*8(SI), X12; \ |
|||
MOVQ 8*8(SI), X13; \ |
|||
MOVQ 0*8(SI), X14; \ |
|||
MOVQ 6*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(15*8); \ |
|||
VPINSRQ_1_SI_X13(2*8); \ |
|||
VPINSRQ_1_SI_X14(4*8); \ |
|||
VPINSRQ_1_SI_X15(10*8) |
|||
|
|||
// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) |
|||
#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ |
|||
VMOVDQU 12*8(SI), X12; \ |
|||
MOVQ 1*8(SI), X13; \ |
|||
MOVQ 2*8(SI), X14; \ |
|||
VPINSRQ_1_SI_X13(10*8); \ |
|||
VPINSRQ_1_SI_X14(7*8); \ |
|||
VMOVDQU 4*8(SI), X15 |
|||
|
|||
// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) |
|||
#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ |
|||
MOVQ 15*8(SI), X12; \ |
|||
MOVQ 3*8(SI), X13; \ |
|||
MOVQ 11*8(SI), X14; \ |
|||
MOVQ 12*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(9*8); \ |
|||
VPINSRQ_1_SI_X13(13*8); \ |
|||
VPINSRQ_1_SI_X14(14*8); \ |
|||
VPINSRQ_1_SI_X15_0 |
|||
|
|||
// hashBlocksAVX compresses len(blocks)/128 message blocks into the state h
// using 128-bit AVX registers, updating the 128-bit counter c by 128 per
// block. The loop runs until the remaining length is exactly zero, so the
// caller must pass a non-zero multiple of 128 bytes.
//
// Register use: AX = h, BX = c, SI = current block, DI = bytes remaining,
// R8/R9 = low/high counter word, R10 = 16-byte-aligned scratch on the stack,
// X10/X11 = h[0..1]/h[2..3], X2/X3 = h[4..5]/h[6..7] (also working state),
// X8/X9 = rotate masks, X0..X7 = working state, X12..X15 = message words
// (X15 doubles as the HALF_ROUND_AVX temporary).
//
// Scratch layout: 0(R10) holds AVX_iv3 with the flag XORed into its low
// word; 16..256(R10) cache the message vectors of the first two rounds.
// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment |
|||
MOVQ h+0(FP), AX |
|||
MOVQ c+8(FP), BX |
|||
MOVQ flag+16(FP), CX |
|||
MOVQ blocks_base+24(FP), SI |
|||
MOVQ blocks_len+32(FP), DI |
|||
|
|||
// R10 = SP rounded up to the next multiple of 16, as required by VMOVDQA.
MOVQ SP, R10 |
|||
ADDQ $15, R10 |
|||
ANDQ $~15, R10 |
|||
|
|||
VMOVDQU ·AVX_c40<>(SB), X0 |
|||
VMOVDQU ·AVX_c48<>(SB), X1 |
|||
VMOVDQA X0, X8 |
|||
VMOVDQA X1, X9 |
|||
|
|||
VMOVDQU ·AVX_iv3<>(SB), X0 |
|||
VMOVDQA X0, 0(R10) |
|||
XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0) |
|||
|
|||
VMOVDQU 0(AX), X10 |
|||
VMOVDQU 16(AX), X11 |
|||
VMOVDQU 32(AX), X2 |
|||
VMOVDQU 48(AX), X3 |
|||
|
|||
MOVQ 0(BX), R8 |
|||
MOVQ 8(BX), R9 |
|||
|
|||
loop: |
|||
// Advance the counter by one block; if the low word ended up below 128
// the addition wrapped, so carry into the high word.
// NOTE(review): JGE is a signed comparison, so this matches an unsigned
// carry check only while the low counter word stays below 2^63.
ADDQ $128, R8 |
|||
CMPQ R8, $128 |
|||
JGE noinc |
|||
INCQ R9 |
|||
|
|||
noinc: |
|||
// X15 = (c[0], c[1])
VMOVQ_R8_X15 |
|||
VPINSRQ_1_R9_X15 |
|||
|
|||
// Working state: X0/X1 = h[0..3], X2/X3 = h[4..7] (already in place),
// X4/X5 = iv0/iv1, X6 = iv2 ^ counter, X7 = iv3 ^ flag.
VMOVDQA X10, X0 |
|||
VMOVDQA X11, X1 |
|||
VMOVDQU ·AVX_iv0<>(SB), X4 |
|||
VMOVDQU ·AVX_iv1<>(SB), X5 |
|||
VMOVDQU ·AVX_iv2<>(SB), X6 |
|||
|
|||
VPXOR X15, X6, X6 |
|||
VMOVDQA 0(R10), X7 |
|||
|
|||
// Rounds 1 and 2: the loaded message vectors are also saved to the scratch
// area so the last two rounds can reuse them without reloading.
LOAD_MSG_AVX_0_2_4_6_1_3_5_7() |
|||
VMOVDQA X12, 16(R10) |
|||
VMOVDQA X13, 32(R10) |
|||
VMOVDQA X14, 48(R10) |
|||
VMOVDQA X15, 64(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) |
|||
VMOVDQA X12, 80(R10) |
|||
VMOVDQA X13, 96(R10) |
|||
VMOVDQA X14, 112(R10) |
|||
VMOVDQA X15, 128(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) |
|||
VMOVDQA X12, 144(R10) |
|||
VMOVDQA X13, 160(R10) |
|||
VMOVDQA X14, 176(R10) |
|||
VMOVDQA X15, 192(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_1_0_11_5_12_2_7_3() |
|||
VMOVDQA X12, 208(R10) |
|||
VMOVDQA X13, 224(R10) |
|||
VMOVDQA X14, 240(R10) |
|||
VMOVDQA X15, 256(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Rounds 3 to 10: load, half-round, shuffle, load, half-round, unshuffle.
LOAD_MSG_AVX_11_12_5_15_8_0_2_13() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_2_5_4_15_6_10_0_8() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX_9_5_2_10_0_7_4_15() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX_2_6_0_8_12_10_11_3() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_0_6_9_8_7_3_2_11() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_5_15_8_2_0_4_6_10() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX_6_14_11_0_15_9_3_8() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_12_13_1_10_2_7_4_5() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_15_9_3_13_11_14_12_0() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Rounds 11 and 12 reuse the message vectors cached from rounds 1 and 2.
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Feed-forward. h[4..7] is reloaded from memory because X2/X3 were used as
// working state; the result is written straight back so the next iteration
// finds it both in memory and in X2/X3.
VMOVDQU 32(AX), X14 |
|||
VMOVDQU 48(AX), X15 |
|||
VPXOR X0, X10, X10 |
|||
VPXOR X1, X11, X11 |
|||
VPXOR X2, X14, X14 |
|||
VPXOR X3, X15, X15 |
|||
VPXOR X4, X10, X10 |
|||
VPXOR X5, X11, X11 |
|||
VPXOR X6, X14, X2 |
|||
VPXOR X7, X15, X3 |
|||
VMOVDQU X2, 32(AX) |
|||
VMOVDQU X3, 48(AX) |
|||
|
|||
// Next block; stop when no bytes remain.
LEAQ 128(SI), SI |
|||
SUBQ $128, DI |
|||
JNE loop |
|||
|
|||
// Write h[0..3] and the counter back to the caller.
VMOVDQU X10, 0(AX) |
|||
VMOVDQU X11, 16(AX) |
|||
|
|||
MOVQ R8, 0(BX) |
|||
MOVQ R9, 8(BX) |
|||
VZEROUPPER |
|||
|
|||
RET |
@ -0,0 +1,278 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build amd64 && gc && !purego |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// Initialization vector words. hashBlocksSSE4 loads iv0, iv1 and iv2 into
// X4, X5 and X6 and iv3 (XORed with the flag argument) into X7 to form the
// lower half of the working state. Each symbol holds two 64-bit words.
DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16

DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16

DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16

DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16

// PSHUFB mask: within each 64-bit lane, result byte i is source byte (i+3)%8,
// i.e. a rotation right by 24 bits (left by 40).
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16

// PSHUFB mask: within each 64-bit lane, result byte i is source byte (i+2)%8,
// i.e. a rotation right by 16 bits (left by 48).
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
|||
|
|||
// SHUFFLE re-aligns the state rows between the two half rounds of a round.
// Treating each register pair as a row of four 64-bit words (low lane first):
//   (v2, v3): (a, b, c, d) -> (b, c, d, a)   rotate left by one word
//   (v4, v5): the two registers are swapped  rotate by two words
//   (v6, v7): (a, b, c, d) -> (d, a, b, c)   rotate right by one word
// t1 and t2 are scratch registers; only the high lane of t2 is ever consumed,
// so its incoming value does not matter.
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO       v4, t1; \
	MOVO       v5, v4; \
	MOVO       t1, v5; \
	MOVO       v6, t1; \
	PUNPCKLQDQ v6, t2; \
	PUNPCKHQDQ v7, v6; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ v7, t2; \
	MOVO       t1, v7; \
	MOVO       v2, t1; \
	PUNPCKHQDQ t2, v7; \
	PUNPCKLQDQ v3, t2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v3
|||
|
|||
// SHUFFLE_INV undoes SHUFFLE. Treating each register pair as a row of four
// 64-bit words (low lane first):
//   (v2, v3): (a, b, c, d) -> (d, a, b, c)   rotate right by one word
//   (v4, v5): the two registers are swapped  rotate by two words
//   (v6, v7): (a, b, c, d) -> (b, c, d, a)   rotate left by one word
// t1 and t2 are scratch registers; the incoming value of t2 does not matter.
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
	MOVO       v4, t1; \
	MOVO       v5, v4; \
	MOVO       t1, v5; \
	MOVO       v2, t1; \
	PUNPCKLQDQ v2, t2; \
	PUNPCKHQDQ v3, v2; \
	PUNPCKHQDQ t2, v2; \
	PUNPCKLQDQ v3, t2; \
	MOVO       t1, v3; \
	MOVO       v6, t1; \
	PUNPCKHQDQ t2, v3; \
	PUNPCKLQDQ v7, t2; \
	PUNPCKHQDQ t2, v6; \
	PUNPCKLQDQ t1, t2; \
	PUNPCKHQDQ t2, v7
|||
|
|||
// HALF_ROUND performs four mixing steps in parallel on the state held in
// v0..v7 (two 64-bit lanes per register). With a = (v0, v1), b = (v2, v3),
// c = (v4, v5), d = (v6, v7) it computes, lane by lane:
//   a += (m0, m1) + b;  d = rotr(d ^ a, 32);  c += d;  b = rotr(b ^ c, 24)
//   a += (m2, m3) + b;  d = rotr(d ^ a, 16);  c += d;  b = rotr(b ^ c, 63)
// PSHUFD $0xB1 swaps the 32-bit halves of each lane (rotate by 32); the c40
// and c48 operands are PSHUFB masks rotating right by 24 and 16 bits; the
// closing add/shift/xor sequence through t0 is a rotate left by 1, which is
// the same as a rotate right by 63.
// m0..m3 may be registers or memory operands. t0 may alias m3, because m3 is
// consumed before t0 is first written.
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
	PADDQ  m0, v0;        \
	PADDQ  m1, v1;        \
	PADDQ  v2, v0;        \
	PADDQ  v3, v1;        \
	PXOR   v0, v6;        \
	PXOR   v1, v7;        \
	PSHUFD $0xB1, v6, v6; \
	PSHUFD $0xB1, v7, v7; \
	PADDQ  v6, v4;        \
	PADDQ  v7, v5;        \
	PXOR   v4, v2;        \
	PXOR   v5, v3;        \
	PSHUFB c40, v2;       \
	PSHUFB c40, v3;       \
	PADDQ  m2, v0;        \
	PADDQ  m3, v1;        \
	PADDQ  v2, v0;        \
	PADDQ  v3, v1;        \
	PXOR   v0, v6;        \
	PXOR   v1, v7;        \
	PSHUFB c48, v6;       \
	PSHUFB c48, v7;       \
	PADDQ  v6, v4;        \
	PADDQ  v7, v5;        \
	PXOR   v4, v2;        \
	PXOR   v5, v3;        \
	MOVOU  v2, t0;        \
	PADDQ  v2, t0;        \
	PSRLQ  $63, v2;       \
	PXOR   t0, v2;        \
	MOVOU  v3, t0;        \
	PADDQ  v3, t0;        \
	PSRLQ  $63, v3;       \
	PXOR   t0, v3
|||
|
|||
// LOAD_MSG gathers eight 64-bit message words from the block at src into the
// four registers m0..m3. Word i0 goes to the low lane of m0 and i1 to its
// high lane, (i2, i3) fill m1, (i4, i5) fill m2 and (i6, i7) fill m3. The
// indices are word indices; each is scaled by 8 to form the byte offset.
#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
	MOVQ   i0*8(src), m0;     \
	PINSRQ $1, i1*8(src), m0; \
	MOVQ   i2*8(src), m1;     \
	PINSRQ $1, i3*8(src), m1; \
	MOVQ   i4*8(src), m2;     \
	PINSRQ $1, i5*8(src), m2; \
	MOVQ   i6*8(src), m3;     \
	PINSRQ $1, i7*8(src), m3
|||
|
|||
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
//
// hashBlocksSSE4 compresses every 128-byte block of blocks into the chaining
// value h and advances the 128-bit byte counter c by 128 per block. The loop
// body runs before the length is tested and stops only when the remaining
// length hits exactly zero, so len(blocks) must be a non-zero multiple of 128.
//
// Register use:
//   AX = h, BX = c, CX = flag, SI = current block, DI = bytes left
//   R8, R9   = low / high counter word
//   R10      = 16-byte aligned scratch area inside the frame
//   X12, X15 = h[0:2], h[2:4], kept in registers across blocks
//   X13, X14 = c40 / c48 shuffle masks
//   X0-X7    = working state; X8-X11 = message words and temporaries
//
// Scratch layout: 0(R10) holds iv3 ^ (flag || 0). 16(R10)..271(R10) hold the
// message words of the first two rounds, which the last two rounds reuse.
TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48 // frame size = 272 + 16 byte alignment
	MOVQ h+0(FP), AX
	MOVQ c+8(FP), BX
	MOVQ flag+16(FP), CX
	MOVQ blocks_base+24(FP), SI
	MOVQ blocks_len+32(FP), DI

	// Round SP up to a 16-byte boundary so the aligned MOVO forms can be used.
	MOVQ SP, R10
	ADDQ $15, R10
	ANDQ $~15, R10

	MOVOU ·iv3<>(SB), X0
	MOVO  X0, 0(R10)
	XORQ  CX, 0(R10)     // 0(R10) = ·iv3 ^ (CX || 0)

	MOVOU ·c40<>(SB), X13
	MOVOU ·c48<>(SB), X14

	MOVOU 0(AX), X12
	MOVOU 16(AX), X15

	MOVQ 0(BX), R8
	MOVQ 8(BX), R9

loop:
	// Advance the 128-bit counter. The low word wrapped around iff it is now
	// below 128 as an UNSIGNED value, so the branch must be unsigned (JCC);
	// a signed branch would also carry for every low word >= 1<<63.
	ADDQ $128, R8
	CMPQ R8, $128
	JCC  noinc
	INCQ R9

noinc:
	MOVQ   R8, X8
	PINSRQ $1, R9, X8

	MOVO  X12, X0
	MOVO  X15, X1
	MOVOU 32(AX), X2
	MOVOU 48(AX), X3
	MOVOU ·iv0<>(SB), X4
	MOVOU ·iv1<>(SB), X5
	MOVOU ·iv2<>(SB), X6

	PXOR X8, X6
	MOVO 0(R10), X7

	// Round 1: the message words are also saved for reuse in round 11.
	LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
	MOVO X8, 16(R10)
	MOVO X9, 32(R10)
	MOVO X10, 48(R10)
	MOVO X11, 64(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
	MOVO X8, 80(R10)
	MOVO X9, 96(R10)
	MOVO X10, 112(R10)
	MOVO X11, 128(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Round 2: the message words are also saved for reuse in round 12.
	LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
	MOVO X8, 144(R10)
	MOVO X9, 160(R10)
	MOVO X10, 176(R10)
	MOVO X11, 192(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
	MOVO X8, 208(R10)
	MOVO X9, 224(R10)
	MOVO X10, 240(R10)
	MOVO X11, 256(R10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Rounds 3-10.
	LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Rounds 11 and 12 reuse the message words saved by rounds 1 and 2.
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)

	// Feed-forward: h ^= state[0:8] ^ state[8:16]. h[0:4] stays in X12/X15;
	// h[4:8] is written back to memory after every block.
	MOVOU 32(AX), X10
	MOVOU 48(AX), X11
	PXOR  X0, X12
	PXOR  X1, X15
	PXOR  X2, X10
	PXOR  X3, X11
	PXOR  X4, X12
	PXOR  X5, X15
	PXOR  X6, X10
	PXOR  X7, X11
	MOVOU X10, 32(AX)
	MOVOU X11, 48(AX)

	LEAQ 128(SI), SI
	SUBQ $128, DI
	JNE  loop

	MOVOU X12, 0(AX)
	MOVOU X15, 16(AX)

	MOVQ R8, 0(BX)
	MOVQ R9, 8(BX)

	RET
@ -0,0 +1,182 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"math/bits" |
|||
) |
|||
|
|||
// precomputed holds the message-word schedule used by hashBlocksGeneric:
// one 16-entry array per round, twelve rounds in total. The entries are
// calculated from the sigma constants. Within a row, entries 0-3 and 4-7 feed
// the first (column) mixing pass and entries 8-11 and 12-15 feed the second
// (diagonal) pass, in the order in which hashBlocksGeneric consumes s[0..15].
var precomputed = [12][16]byte{
	{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15},
	{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3},
	{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4},
	{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8},
	{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13},
	{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9},
	{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11},
	{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10},
	{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5},
	{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0},
	{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
	{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
}
|||
|
|||
// hashBlocksGeneric is the portable compression loop. For every chunk of 16
// little-endian 64-bit words (128 bytes) in blocks it advances the 128-bit
// counter held in c by BlockSize, runs one mixing round per row of
// precomputed over a 16-word working state seeded from h and iv, and folds
// the result back into h. The updated counter is stored in c on return.
//
// blocks is consumed in whole chunks; a trailing partial chunk makes the
// word load panic. NOTE(review): callers presumably always pass a multiple
// of BlockSize — confirm.
func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
	var m [16]uint64
	c0, c1 := c[0], c[1]

	for i := 0; i < len(blocks); {
		// 128-bit counter increment: an unsigned wrap of the low word
		// carries into the high word.
		c0 += BlockSize
		if c0 < BlockSize {
			c1++
		}

		// Working state: v0..v7 from the chaining value, v8..v15 from the IV
		// with the counter and the flag mixed into v12..v14.
		v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7]
		v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]
		v12 ^= c0
		v13 ^= c1
		v14 ^= flag

		// Decode the next 16 message words; i advances by 8 per word.
		for j := range m {
			m[j] = binary.LittleEndian.Uint64(blocks[i:])
			i += 8
		}

		for j := range precomputed {
			s := &(precomputed[j])

			// Column pass, first half: rotations by 32 and 24.
			v0 += m[s[0]]
			v0 += v4
			v12 ^= v0
			v12 = bits.RotateLeft64(v12, -32)
			v8 += v12
			v4 ^= v8
			v4 = bits.RotateLeft64(v4, -24)
			v1 += m[s[1]]
			v1 += v5
			v13 ^= v1
			v13 = bits.RotateLeft64(v13, -32)
			v9 += v13
			v5 ^= v9
			v5 = bits.RotateLeft64(v5, -24)
			v2 += m[s[2]]
			v2 += v6
			v14 ^= v2
			v14 = bits.RotateLeft64(v14, -32)
			v10 += v14
			v6 ^= v10
			v6 = bits.RotateLeft64(v6, -24)
			v3 += m[s[3]]
			v3 += v7
			v15 ^= v3
			v15 = bits.RotateLeft64(v15, -32)
			v11 += v15
			v7 ^= v11
			v7 = bits.RotateLeft64(v7, -24)

			// Column pass, second half: rotations by 16 and 63.
			v0 += m[s[4]]
			v0 += v4
			v12 ^= v0
			v12 = bits.RotateLeft64(v12, -16)
			v8 += v12
			v4 ^= v8
			v4 = bits.RotateLeft64(v4, -63)
			v1 += m[s[5]]
			v1 += v5
			v13 ^= v1
			v13 = bits.RotateLeft64(v13, -16)
			v9 += v13
			v5 ^= v9
			v5 = bits.RotateLeft64(v5, -63)
			v2 += m[s[6]]
			v2 += v6
			v14 ^= v2
			v14 = bits.RotateLeft64(v14, -16)
			v10 += v14
			v6 ^= v10
			v6 = bits.RotateLeft64(v6, -63)
			v3 += m[s[7]]
			v3 += v7
			v15 ^= v3
			v15 = bits.RotateLeft64(v15, -16)
			v11 += v15
			v7 ^= v11
			v7 = bits.RotateLeft64(v7, -63)

			// Diagonal pass, first half: rotations by 32 and 24.
			v0 += m[s[8]]
			v0 += v5
			v15 ^= v0
			v15 = bits.RotateLeft64(v15, -32)
			v10 += v15
			v5 ^= v10
			v5 = bits.RotateLeft64(v5, -24)
			v1 += m[s[9]]
			v1 += v6
			v12 ^= v1
			v12 = bits.RotateLeft64(v12, -32)
			v11 += v12
			v6 ^= v11
			v6 = bits.RotateLeft64(v6, -24)
			v2 += m[s[10]]
			v2 += v7
			v13 ^= v2
			v13 = bits.RotateLeft64(v13, -32)
			v8 += v13
			v7 ^= v8
			v7 = bits.RotateLeft64(v7, -24)
			v3 += m[s[11]]
			v3 += v4
			v14 ^= v3
			v14 = bits.RotateLeft64(v14, -32)
			v9 += v14
			v4 ^= v9
			v4 = bits.RotateLeft64(v4, -24)

			// Diagonal pass, second half: rotations by 16 and 63.
			v0 += m[s[12]]
			v0 += v5
			v15 ^= v0
			v15 = bits.RotateLeft64(v15, -16)
			v10 += v15
			v5 ^= v10
			v5 = bits.RotateLeft64(v5, -63)
			v1 += m[s[13]]
			v1 += v6
			v12 ^= v1
			v12 = bits.RotateLeft64(v12, -16)
			v11 += v12
			v6 ^= v11
			v6 = bits.RotateLeft64(v6, -63)
			v2 += m[s[14]]
			v2 += v7
			v13 ^= v2
			v13 = bits.RotateLeft64(v13, -16)
			v8 += v13
			v7 ^= v8
			v7 = bits.RotateLeft64(v7, -63)
			v3 += m[s[15]]
			v3 += v4
			v14 ^= v3
			v14 = bits.RotateLeft64(v14, -16)
			v9 += v14
			v4 ^= v9
			v4 = bits.RotateLeft64(v4, -63)

		}

		// Feed-forward: fold both halves of the working state into h.
		h[0] ^= v0 ^ v8
		h[1] ^= v1 ^ v9
		h[2] ^= v2 ^ v10
		h[3] ^= v3 ^ v11
		h[4] ^= v4 ^ v12
		h[5] ^= v5 ^ v13
		h[6] ^= v6 ^ v14
		h[7] ^= v7 ^ v15
	}
	c[0], c[1] = c0, c1
}
@ -0,0 +1,11 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !amd64 || purego || !gc
|
|||
|
|||
package blake2b |
|||
|
|||
// hashBlocks is the variant selected by this file's build constraint
// (!amd64 || purego || !gc). It forwards unchanged to the portable
// implementation, hashBlocksGeneric.
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) {
	hashBlocksGeneric(h, c, flag, blocks)
}
@ -0,0 +1,177 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"errors" |
|||
"io" |
|||
) |
|||
|
|||
// XOF defines the interface to hash functions that
// support arbitrary-length output. Input is absorbed with Write; once
// reading has started, no further input may be written.
type XOF interface {
	// Write absorbs more data into the hash's state. It panics if called
	// after Read.
	io.Writer

	// Read reads more output from the hash. It returns io.EOF if the limit
	// has been reached.
	io.Reader

	// Clone returns a copy of the XOF in its current state.
	Clone() XOF

	// Reset resets the XOF to its initial state.
	Reset()
}
|||
|
|||
// OutputLengthUnknown can be used as the size argument to NewXOF to indicate
// that the length of the output is not known in advance.
const OutputLengthUnknown = 0

// magicUnknownOutputLength is a magic value for the output size that indicates
// an unknown number of output bytes. NewXOF substitutes it for
// OutputLengthUnknown and rejects it as an explicit size.
const magicUnknownOutputLength = (1 << 32) - 1

// maxOutputLength is the absolute maximum number of bytes to produce when the
// number of output bytes is unknown: 2^32 * 64 = 2^38 bytes (256 GiB).
const maxOutputLength = (1 << 32) * 64
|||
|
|||
// NewXOF creates a new variable-output-length hash. The hash either produce a
|
|||
// known number of bytes (1 <= size < 2**32-1), or an unknown number of bytes
|
|||
// (size == OutputLengthUnknown). In the latter case, an absolute limit of
|
|||
// 256GiB applies.
|
|||
//
|
|||
// A non-nil key turns the hash into a MAC. The key must between
|
|||
// zero and 32 bytes long.
|
|||
func NewXOF(size uint32, key []byte) (XOF, error) { |
|||
if len(key) > Size { |
|||
return nil, errKeySize |
|||
} |
|||
if size == magicUnknownOutputLength { |
|||
// 2^32-1 indicates an unknown number of bytes and thus isn't a
|
|||
// valid length.
|
|||
return nil, errors.New("blake2b: XOF length too large") |
|||
} |
|||
if size == OutputLengthUnknown { |
|||
size = magicUnknownOutputLength |
|||
} |
|||
x := &xof{ |
|||
d: digest{ |
|||
size: Size, |
|||
keyLen: len(key), |
|||
}, |
|||
length: size, |
|||
} |
|||
copy(x.d.key[:], key) |
|||
x.Reset() |
|||
return x, nil |
|||
} |
|||
|
|||
type xof struct { |
|||
d digest |
|||
length uint32 |
|||
remaining uint64 |
|||
cfg, root, block [Size]byte |
|||
offset int |
|||
nodeOffset uint32 |
|||
readMode bool |
|||
} |
|||
|
|||
func (x *xof) Write(p []byte) (n int, err error) { |
|||
if x.readMode { |
|||
panic("blake2b: write to XOF after read") |
|||
} |
|||
return x.d.Write(p) |
|||
} |
|||
|
|||
func (x *xof) Clone() XOF { |
|||
clone := *x |
|||
return &clone |
|||
} |
|||
|
|||
func (x *xof) Reset() { |
|||
x.cfg[0] = byte(Size) |
|||
binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length
|
|||
binary.LittleEndian.PutUint32(x.cfg[12:], x.length) // XOF length
|
|||
x.cfg[17] = byte(Size) // inner hash size
|
|||
|
|||
x.d.Reset() |
|||
x.d.h[1] ^= uint64(x.length) << 32 |
|||
|
|||
x.remaining = uint64(x.length) |
|||
if x.remaining == magicUnknownOutputLength { |
|||
x.remaining = maxOutputLength |
|||
} |
|||
x.offset, x.nodeOffset = 0, 0 |
|||
x.readMode = false |
|||
} |
|||
|
|||
func (x *xof) Read(p []byte) (n int, err error) { |
|||
if !x.readMode { |
|||
x.d.finalize(&x.root) |
|||
x.readMode = true |
|||
} |
|||
|
|||
if x.remaining == 0 { |
|||
return 0, io.EOF |
|||
} |
|||
|
|||
n = len(p) |
|||
if uint64(n) > x.remaining { |
|||
n = int(x.remaining) |
|||
p = p[:n] |
|||
} |
|||
|
|||
if x.offset > 0 { |
|||
blockRemaining := Size - x.offset |
|||
if n < blockRemaining { |
|||
x.offset += copy(p, x.block[x.offset:]) |
|||
x.remaining -= uint64(n) |
|||
return |
|||
} |
|||
copy(p, x.block[x.offset:]) |
|||
p = p[blockRemaining:] |
|||
x.offset = 0 |
|||
x.remaining -= uint64(blockRemaining) |
|||
} |
|||
|
|||
for len(p) >= Size { |
|||
binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) |
|||
x.nodeOffset++ |
|||
|
|||
x.d.initConfig(&x.cfg) |
|||
x.d.Write(x.root[:]) |
|||
x.d.finalize(&x.block) |
|||
|
|||
copy(p, x.block[:]) |
|||
p = p[Size:] |
|||
x.remaining -= uint64(Size) |
|||
} |
|||
|
|||
if todo := len(p); todo > 0 { |
|||
if x.remaining < uint64(Size) { |
|||
x.cfg[0] = byte(x.remaining) |
|||
} |
|||
binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) |
|||
x.nodeOffset++ |
|||
|
|||
x.d.initConfig(&x.cfg) |
|||
x.d.Write(x.root[:]) |
|||
x.d.finalize(&x.block) |
|||
|
|||
x.offset = copy(p, x.block[:todo]) |
|||
x.remaining -= uint64(todo) |
|||
} |
|||
return |
|||
} |
|||
|
|||
func (d *digest) initConfig(cfg *[Size]byte) { |
|||
d.offset, d.c[0], d.c[1] = 0, 0, 0 |
|||
for i := range d.h { |
|||
d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(cfg[i*8:]) |
|||
} |
|||
} |
@ -0,0 +1,30 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"crypto" |
|||
"hash" |
|||
) |
|||
|
|||
func init() { |
|||
newHash256 := func() hash.Hash { |
|||
h, _ := New256(nil) |
|||
return h |
|||
} |
|||
newHash384 := func() hash.Hash { |
|||
h, _ := New384(nil) |
|||
return h |
|||
} |
|||
|
|||
newHash512 := func() hash.Hash { |
|||
h, _ := New512(nil) |
|||
return h |
|||
} |
|||
|
|||
crypto.RegisterHash(crypto.BLAKE2b_256, newHash256) |
|||
crypto.RegisterHash(crypto.BLAKE2b_384, newHash384) |
|||
crypto.RegisterHash(crypto.BLAKE2b_512, newHash512) |
|||
} |
@ -0,0 +1,119 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"fmt" |
|||
"math" |
|||
) |
|||
|
|||
// roundRobinWriteScheduler is a write scheduler that always serves queued
// control frames first and otherwise cycles through the open streams'
// queues, one frame at a time.
type roundRobinWriteScheduler struct {
	// control contains control frames (SETTINGS, PING, etc.).
	control writeQueue

	// streams maps stream ID to a queue.
	streams map[uint32]*writeQueue

	// stream queues are stored in a circular linked list.
	// head is the next stream to write, or nil if there are no streams open.
	head *writeQueue

	// pool of empty queues for reuse.
	queuePool writeQueuePool
}
|||
|
|||
// newRoundRobinWriteScheduler constructs a new write scheduler.
|
|||
// The round robin scheduler priorizes control frames
|
|||
// like SETTINGS and PING over DATA frames.
|
|||
// When there are no control frames to send, it performs a round-robin
|
|||
// selection from the ready streams.
|
|||
func newRoundRobinWriteScheduler() WriteScheduler { |
|||
ws := &roundRobinWriteScheduler{ |
|||
streams: make(map[uint32]*writeQueue), |
|||
} |
|||
return ws |
|||
} |
|||
|
|||
func (ws *roundRobinWriteScheduler) OpenStream(streamID uint32, options OpenStreamOptions) { |
|||
if ws.streams[streamID] != nil { |
|||
panic(fmt.Errorf("stream %d already opened", streamID)) |
|||
} |
|||
q := ws.queuePool.get() |
|||
ws.streams[streamID] = q |
|||
if ws.head == nil { |
|||
ws.head = q |
|||
q.next = q |
|||
q.prev = q |
|||
} else { |
|||
// Queues are stored in a ring.
|
|||
// Insert the new stream before ws.head, putting it at the end of the list.
|
|||
q.prev = ws.head.prev |
|||
q.next = ws.head |
|||
q.prev.next = q |
|||
q.next.prev = q |
|||
} |
|||
} |
|||
|
|||
func (ws *roundRobinWriteScheduler) CloseStream(streamID uint32) { |
|||
q := ws.streams[streamID] |
|||
if q == nil { |
|||
return |
|||
} |
|||
if q.next == q { |
|||
// This was the only open stream.
|
|||
ws.head = nil |
|||
} else { |
|||
q.prev.next = q.next |
|||
q.next.prev = q.prev |
|||
if ws.head == q { |
|||
ws.head = q.next |
|||
} |
|||
} |
|||
delete(ws.streams, streamID) |
|||
ws.queuePool.put(q) |
|||
} |
|||
|
|||
// AdjustStream is a no-op: this scheduler ignores stream priorities.
func (ws *roundRobinWriteScheduler) AdjustStream(streamID uint32, priority PriorityParam) {}
|||
|
|||
func (ws *roundRobinWriteScheduler) Push(wr FrameWriteRequest) { |
|||
if wr.isControl() { |
|||
ws.control.push(wr) |
|||
return |
|||
} |
|||
q := ws.streams[wr.StreamID()] |
|||
if q == nil { |
|||
// This is a closed stream.
|
|||
// wr should not be a HEADERS or DATA frame.
|
|||
// We push the request onto the control queue.
|
|||
if wr.DataSize() > 0 { |
|||
panic("add DATA on non-open stream") |
|||
} |
|||
ws.control.push(wr) |
|||
return |
|||
} |
|||
q.push(wr) |
|||
} |
|||
|
|||
func (ws *roundRobinWriteScheduler) Pop() (FrameWriteRequest, bool) { |
|||
// Control and RST_STREAM frames first.
|
|||
if !ws.control.empty() { |
|||
return ws.control.shift(), true |
|||
} |
|||
if ws.head == nil { |
|||
return FrameWriteRequest{}, false |
|||
} |
|||
q := ws.head |
|||
for { |
|||
if wr, ok := q.consume(math.MaxInt32); ok { |
|||
ws.head = q.next |
|||
return wr, true |
|||
} |
|||
q = q.next |
|||
if q == ws.head { |
|||
break |
|||
} |
|||
} |
|||
return FrameWriteRequest{}, false |
|||
} |
File diff suppressed because it is too large
@ -0,0 +1,30 @@ |
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
|||
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !go1.16
|
|||
|
|||
package idna |
|||
|
|||
// appendMapping appends the mapping for the respective rune. isMapped must be
|
|||
// true. A mapping is a categorization of a rune as defined in UTS #46.
|
|||
func (c info) appendMapping(b []byte, s string) []byte { |
|||
index := int(c >> indexShift) |
|||
if c&xorBit == 0 { |
|||
s := mappings[index:] |
|||
return append(b, s[1:s[0]+1]...) |
|||
} |
|||
b = append(b, s...) |
|||
if c&inlineXOR == inlineXOR { |
|||
// TODO: support and handle two-byte inline masks
|
|||
b[len(b)-1] ^= byte(index) |
|||
} else { |
|||
for p := len(b) - int(xorData[index]); p < len(b); p++ { |
|||
index++ |
|||
b[p] ^= xorData[index] |
|||
} |
|||
} |
|||
return b |
|||
} |
@ -0,0 +1,30 @@ |
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
|||
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build go1.16
|
|||
|
|||
package idna |
|||
|
|||
// appendMapping appends the mapping for the respective rune. isMapped must be
|
|||
// true. A mapping is a categorization of a rune as defined in UTS #46.
|
|||
func (c info) appendMapping(b []byte, s string) []byte { |
|||
index := int(c >> indexShift) |
|||
if c&xorBit == 0 { |
|||
p := index |
|||
return append(b, mappings[mappingIndex[p]:mappingIndex[p+1]]...) |
|||
} |
|||
b = append(b, s...) |
|||
if c&inlineXOR == inlineXOR { |
|||
// TODO: support and handle two-byte inline masks
|
|||
b[len(b)-1] ^= byte(index) |
|||
} else { |
|||
for p := len(b) - int(xorData[index]); p < len(b); p++ { |
|||
index++ |
|||
b[p] ^= xorData[index] |
|||
} |
|||
} |
|||
return b |
|||
} |
@ -0,0 +1,17 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build gc |
|||
|
|||
#include "textflag.h" |
|||
|
|||
//
// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go
//

// syscall6 is a trampoline: it declares no local frame ($0) and 88 bytes of
// arguments and results, and jumps straight to syscall·syscall6.
TEXT ·syscall6(SB),NOSPLIT,$0-88
	JMP	syscall·syscall6(SB)
|||
|
|||
// rawSyscall6 is a trampoline: it declares no local frame ($0) and 88 bytes
// of arguments and results, and jumps straight to syscall·rawSyscall6.
TEXT ·rawSyscall6(SB),NOSPLIT,$0-88
	JMP	syscall·rawSyscall6(SB)
@ -0,0 +1,66 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"runtime" |
|||
) |
|||
|
|||
// byteOrder is a subset of encoding/binary.ByteOrder: it decodes fixed-width
// unsigned integers from the start of a byte slice.
type byteOrder interface {
	Uint32([]byte) uint32
	Uint64([]byte) uint64
}

// littleEndian decodes with the least significant byte first.
type littleEndian struct{}

// bigEndian decodes with the most significant byte first.
type bigEndian struct{}

// Uint32 decodes b[0:4] as a little-endian value. It panics if len(b) < 4.
func (littleEndian) Uint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint32
	for i := 3; i >= 0; i-- {
		v = v<<8 | uint32(b[i])
	}
	return v
}

// Uint64 decodes b[0:8] as a little-endian value. It panics if len(b) < 8.
func (littleEndian) Uint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint64
	for i := 7; i >= 0; i-- {
		v = v<<8 | uint64(b[i])
	}
	return v
}

// Uint32 decodes b[0:4] as a big-endian value. It panics if len(b) < 4.
func (bigEndian) Uint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint32
	for i := 0; i < 4; i++ {
		v = v<<8 | uint32(b[i])
	}
	return v
}

// Uint64 decodes b[0:8] as a big-endian value. It panics if len(b) < 8.
func (bigEndian) Uint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	var v uint64
	for i := 0; i < 8; i++ {
		v = v<<8 | uint64(b[i])
	}
	return v
}
|||
|
|||
// hostByteOrder returns littleEndian on little-endian machines and
|
|||
// bigEndian on big-endian machines.
|
|||
func hostByteOrder() byteOrder { |
|||
switch runtime.GOARCH { |
|||
case "386", "amd64", "amd64p32", |
|||
"alpha", |
|||
"arm", "arm64", |
|||
"loong64", |
|||
"mipsle", "mips64le", "mips64p32le", |
|||
"nios2", |
|||
"ppc64le", |
|||
"riscv", "riscv64", |
|||
"sh": |
|||
return littleEndian{} |
|||
case "armbe", "arm64be", |
|||
"m68k", |
|||
"mips", "mips64", "mips64p32", |
|||
"ppc", "ppc64", |
|||
"s390", "s390x", |
|||
"shbe", |
|||
"sparc", "sparc64": |
|||
return bigEndian{} |
|||
} |
|||
panic("unknown architecture") |
|||
} |
@ -0,0 +1,290 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Package cpu implements processor feature detection for
|
|||
// various CPU architectures.
|
|||
package cpu |
|||
|
|||
import ( |
|||
"os" |
|||
"strings" |
|||
) |
|||
|
|||
// Initialized reports whether the CPU features were initialized.
//
// For some GOOS/GOARCH combinations initialization of the CPU features depends
// on reading an operating-system-specific file, e.g. /proc/self/auxv on
// linux/arm. Initialized will report false if reading the file fails.
var Initialized bool
|||
|
|||
// CacheLinePad is used to pad structs to avoid false sharing. It is
// cacheLineSize bytes wide and has no accessible fields.
type CacheLinePad struct{ _ [cacheLineSize]byte }
|||
|
|||
// X86 contains the supported CPU features of the
// current X86/AMD64 platform. If the current platform
// is not X86/AMD64 then all feature flags are false.
//
// X86 is padded at both ends to avoid false sharing. Further, HasAVX
// and HasAVX2 are only set if the OS supports XMM and YMM
// registers in addition to the CPUID feature bit being set.
var X86 struct {
	_                   CacheLinePad
	HasAES              bool // AES hardware implementation (AES NI)
	HasADX              bool // Multi-precision add-carry instruction extensions
	HasAVX              bool // Advanced vector extension
	HasAVX2             bool // Advanced vector extension 2
	HasAVX512           bool // Advanced vector extension 512
	HasAVX512F          bool // Advanced vector extension 512 Foundation Instructions
	HasAVX512CD         bool // Advanced vector extension 512 Conflict Detection Instructions
	HasAVX512ER         bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
	HasAVX512PF         bool // Advanced vector extension 512 Prefetch Instructions
	HasAVX512VL         bool // Advanced vector extension 512 Vector Length Extensions
	HasAVX512BW         bool // Advanced vector extension 512 Byte and Word Instructions
	HasAVX512DQ         bool // Advanced vector extension 512 Doubleword and Quadword Instructions
	HasAVX512IFMA       bool // Advanced vector extension 512 Integer Fused Multiply Add
	HasAVX512VBMI       bool // Advanced vector extension 512 Vector Byte Manipulation Instructions
	HasAVX5124VNNIW     bool // Advanced vector extension 512 Vector Neural Network Instructions Word variable precision
	HasAVX5124FMAPS     bool // Advanced vector extension 512 Fused Multiply Accumulation Packed Single precision
	HasAVX512VPOPCNTDQ  bool // Advanced vector extension 512 Double and quad word population count instructions
	HasAVX512VPCLMULQDQ bool // Advanced vector extension 512 Vector carry-less multiply operations
	HasAVX512VNNI       bool // Advanced vector extension 512 Vector Neural Network Instructions
	HasAVX512GFNI       bool // Advanced vector extension 512 Galois field New Instructions
	HasAVX512VAES       bool // Advanced vector extension 512 Vector AES instructions
	HasAVX512VBMI2      bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
	HasAVX512BITALG     bool // Advanced vector extension 512 Bit Algorithms
	HasAVX512BF16       bool // Advanced vector extension 512 BFloat16 Instructions
	HasAMXTile          bool // Advanced Matrix Extension Tile instructions
	HasAMXInt8          bool // Advanced Matrix Extension Int8 instructions
	HasAMXBF16          bool // Advanced Matrix Extension BFloat16 instructions
	HasBMI1             bool // Bit manipulation instruction set 1
	HasBMI2             bool // Bit manipulation instruction set 2
	HasCX16             bool // Compare and exchange 16 Bytes
	HasERMS             bool // Enhanced REP for MOVSB and STOSB
	HasFMA              bool // Fused-multiply-add instructions
	HasOSXSAVE          bool // OS supports XSAVE/XRESTOR for saving/restoring XMM registers.
	HasPCLMULQDQ        bool // PCLMULQDQ instruction - most often used for AES-GCM
	HasPOPCNT           bool // Hamming weight instruction POPCNT.
	HasRDRAND           bool // RDRAND instruction (on-chip random number generator)
	HasRDSEED           bool // RDSEED instruction (on-chip random number generator)
	HasSSE2             bool // Streaming SIMD extension 2 (always available on amd64)
	HasSSE3             bool // Streaming SIMD extension 3
	HasSSSE3            bool // Supplemental streaming SIMD extension 3
	HasSSE41            bool // Streaming SIMD extension 4 and 4.1
	HasSSE42            bool // Streaming SIMD extension 4 and 4.2
	_                   CacheLinePad
}
|||
|
|||
// ARM64 contains the supported CPU features of the
|
|||
// current ARMv8(aarch64) platform. If the current platform
|
|||
// is not arm64 then all feature flags are false.
|
|||
var ARM64 struct { |
|||
_ CacheLinePad |
|||
HasFP bool // Floating-point instruction set (always available)
|
|||
HasASIMD bool // Advanced SIMD (always available)
|
|||
HasEVTSTRM bool // Event stream support
|
|||
HasAES bool // AES hardware implementation
|
|||
HasPMULL bool // Polynomial multiplication instruction set
|
|||
HasSHA1 bool // SHA1 hardware implementation
|
|||
HasSHA2 bool // SHA2 hardware implementation
|
|||
HasCRC32 bool // CRC32 hardware implementation
|
|||
HasATOMICS bool // Atomic memory operation instruction set
|
|||
HasFPHP bool // Half precision floating-point instruction set
|
|||
HasASIMDHP bool // Advanced SIMD half precision instruction set
|
|||
HasCPUID bool // CPUID identification scheme registers
|
|||
HasASIMDRDM bool // Rounding double multiply add/subtract instruction set
|
|||
HasJSCVT bool // Javascript conversion from floating-point to integer
|
|||
HasFCMA bool // Floating-point multiplication and addition of complex numbers
|
|||
HasLRCPC bool // Release Consistent processor consistent support
|
|||
HasDCPOP bool // Persistent memory support
|
|||
HasSHA3 bool // SHA3 hardware implementation
|
|||
HasSM3 bool // SM3 hardware implementation
|
|||
HasSM4 bool // SM4 hardware implementation
|
|||
HasASIMDDP bool // Advanced SIMD dot product (SDOT/UDOT) instruction set
|
|||
HasSHA512 bool // SHA512 hardware implementation
|
|||
HasSVE bool // Scalable Vector Extensions
|
|||
HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// ARM contains the supported CPU features of the current ARM (32-bit) platform.
|
|||
// All feature flags are false if:
|
|||
// 1. the current platform is not arm, or
|
|||
// 2. the current operating system is not Linux.
|
|||
var ARM struct { |
|||
_ CacheLinePad |
|||
HasSWP bool // SWP instruction support
|
|||
HasHALF bool // Half-word load and store support
|
|||
HasTHUMB bool // ARM Thumb instruction set
|
|||
Has26BIT bool // Address space limited to 26-bits
|
|||
HasFASTMUL bool // 32-bit operand, 64-bit result multiplication support
|
|||
HasFPA bool // Floating point arithmetic support
|
|||
HasVFP bool // Vector floating point support
|
|||
HasEDSP bool // DSP Extensions support
|
|||
HasJAVA bool // Java instruction set
|
|||
HasIWMMXT bool // Intel Wireless MMX technology support
|
|||
HasCRUNCH bool // MaverickCrunch context switching and handling
|
|||
HasTHUMBEE bool // Thumb EE instruction set
|
|||
HasNEON bool // NEON instruction set
|
|||
HasVFPv3 bool // Vector floating point version 3 support
|
|||
HasVFPv3D16 bool // Vector floating point version 3 with 16 double-precision registers (D0-D15)
|
|||
HasTLS bool // Thread local storage support
|
|||
HasVFPv4 bool // Vector floating point version 4 support
|
|||
HasIDIVA bool // Integer divide instruction support in ARM mode
|
|||
HasIDIVT bool // Integer divide instruction support in Thumb mode
|
|||
HasVFPD32 bool // Vector floating point with 32 double-precision registers (D0-D31)
|
|||
HasLPAE bool // Large Physical Address Extensions
|
|||
HasEVTSTRM bool // Event stream support
|
|||
HasAES bool // AES hardware implementation
|
|||
HasPMULL bool // Polynomial multiplication instruction set
|
|||
HasSHA1 bool // SHA1 hardware implementation
|
|||
HasSHA2 bool // SHA2 hardware implementation
|
|||
HasCRC32 bool // CRC32 hardware implementation
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// MIPS64X contains the supported CPU features of the current mips64/mips64le
|
|||
// platforms. If the current platform is not mips64/mips64le or the current
|
|||
// operating system is not Linux then all feature flags are false.
|
|||
var MIPS64X struct { |
|||
_ CacheLinePad |
|||
HasMSA bool // MIPS SIMD architecture
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
|
|||
// If the current platform is not ppc64/ppc64le then all feature flags are false.
|
|||
//
|
|||
// For ppc64/ppc64le, it is safe to check only for ISA level starting on ISA v3.00,
|
|||
// since there are no optional categories. There are some exceptions that also
|
|||
// require kernel support to work (DARN, SCV), so there are feature bits for
|
|||
// those as well. The struct is padded to avoid false sharing.
|
|||
var PPC64 struct { |
|||
_ CacheLinePad |
|||
HasDARN bool // Hardware random number generator (requires kernel enablement)
|
|||
HasSCV bool // Syscall vectored (requires kernel enablement)
|
|||
IsPOWER8 bool // ISA v2.07 (POWER8)
|
|||
IsPOWER9 bool // ISA v3.00 (POWER9), implies IsPOWER8
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// S390X contains the supported CPU features of the current IBM Z
|
|||
// (s390x) platform. If the current platform is not IBM Z then all
|
|||
// feature flags are false.
|
|||
//
|
|||
// S390X is padded to avoid false sharing. Furthermore, HasVX is only set
|
|||
// if the OS supports vector registers in addition to the STFLE
|
|||
// feature bit being set.
|
|||
var S390X struct { |
|||
_ CacheLinePad |
|||
HasZARCH bool // z/Architecture mode is active [mandatory]
|
|||
HasSTFLE bool // store facility list extended
|
|||
HasLDISP bool // long (20-bit) displacements
|
|||
HasEIMM bool // 32-bit immediates
|
|||
HasDFP bool // decimal floating point
|
|||
HasETF3EH bool // ETF-3 enhanced
|
|||
HasMSA bool // message security assist (CPACF)
|
|||
HasAES bool // KM-AES{128,192,256} functions
|
|||
HasAESCBC bool // KMC-AES{128,192,256} functions
|
|||
HasAESCTR bool // KMCTR-AES{128,192,256} functions
|
|||
HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
|
|||
HasGHASH bool // KIMD-GHASH function
|
|||
HasSHA1 bool // K{I,L}MD-SHA-1 functions
|
|||
HasSHA256 bool // K{I,L}MD-SHA-256 functions
|
|||
HasSHA512 bool // K{I,L}MD-SHA-512 functions
|
|||
HasSHA3 bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
|
|||
HasVX bool // vector facility
|
|||
HasVXE bool // vector-enhancements facility 1
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// init fills in the feature flags for the current platform and then applies
// the GODEBUG overrides. The order matters: archInit detects the features,
// initOptions registers the flags that may be overridden, and processOptions
// finally applies the cpu.* settings read from GODEBUG to those flags.
func init() { |
|||
archInit() |
|||
initOptions() |
|||
processOptions() |
|||
} |
|||
|
|||
// options contains the cpu debug options that can be used in GODEBUG.
|
|||
// Options are arch dependent and are added by the arch specific initOptions functions.
|
|||
// Features that are mandatory for the specific GOARCH should have the Required field set
|
|||
// (e.g. SSE2 on amd64).
|
|||
var options []option |
|||
|
|||
// Option names should be lower case. e.g. avx instead of AVX.
|
|||
type option struct { |
|||
Name string |
|||
Feature *bool |
|||
Specified bool // whether feature value was specified in GODEBUG
|
|||
Enable bool // whether feature should be enabled
|
|||
Required bool // whether feature is mandatory and can not be disabled
|
|||
} |
|||
|
|||
func processOptions() { |
|||
env := os.Getenv("GODEBUG") |
|||
field: |
|||
for env != "" { |
|||
field := "" |
|||
i := strings.IndexByte(env, ',') |
|||
if i < 0 { |
|||
field, env = env, "" |
|||
} else { |
|||
field, env = env[:i], env[i+1:] |
|||
} |
|||
if len(field) < 4 || field[:4] != "cpu." { |
|||
continue |
|||
} |
|||
i = strings.IndexByte(field, '=') |
|||
if i < 0 { |
|||
print("GODEBUG sys/cpu: no value specified for \"", field, "\"\n") |
|||
continue |
|||
} |
|||
key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
|
|||
|
|||
var enable bool |
|||
switch value { |
|||
case "on": |
|||
enable = true |
|||
case "off": |
|||
enable = false |
|||
default: |
|||
print("GODEBUG sys/cpu: value \"", value, "\" not supported for cpu option \"", key, "\"\n") |
|||
continue field |
|||
} |
|||
|
|||
if key == "all" { |
|||
for i := range options { |
|||
options[i].Specified = true |
|||
options[i].Enable = enable || options[i].Required |
|||
} |
|||
continue field |
|||
} |
|||
|
|||
for i := range options { |
|||
if options[i].Name == key { |
|||
options[i].Specified = true |
|||
options[i].Enable = enable |
|||
continue field |
|||
} |
|||
} |
|||
|
|||
print("GODEBUG sys/cpu: unknown cpu feature \"", key, "\"\n") |
|||
} |
|||
|
|||
for _, o := range options { |
|||
if !o.Specified { |
|||
continue |
|||
} |
|||
|
|||
if o.Enable && !*o.Feature { |
|||
print("GODEBUG sys/cpu: can not enable \"", o.Name, "\", missing CPU support\n") |
|||
continue |
|||
} |
|||
|
|||
if !o.Enable && o.Required { |
|||
print("GODEBUG sys/cpu: can not disable \"", o.Name, "\", required CPU feature\n") |
|||
continue |
|||
} |
|||
|
|||
*o.Feature = o.Enable |
|||
} |
|||
} |
@ -0,0 +1,33 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build aix
|
|||
|
|||
package cpu |
|||
|
|||
const ( |
|||
// getsystemcfg constants
|
|||
_SC_IMPL = 2 |
|||
_IMPL_POWER8 = 0x10000 |
|||
_IMPL_POWER9 = 0x20000 |
|||
) |
|||
|
|||
func archInit() { |
|||
impl := getsystemcfg(_SC_IMPL) |
|||
if impl&_IMPL_POWER8 != 0 { |
|||
PPC64.IsPOWER8 = true |
|||
} |
|||
if impl&_IMPL_POWER9 != 0 { |
|||
PPC64.IsPOWER8 = true |
|||
PPC64.IsPOWER9 = true |
|||
} |
|||
|
|||
Initialized = true |
|||
} |
|||
|
|||
func getsystemcfg(label int) (n uint64) { |
|||
r0, _ := callgetsystemcfg(label) |
|||
n = uint64(r0) |
|||
return |
|||
} |
@ -0,0 +1,73 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
const cacheLineSize = 32 |
|||
|
|||
// HWCAP bits.
// These are specific to Linux.
const (
	hwcap_SWP = 1 << iota
	hwcap_HALF
	hwcap_THUMB
	hwcap_26BIT
	hwcap_FAST_MULT
	hwcap_FPA
	hwcap_VFP
	hwcap_EDSP
	hwcap_JAVA
	hwcap_IWMMXT
	hwcap_CRUNCH
	hwcap_THUMBEE
	hwcap_NEON
	hwcap_VFPv3
	hwcap_VFPv3D16
	hwcap_TLS
	hwcap_VFPv4
	hwcap_IDIVA
	hwcap_IDIVT
	hwcap_VFPD32
	hwcap_LPAE
	hwcap_EVTSTRM
)

// HWCAP2 bits.
// These are specific to Linux.
const (
	hwcap2_AES = 1 << iota
	hwcap2_PMULL
	hwcap2_SHA1
	hwcap2_SHA2
	hwcap2_CRC32
)
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "pmull", Feature: &ARM.HasPMULL}, |
|||
{Name: "sha1", Feature: &ARM.HasSHA1}, |
|||
{Name: "sha2", Feature: &ARM.HasSHA2}, |
|||
{Name: "swp", Feature: &ARM.HasSWP}, |
|||
{Name: "thumb", Feature: &ARM.HasTHUMB}, |
|||
{Name: "thumbee", Feature: &ARM.HasTHUMBEE}, |
|||
{Name: "tls", Feature: &ARM.HasTLS}, |
|||
{Name: "vfp", Feature: &ARM.HasVFP}, |
|||
{Name: "vfpd32", Feature: &ARM.HasVFPD32}, |
|||
{Name: "vfpv3", Feature: &ARM.HasVFPv3}, |
|||
{Name: "vfpv3d16", Feature: &ARM.HasVFPv3D16}, |
|||
{Name: "vfpv4", Feature: &ARM.HasVFPv4}, |
|||
{Name: "half", Feature: &ARM.HasHALF}, |
|||
{Name: "26bit", Feature: &ARM.Has26BIT}, |
|||
{Name: "fastmul", Feature: &ARM.HasFASTMUL}, |
|||
{Name: "fpa", Feature: &ARM.HasFPA}, |
|||
{Name: "edsp", Feature: &ARM.HasEDSP}, |
|||
{Name: "java", Feature: &ARM.HasJAVA}, |
|||
{Name: "iwmmxt", Feature: &ARM.HasIWMMXT}, |
|||
{Name: "crunch", Feature: &ARM.HasCRUNCH}, |
|||
{Name: "neon", Feature: &ARM.HasNEON}, |
|||
{Name: "idivt", Feature: &ARM.HasIDIVT}, |
|||
{Name: "idiva", Feature: &ARM.HasIDIVA}, |
|||
{Name: "lpae", Feature: &ARM.HasLPAE}, |
|||
{Name: "evtstrm", Feature: &ARM.HasEVTSTRM}, |
|||
{Name: "aes", Feature: &ARM.HasAES}, |
|||
{Name: "crc32", Feature: &ARM.HasCRC32}, |
|||
} |
|||
|
|||
} |
@ -0,0 +1,172 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import "runtime" |
|||
|
|||
// cacheLineSize is used to prevent false sharing of cache lines.
|
|||
// We choose 128 because Apple Silicon, a.k.a. M1, has 128-byte cache line size.
|
|||
// It doesn't cost much and is much more future-proof.
|
|||
const cacheLineSize = 128 |
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "fp", Feature: &ARM64.HasFP}, |
|||
{Name: "asimd", Feature: &ARM64.HasASIMD}, |
|||
{Name: "evstrm", Feature: &ARM64.HasEVTSTRM}, |
|||
{Name: "aes", Feature: &ARM64.HasAES}, |
|||
{Name: "fphp", Feature: &ARM64.HasFPHP}, |
|||
{Name: "jscvt", Feature: &ARM64.HasJSCVT}, |
|||
{Name: "lrcpc", Feature: &ARM64.HasLRCPC}, |
|||
{Name: "pmull", Feature: &ARM64.HasPMULL}, |
|||
{Name: "sha1", Feature: &ARM64.HasSHA1}, |
|||
{Name: "sha2", Feature: &ARM64.HasSHA2}, |
|||
{Name: "sha3", Feature: &ARM64.HasSHA3}, |
|||
{Name: "sha512", Feature: &ARM64.HasSHA512}, |
|||
{Name: "sm3", Feature: &ARM64.HasSM3}, |
|||
{Name: "sm4", Feature: &ARM64.HasSM4}, |
|||
{Name: "sve", Feature: &ARM64.HasSVE}, |
|||
{Name: "crc32", Feature: &ARM64.HasCRC32}, |
|||
{Name: "atomics", Feature: &ARM64.HasATOMICS}, |
|||
{Name: "asimdhp", Feature: &ARM64.HasASIMDHP}, |
|||
{Name: "cpuid", Feature: &ARM64.HasCPUID}, |
|||
{Name: "asimrdm", Feature: &ARM64.HasASIMDRDM}, |
|||
{Name: "fcma", Feature: &ARM64.HasFCMA}, |
|||
{Name: "dcpop", Feature: &ARM64.HasDCPOP}, |
|||
{Name: "asimddp", Feature: &ARM64.HasASIMDDP}, |
|||
{Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM}, |
|||
} |
|||
} |
|||
|
|||
func archInit() { |
|||
switch runtime.GOOS { |
|||
case "freebsd": |
|||
readARM64Registers() |
|||
case "linux", "netbsd", "openbsd": |
|||
doinit() |
|||
default: |
|||
// Many platforms don't seem to allow reading these registers.
|
|||
setMinimalFeatures() |
|||
} |
|||
} |
|||
|
|||
// setMinimalFeatures fakes the minimal ARM64 features expected by
|
|||
// TestARM64minimalFeatures.
|
|||
func setMinimalFeatures() { |
|||
ARM64.HasASIMD = true |
|||
ARM64.HasFP = true |
|||
} |
|||
|
|||
func readARM64Registers() { |
|||
Initialized = true |
|||
|
|||
parseARM64SystemRegisters(getisar0(), getisar1(), getpfr0()) |
|||
} |
|||
|
|||
func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { |
|||
// ID_AA64ISAR0_EL1
|
|||
switch extractBits(isar0, 4, 7) { |
|||
case 1: |
|||
ARM64.HasAES = true |
|||
case 2: |
|||
ARM64.HasAES = true |
|||
ARM64.HasPMULL = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 8, 11) { |
|||
case 1: |
|||
ARM64.HasSHA1 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 12, 15) { |
|||
case 1: |
|||
ARM64.HasSHA2 = true |
|||
case 2: |
|||
ARM64.HasSHA2 = true |
|||
ARM64.HasSHA512 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 16, 19) { |
|||
case 1: |
|||
ARM64.HasCRC32 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 20, 23) { |
|||
case 2: |
|||
ARM64.HasATOMICS = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 28, 31) { |
|||
case 1: |
|||
ARM64.HasASIMDRDM = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 32, 35) { |
|||
case 1: |
|||
ARM64.HasSHA3 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 36, 39) { |
|||
case 1: |
|||
ARM64.HasSM3 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 40, 43) { |
|||
case 1: |
|||
ARM64.HasSM4 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 44, 47) { |
|||
case 1: |
|||
ARM64.HasASIMDDP = true |
|||
} |
|||
|
|||
// ID_AA64ISAR1_EL1
|
|||
switch extractBits(isar1, 0, 3) { |
|||
case 1: |
|||
ARM64.HasDCPOP = true |
|||
} |
|||
|
|||
switch extractBits(isar1, 12, 15) { |
|||
case 1: |
|||
ARM64.HasJSCVT = true |
|||
} |
|||
|
|||
switch extractBits(isar1, 16, 19) { |
|||
case 1: |
|||
ARM64.HasFCMA = true |
|||
} |
|||
|
|||
switch extractBits(isar1, 20, 23) { |
|||
case 1: |
|||
ARM64.HasLRCPC = true |
|||
} |
|||
|
|||
// ID_AA64PFR0_EL1
|
|||
switch extractBits(pfr0, 16, 19) { |
|||
case 0: |
|||
ARM64.HasFP = true |
|||
case 1: |
|||
ARM64.HasFP = true |
|||
ARM64.HasFPHP = true |
|||
} |
|||
|
|||
switch extractBits(pfr0, 20, 23) { |
|||
case 0: |
|||
ARM64.HasASIMD = true |
|||
case 1: |
|||
ARM64.HasASIMD = true |
|||
ARM64.HasASIMDHP = true |
|||
} |
|||
|
|||
switch extractBits(pfr0, 32, 35) { |
|||
case 1: |
|||
ARM64.HasSVE = true |
|||
} |
|||
} |
|||
|
|||
// extractBits returns bits start through end (both inclusive, bit 0 being the
// least significant) of data, shifted down so that bit start becomes bit 0.
func extractBits(data uint64, start, end uint) uint {
	width := end - start + 1
	mask := uint(1)<<width - 1
	return uint(data>>start) & mask
}
@ -0,0 +1,31 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build gc |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// func getisar0() uint64 |
// getisar0 returns the raw value of ID_AA64ISAR0_EL1. It takes no arguments
// and has an 8-byte result slot ($0-8).
|||
TEXT ·getisar0(SB),NOSPLIT,$0-8 |
|||
// get Instruction Set Attributes 0 into x0 |
|||
// mrs x0, ID_AA64ISAR0_EL1 = d5380600 |
// The instruction is emitted as a raw machine word.
// NOTE(review): presumably because the assembler has no mnemonic for this
// register read - confirm.
|||
WORD $0xd5380600 |
|||
// Store x0 (R0) into the Go result slot.
MOVD R0, ret+0(FP) |
|||
RET |
|||
|
|||
// func getisar1() uint64 |
// getisar1 returns the raw value of ID_AA64ISAR1_EL1. It takes no arguments
// and has an 8-byte result slot ($0-8).
|||
TEXT ·getisar1(SB),NOSPLIT,$0-8 |
|||
// get Instruction Set Attributes 1 into x0 |
|||
// mrs x0, ID_AA64ISAR1_EL1 = d5380620 |
// The instruction is emitted as a raw machine word.
|||
WORD $0xd5380620 |
|||
// Store x0 (R0) into the Go result slot.
MOVD R0, ret+0(FP) |
|||
RET |
|||
|
|||
// func getpfr0() uint64 |
// getpfr0 returns the raw value of ID_AA64PFR0_EL1. It takes no arguments
// and has an 8-byte result slot ($0-8).
|||
TEXT ·getpfr0(SB),NOSPLIT,$0-8 |
|||
// get Processor Feature Register 0 into x0 |
|||
// mrs x0, ID_AA64PFR0_EL1 = d5380400 |
// The instruction is emitted as a raw machine word.
|||
WORD $0xd5380400 |
|||
// Store x0 (R0) into the Go result slot.
MOVD R0, ret+0(FP) |
|||
RET |
@ -0,0 +1,11 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gc
|
|||
|
|||
package cpu |
|||
|
|||
func getisar0() uint64 |
|||
func getisar1() uint64 |
|||
func getpfr0() uint64 |
@ -0,0 +1,21 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gc
|
|||
|
|||
package cpu |
|||
|
|||
// haveAsmFunctions reports whether the other functions in this file can
|
|||
// be safely called.
|
|||
func haveAsmFunctions() bool { return true } |
|||
|
|||
// The following feature detection functions are defined in cpu_s390x.s.
|
|||
// They are likely to be expensive to call so the results should be cached.
|
|||
func stfle() facilityList |
|||
func kmQuery() queryResult |
|||
func kmcQuery() queryResult |
|||
func kmctrQuery() queryResult |
|||
func kmaQuery() queryResult |
|||
func kimdQuery() queryResult |
|||
func klmdQuery() queryResult |
@ -0,0 +1,15 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build (386 || amd64 || amd64p32) && gc
|
|||
|
|||
package cpu |
|||
|
|||
// cpuid is implemented in cpu_x86.s for gc compiler
|
|||
// and in cpu_gccgo.c for gccgo.
|
|||
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) |
|||
|
|||
// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
|
|||
// and in cpu_gccgo.c for gccgo.
|
|||
func xgetbv() (eax, edx uint32) |
@ -0,0 +1,11 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gccgo
|
|||
|
|||
package cpu |
|||
|
|||
func getisar0() uint64 { return 0 } |
|||
func getisar1() uint64 { return 0 } |
|||
func getpfr0() uint64 { return 0 } |
@ -0,0 +1,22 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gccgo
|
|||
|
|||
package cpu |
|||
|
|||
// haveAsmFunctions reports whether the other functions in this file can
|
|||
// be safely called.
|
|||
func haveAsmFunctions() bool { return false } |
|||
|
|||
// TODO(mundaym): the following feature detection functions are currently
|
|||
// stubs. See https://golang.org/cl/162887 for how to fix this.
|
|||
// They are likely to be expensive to call so the results should be cached.
|
|||
func stfle() facilityList { panic("not implemented for gccgo") } |
|||
func kmQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kmcQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kmctrQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kmaQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kimdQuery() queryResult { panic("not implemented for gccgo") } |
|||
func klmdQuery() queryResult { panic("not implemented for gccgo") } |
@ -0,0 +1,37 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build (386 || amd64 || amd64p32) && gccgo
|
|||
|
|||
#include <cpuid.h> |
|||
#include <stdint.h> |
|||
#include <x86intrin.h> |
|||
|
|||
// gccgoGetCpuidCount forwards to __get_cpuid_count and returns its result.
// The wrapper is needed because __get_cpuid_count is declared as static and
// therefore cannot be referenced from Go directly.
// leaf and subleaf select the CPUID query; the results are written through
// eax, ebx, ecx and edx.
|
|||
int |
|||
gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf, |
|||
uint32_t *eax, uint32_t *ebx, |
|||
uint32_t *ecx, uint32_t *edx) |
|||
{ |
|||
return __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx); |
|||
} |
|||
|
|||
#pragma GCC diagnostic ignored "-Wunknown-pragmas" |
|||
#pragma GCC push_options |
|||
#pragma GCC target("xsave") |
|||
#pragma clang attribute push (__attribute__((target("xsave"))), apply_to=function) |
|||
|
|||
// gccgoXgetbv reads the contents of an XCR (Extended Control Register)
|
|||
// specified in the ECX register into registers EDX:EAX.
|
|||
// Currently, the only supported value for XCR is 0. The 64-bit result of
// _xgetbv(0) is split: the low 32 bits go to *eax, the high 32 bits to *edx.
|
|||
void |
|||
gccgoXgetbv(uint32_t *eax, uint32_t *edx) |
|||
{ |
|||
uint64_t v = _xgetbv(0); |
|||
*eax = v & 0xffffffff; |
|||
*edx = v >> 32; |
|||
} |
|||
|
|||
#pragma clang attribute pop |
|||
#pragma GCC pop_options |
@ -0,0 +1,31 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build (386 || amd64 || amd64p32) && gccgo
|
|||
|
|||
package cpu |
|||
|
|||
//extern gccgoGetCpuidCount
|
|||
func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32) |
|||
|
|||
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) { |
|||
var a, b, c, d uint32 |
|||
gccgoGetCpuidCount(eaxArg, ecxArg, &a, &b, &c, &d) |
|||
return a, b, c, d |
|||
} |
|||
|
|||
//extern gccgoXgetbv
|
|||
func gccgoXgetbv(eax, edx *uint32) |
|||
|
|||
func xgetbv() (eax, edx uint32) { |
|||
var a, d uint32 |
|||
gccgoXgetbv(&a, &d) |
|||
return a, d |
|||
} |
|||
|
|||
// darwinSupportsAVX512 always reports false for gccgo builds, because gccgo
// doesn't build on Darwin, per:
// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
func darwinSupportsAVX512() bool { return false }
@ -0,0 +1,15 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !386 && !amd64 && !amd64p32 && !arm64
|
|||
|
|||
package cpu |
|||
|
|||
func archInit() { |
|||
if err := readHWCAP(); err != nil { |
|||
return |
|||
} |
|||
doinit() |
|||
Initialized = true |
|||
} |
@ -0,0 +1,39 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
func doinit() { |
|||
ARM.HasSWP = isSet(hwCap, hwcap_SWP) |
|||
ARM.HasHALF = isSet(hwCap, hwcap_HALF) |
|||
ARM.HasTHUMB = isSet(hwCap, hwcap_THUMB) |
|||
ARM.Has26BIT = isSet(hwCap, hwcap_26BIT) |
|||
ARM.HasFASTMUL = isSet(hwCap, hwcap_FAST_MULT) |
|||
ARM.HasFPA = isSet(hwCap, hwcap_FPA) |
|||
ARM.HasVFP = isSet(hwCap, hwcap_VFP) |
|||
ARM.HasEDSP = isSet(hwCap, hwcap_EDSP) |
|||
ARM.HasJAVA = isSet(hwCap, hwcap_JAVA) |
|||
ARM.HasIWMMXT = isSet(hwCap, hwcap_IWMMXT) |
|||
ARM.HasCRUNCH = isSet(hwCap, hwcap_CRUNCH) |
|||
ARM.HasTHUMBEE = isSet(hwCap, hwcap_THUMBEE) |
|||
ARM.HasNEON = isSet(hwCap, hwcap_NEON) |
|||
ARM.HasVFPv3 = isSet(hwCap, hwcap_VFPv3) |
|||
ARM.HasVFPv3D16 = isSet(hwCap, hwcap_VFPv3D16) |
|||
ARM.HasTLS = isSet(hwCap, hwcap_TLS) |
|||
ARM.HasVFPv4 = isSet(hwCap, hwcap_VFPv4) |
|||
ARM.HasIDIVA = isSet(hwCap, hwcap_IDIVA) |
|||
ARM.HasIDIVT = isSet(hwCap, hwcap_IDIVT) |
|||
ARM.HasVFPD32 = isSet(hwCap, hwcap_VFPD32) |
|||
ARM.HasLPAE = isSet(hwCap, hwcap_LPAE) |
|||
ARM.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM) |
|||
ARM.HasAES = isSet(hwCap2, hwcap2_AES) |
|||
ARM.HasPMULL = isSet(hwCap2, hwcap2_PMULL) |
|||
ARM.HasSHA1 = isSet(hwCap2, hwcap2_SHA1) |
|||
ARM.HasSHA2 = isSet(hwCap2, hwcap2_SHA2) |
|||
ARM.HasCRC32 = isSet(hwCap2, hwcap2_CRC32) |
|||
} |
|||
|
|||
// isSet reports whether hwc and value have at least one set bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common != 0
}
@ -0,0 +1,111 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"strings" |
|||
"syscall" |
|||
) |
|||
|
|||
// HWCAP/HWCAP2 bits. These are exposed by Linux.
// The constants occupy consecutive bits, starting at bit 0.
const (
	hwcap_FP = 1 << iota
	hwcap_ASIMD
	hwcap_EVTSTRM
	hwcap_AES
	hwcap_PMULL
	hwcap_SHA1
	hwcap_SHA2
	hwcap_CRC32
	hwcap_ATOMICS
	hwcap_FPHP
	hwcap_ASIMDHP
	hwcap_CPUID
	hwcap_ASIMDRDM
	hwcap_JSCVT
	hwcap_FCMA
	hwcap_LRCPC
	hwcap_DCPOP
	hwcap_SHA3
	hwcap_SM3
	hwcap_SM4
	hwcap_ASIMDDP
	hwcap_SHA512
	hwcap_SVE
	hwcap_ASIMDFHM
)
|||
|
|||
// linuxKernelCanEmulateCPUID reports whether we're running
|
|||
// on Linux 4.11+. Ideally we'd like to ask the question about
|
|||
// whether the current kernel contains
|
|||
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=77c97b4ee21290f5f083173d957843b615abbff2
|
|||
// but the version number will have to do.
|
|||
func linuxKernelCanEmulateCPUID() bool { |
|||
var un syscall.Utsname |
|||
syscall.Uname(&un) |
|||
var sb strings.Builder |
|||
for _, b := range un.Release[:] { |
|||
if b == 0 { |
|||
break |
|||
} |
|||
sb.WriteByte(byte(b)) |
|||
} |
|||
major, minor, _, ok := parseRelease(sb.String()) |
|||
return ok && (major > 4 || major == 4 && minor >= 11) |
|||
} |
|||
|
|||
func doinit() { |
|||
if err := readHWCAP(); err != nil { |
|||
// We failed to read /proc/self/auxv. This can happen if the binary has
|
|||
// been given extra capabilities(7) with /bin/setcap.
|
|||
//
|
|||
// When this happens, we have two options. If the Linux kernel is new
|
|||
// enough (4.11+), we can read the arm64 registers directly which'll
|
|||
// trap into the kernel and then return back to userspace.
|
|||
//
|
|||
// But on older kernels, such as Linux 4.4.180 as used on many Synology
|
|||
// devices, calling readARM64Registers (specifically getisar0) will
|
|||
// cause a SIGILL and we'll die. So for older kernels, parse /proc/cpuinfo
|
|||
// instead.
|
|||
//
|
|||
// See golang/go#57336.
|
|||
if linuxKernelCanEmulateCPUID() { |
|||
readARM64Registers() |
|||
} else { |
|||
readLinuxProcCPUInfo() |
|||
} |
|||
return |
|||
} |
|||
|
|||
// HWCAP feature bits
|
|||
ARM64.HasFP = isSet(hwCap, hwcap_FP) |
|||
ARM64.HasASIMD = isSet(hwCap, hwcap_ASIMD) |
|||
ARM64.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM) |
|||
ARM64.HasAES = isSet(hwCap, hwcap_AES) |
|||
ARM64.HasPMULL = isSet(hwCap, hwcap_PMULL) |
|||
ARM64.HasSHA1 = isSet(hwCap, hwcap_SHA1) |
|||
ARM64.HasSHA2 = isSet(hwCap, hwcap_SHA2) |
|||
ARM64.HasCRC32 = isSet(hwCap, hwcap_CRC32) |
|||
ARM64.HasATOMICS = isSet(hwCap, hwcap_ATOMICS) |
|||
ARM64.HasFPHP = isSet(hwCap, hwcap_FPHP) |
|||
ARM64.HasASIMDHP = isSet(hwCap, hwcap_ASIMDHP) |
|||
ARM64.HasCPUID = isSet(hwCap, hwcap_CPUID) |
|||
ARM64.HasASIMDRDM = isSet(hwCap, hwcap_ASIMDRDM) |
|||
ARM64.HasJSCVT = isSet(hwCap, hwcap_JSCVT) |
|||
ARM64.HasFCMA = isSet(hwCap, hwcap_FCMA) |
|||
ARM64.HasLRCPC = isSet(hwCap, hwcap_LRCPC) |
|||
ARM64.HasDCPOP = isSet(hwCap, hwcap_DCPOP) |
|||
ARM64.HasSHA3 = isSet(hwCap, hwcap_SHA3) |
|||
ARM64.HasSM3 = isSet(hwCap, hwcap_SM3) |
|||
ARM64.HasSM4 = isSet(hwCap, hwcap_SM4) |
|||
ARM64.HasASIMDDP = isSet(hwCap, hwcap_ASIMDDP) |
|||
ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) |
|||
ARM64.HasSVE = isSet(hwCap, hwcap_SVE) |
|||
ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) |
|||
} |
|||
|
|||
// isSet reports whether any bit of value is also set in hwc.
func isSet(hwc uint, value uint) bool {
	if hwc&value == 0 {
		return false
	}
	return true
}
@ -0,0 +1,22 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux && (mips64 || mips64le)
|
|||
|
|||
package cpu |
|||
|
|||
// HWCAP bits. These are exposed by the Linux kernel 5.4.
|
|||
const ( |
|||
// CPU features
|
|||
hwcap_MIPS_MSA = 1 << 1 |
|||
) |
|||
|
|||
func doinit() { |
|||
// HWCAP feature bits
|
|||
MIPS64X.HasMSA = isSet(hwCap, hwcap_MIPS_MSA) |
|||
} |
|||
|
|||
// isSet reports whether hwc has at least one of the bits in value set.
func isSet(hwc uint, value uint) bool {
	return (value & hwc) != 0
}
@ -0,0 +1,9 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
|
|||
|
|||
package cpu |
|||
|
|||
func doinit() {} |
@ -0,0 +1,30 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux && (ppc64 || ppc64le)
|
|||
|
|||
package cpu |
|||
|
|||
// HWCAP/HWCAP2 bits. These are exposed by the kernel.
|
|||
const ( |
|||
// ISA Level
|
|||
_PPC_FEATURE2_ARCH_2_07 = 0x80000000 |
|||
_PPC_FEATURE2_ARCH_3_00 = 0x00800000 |
|||
|
|||
// CPU features
|
|||
_PPC_FEATURE2_DARN = 0x00200000 |
|||
_PPC_FEATURE2_SCV = 0x00100000 |
|||
) |
|||
|
|||
func doinit() { |
|||
// HWCAP2 feature bits
|
|||
PPC64.IsPOWER8 = isSet(hwCap2, _PPC_FEATURE2_ARCH_2_07) |
|||
PPC64.IsPOWER9 = isSet(hwCap2, _PPC_FEATURE2_ARCH_3_00) |
|||
PPC64.HasDARN = isSet(hwCap2, _PPC_FEATURE2_DARN) |
|||
PPC64.HasSCV = isSet(hwCap2, _PPC_FEATURE2_SCV) |
|||
} |
|||
|
|||
// isSet reports whether the intersection of hwc and value is non-empty.
func isSet(hwc uint, value uint) bool {
	switch hwc & value {
	case 0:
		return false
	default:
		return true
	}
}
@ -0,0 +1,40 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
const (
	// bit mask values from /usr/include/bits/hwcap.h
	hwcap_ZARCH  = 1 << 1
	hwcap_STFLE  = 1 << 2
	hwcap_MSA    = 1 << 3
	hwcap_LDISP  = 1 << 4
	hwcap_EIMM   = 1 << 5
	hwcap_DFP    = 1 << 6
	hwcap_ETF3EH = 1 << 8
	hwcap_VX     = 1 << 11
	hwcap_VXE    = 1 << 13
)
|||
|
|||
func initS390Xbase() { |
|||
// test HWCAP bit vector
|
|||
has := func(featureMask uint) bool { |
|||
return hwCap&featureMask == featureMask |
|||
} |
|||
|
|||
// mandatory
|
|||
S390X.HasZARCH = has(hwcap_ZARCH) |
|||
|
|||
// optional
|
|||
S390X.HasSTFLE = has(hwcap_STFLE) |
|||
S390X.HasLDISP = has(hwcap_LDISP) |
|||
S390X.HasEIMM = has(hwcap_EIMM) |
|||
S390X.HasETF3EH = has(hwcap_ETF3EH) |
|||
S390X.HasDFP = has(hwcap_DFP) |
|||
S390X.HasMSA = has(hwcap_MSA) |
|||
S390X.HasVX = has(hwcap_VX) |
|||
if S390X.HasVX { |
|||
S390X.HasVXE = has(hwcap_VXE) |
|||
} |
|||
} |
@ -0,0 +1,12 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build loong64
|
|||
|
|||
package cpu |
|||
|
|||
// cacheLineSize is the cache line size constant used for loong64 builds.
const cacheLineSize = 64

// initOptions registers no feature options on loong64.
func initOptions() {}
@ -0,0 +1,15 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build mips64 || mips64le
|
|||
|
|||
package cpu |
|||
|
|||
const cacheLineSize = 32 |
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "msa", Feature: &MIPS64X.HasMSA}, |
|||
} |
|||
} |
@ -0,0 +1,11 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build mips || mipsle
|
|||
|
|||
package cpu |
|||
|
|||
// cacheLineSize is the cache line size constant used for mips/mipsle builds.
const cacheLineSize = 32

// initOptions registers no feature options on mips/mipsle.
func initOptions() {
}
@ -0,0 +1,173 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"syscall" |
|||
"unsafe" |
|||
) |
|||
|
|||
// Minimal copy of functionality from x/sys/unix so the cpu package can call
|
|||
// sysctl without depending on x/sys/unix.
|
|||
|
|||
const (
	// _CTL_QUERY is appended to a MIB by sysctlNodes to list the nodes below it.
	_CTL_QUERY = -2

	// _SYSCTL_VERS_1 is the value sysctlNodes places in the query node's Flags.
	_SYSCTL_VERS_1 = 1 << 24
)
|||
|
|||
var _zero uintptr |
|||
|
|||
func sysctl(mib []int32, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { |
|||
var _p0 unsafe.Pointer |
|||
if len(mib) > 0 { |
|||
_p0 = unsafe.Pointer(&mib[0]) |
|||
} else { |
|||
_p0 = unsafe.Pointer(&_zero) |
|||
} |
|||
_, _, errno := syscall.Syscall6( |
|||
syscall.SYS___SYSCTL, |
|||
uintptr(_p0), |
|||
uintptr(len(mib)), |
|||
uintptr(unsafe.Pointer(old)), |
|||
uintptr(unsafe.Pointer(oldlen)), |
|||
uintptr(unsafe.Pointer(new)), |
|||
uintptr(newlen)) |
|||
if errno != 0 { |
|||
return errno |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
// sysctlNode is the record exchanged with the kernel by sysctlNodes: one is
// sent as the query and a slice of them is filled in as the reply.
// NOTE(review): the layout presumably mirrors NetBSD's struct sysctlnode;
// confirm against <sys/sysctl.h> before changing any field.
type sysctlNode struct {
	Flags uint32   // set to _SYSCTL_VERS_1 in the query node
	Num   int32    // number appended to the MIB when the name matches
	Name  [32]int8 // node name; nametomib ignores zero bytes
	Ver   uint32

	// The remaining fields are never read by this package.
	__rsvd         uint32
	Un             [16]byte
	_sysctl_size   [8]byte
	_sysctl_func   [8]byte
	_sysctl_parent [8]byte
	_sysctl_desc   [8]byte
}
|||
|
|||
func sysctlNodes(mib []int32) ([]sysctlNode, error) { |
|||
var olen uintptr |
|||
|
|||
// Get a list of all sysctl nodes below the given MIB by performing
|
|||
// a sysctl for the given MIB with CTL_QUERY appended.
|
|||
mib = append(mib, _CTL_QUERY) |
|||
qnode := sysctlNode{Flags: _SYSCTL_VERS_1} |
|||
qp := (*byte)(unsafe.Pointer(&qnode)) |
|||
sz := unsafe.Sizeof(qnode) |
|||
if err := sysctl(mib, nil, &olen, qp, sz); err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
// Now that we know the size, get the actual nodes.
|
|||
nodes := make([]sysctlNode, olen/sz) |
|||
np := (*byte)(unsafe.Pointer(&nodes[0])) |
|||
if err := sysctl(mib, np, &olen, qp, sz); err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
return nodes, nil |
|||
} |
|||
|
|||
func nametomib(name string) ([]int32, error) { |
|||
// Split name into components.
|
|||
var parts []string |
|||
last := 0 |
|||
for i := 0; i < len(name); i++ { |
|||
if name[i] == '.' { |
|||
parts = append(parts, name[last:i]) |
|||
last = i + 1 |
|||
} |
|||
} |
|||
parts = append(parts, name[last:]) |
|||
|
|||
mib := []int32{} |
|||
// Discover the nodes and construct the MIB OID.
|
|||
for partno, part := range parts { |
|||
nodes, err := sysctlNodes(mib) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
for _, node := range nodes { |
|||
n := make([]byte, 0) |
|||
for i := range node.Name { |
|||
if node.Name[i] != 0 { |
|||
n = append(n, byte(node.Name[i])) |
|||
} |
|||
} |
|||
if string(n) == part { |
|||
mib = append(mib, int32(node.Num)) |
|||
break |
|||
} |
|||
} |
|||
if len(mib) != partno+1 { |
|||
return nil, err |
|||
} |
|||
} |
|||
|
|||
return mib, nil |
|||
} |
|||
|
|||
// aarch64SysctlCPUID is struct aarch64_sysctl_cpu_id from NetBSD's
// <aarch64/armreg.h>. Field order and sizes must not be changed.
type aarch64SysctlCPUID struct {
	midr      uint64 // Main ID Register
	revidr    uint64 // Revision ID Register
	mpidr     uint64 // Multiprocessor Affinity Register
	aa64dfr0  uint64 // A64 Debug Feature Register 0
	aa64dfr1  uint64 // A64 Debug Feature Register 1
	aa64isar0 uint64 // A64 Instruction Set Attribute Register 0
	aa64isar1 uint64 // A64 Instruction Set Attribute Register 1
	aa64mmfr0 uint64 // A64 Memory Model Feature Register 0
	aa64mmfr1 uint64 // A64 Memory Model Feature Register 1
	aa64mmfr2 uint64 // A64 Memory Model Feature Register 2
	aa64pfr0  uint64 // A64 Processor Feature Register 0
	aa64pfr1  uint64 // A64 Processor Feature Register 1
	aa64zfr0  uint64 // A64 SVE Feature ID Register 0
	mvfr0     uint32 // Media and VFP Feature Register 0
	mvfr1     uint32 // Media and VFP Feature Register 1
	mvfr2     uint32 // Media and VFP Feature Register 2
	pad       uint32
	clidr     uint64 // Cache Level ID Register
	ctr       uint64 // Cache Type Register
}
|||
|
|||
func sysctlCPUID(name string) (*aarch64SysctlCPUID, error) { |
|||
mib, err := nametomib(name) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
|
|||
out := aarch64SysctlCPUID{} |
|||
n := unsafe.Sizeof(out) |
|||
_, _, errno := syscall.Syscall6( |
|||
syscall.SYS___SYSCTL, |
|||
uintptr(unsafe.Pointer(&mib[0])), |
|||
uintptr(len(mib)), |
|||
uintptr(unsafe.Pointer(&out)), |
|||
uintptr(unsafe.Pointer(&n)), |
|||
uintptr(0), |
|||
uintptr(0)) |
|||
if errno != 0 { |
|||
return nil, errno |
|||
} |
|||
return &out, nil |
|||
} |
|||
|
|||
func doinit() { |
|||
cpuid, err := sysctlCPUID("machdep.cpu0.cpu_id") |
|||
if err != nil { |
|||
setMinimalFeatures() |
|||
return |
|||
} |
|||
parseARM64SystemRegisters(cpuid.aa64isar0, cpuid.aa64isar1, cpuid.aa64pfr0) |
|||
|
|||
Initialized = true |
|||
} |
@ -0,0 +1,65 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"syscall" |
|||
"unsafe" |
|||
) |
|||
|
|||
// Minimal copy of functionality from x/sys/unix so the cpu package can call
|
|||
// sysctl without depending on x/sys/unix.
|
|||
|
|||
// MIB components used by doinit to read the ARM64 ID registers.
const (
	// Top-level identifier, from OpenBSD's sys/sysctl.h.
	_CTL_MACHDEP = 7

	// Second-level identifiers, from OpenBSD's machine/cpu.h.
	_CPU_ID_AA64ISAR0 = 2
	_CPU_ID_AA64ISAR1 = 3
)
|||
|
|||
// Implemented in the runtime package (runtime/sys_openbsd3.go)
|
|||
func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err syscall.Errno) |
|||
|
|||
//go:linkname syscall_syscall6 syscall.syscall6
|
|||
|
|||
func sysctl(mib []uint32, old *byte, oldlen *uintptr, new *byte, newlen uintptr) (err error) { |
|||
_, _, errno := syscall_syscall6(libc_sysctl_trampoline_addr, uintptr(unsafe.Pointer(&mib[0])), uintptr(len(mib)), uintptr(unsafe.Pointer(old)), uintptr(unsafe.Pointer(oldlen)), uintptr(unsafe.Pointer(new)), uintptr(newlen)) |
|||
if errno != 0 { |
|||
return errno |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
var libc_sysctl_trampoline_addr uintptr |
|||
|
|||
//go:cgo_import_dynamic libc_sysctl sysctl "libc.so"
|
|||
|
|||
func sysctlUint64(mib []uint32) (uint64, bool) { |
|||
var out uint64 |
|||
nout := unsafe.Sizeof(out) |
|||
if err := sysctl(mib, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); err != nil { |
|||
return 0, false |
|||
} |
|||
return out, true |
|||
} |
|||
|
|||
func doinit() { |
|||
setMinimalFeatures() |
|||
|
|||
// Get ID_AA64ISAR0 and ID_AA64ISAR1 from sysctl.
|
|||
isar0, ok := sysctlUint64([]uint32{_CTL_MACHDEP, _CPU_ID_AA64ISAR0}) |
|||
if !ok { |
|||
return |
|||
} |
|||
isar1, ok := sysctlUint64([]uint32{_CTL_MACHDEP, _CPU_ID_AA64ISAR1}) |
|||
if !ok { |
|||
return |
|||
} |
|||
parseARM64SystemRegisters(isar0, isar1, 0) |
|||
|
|||
Initialized = true |
|||
} |
@ -0,0 +1,11 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// libc_sysctl_trampoline jumps straight to the libc_sysctl symbol.
TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 |
|||
JMP libc_sysctl(SB) |
|||
|
|||
// libc_sysctl_trampoline_addr is an 8-byte read-only word holding the
// address of the trampoline above.
GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 |
|||
DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) |
@ -0,0 +1,9 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !linux && arm
|
|||
|
|||
package cpu |
|||
|
|||
// archInit does nothing on non-Linux arm builds.
func archInit() {
}
@ -0,0 +1,9 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !linux && !netbsd && !openbsd && arm64
|
|||
|
|||
package cpu |
|||
|
|||
// doinit does nothing on arm64 builds for operating systems other than
// linux, netbsd and openbsd.
func doinit() {
}
@ -0,0 +1,11 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !linux && (mips64 || mips64le)
|
|||
|
|||
package cpu |
|||
|
|||
func archInit() { |
|||
Initialized = true |
|||
} |
@ -0,0 +1,12 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !aix && !linux && (ppc64 || ppc64le)
|
|||
|
|||
package cpu |
|||
|
|||
func archInit() { |
|||
PPC64.IsPOWER8 = true |
|||
Initialized = true |
|||
} |
@ -0,0 +1,11 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !linux && riscv64
|
|||
|
|||
package cpu |
|||
|
|||
func archInit() { |
|||
Initialized = true |
|||
} |
@ -0,0 +1,16 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build ppc64 || ppc64le
|
|||
|
|||
package cpu |
|||
|
|||
const cacheLineSize = 128 |
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "darn", Feature: &PPC64.HasDARN}, |
|||
{Name: "scv", Feature: &PPC64.HasSCV}, |
|||
} |
|||
} |
@ -0,0 +1,11 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build riscv64
|
|||
|
|||
package cpu |
|||
|
|||
// cacheLineSize is the cache line size constant used for riscv64 builds.
const cacheLineSize = 64

// initOptions registers no feature options on riscv64.
func initOptions() {
}
@ -0,0 +1,172 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
const cacheLineSize = 256 |
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "zarch", Feature: &S390X.HasZARCH, Required: true}, |
|||
{Name: "stfle", Feature: &S390X.HasSTFLE, Required: true}, |
|||
{Name: "ldisp", Feature: &S390X.HasLDISP, Required: true}, |
|||
{Name: "eimm", Feature: &S390X.HasEIMM, Required: true}, |
|||
{Name: "dfp", Feature: &S390X.HasDFP}, |
|||
{Name: "etf3eh", Feature: &S390X.HasETF3EH}, |
|||
{Name: "msa", Feature: &S390X.HasMSA}, |
|||
{Name: "aes", Feature: &S390X.HasAES}, |
|||
{Name: "aescbc", Feature: &S390X.HasAESCBC}, |
|||
{Name: "aesctr", Feature: &S390X.HasAESCTR}, |
|||
{Name: "aesgcm", Feature: &S390X.HasAESGCM}, |
|||
{Name: "ghash", Feature: &S390X.HasGHASH}, |
|||
{Name: "sha1", Feature: &S390X.HasSHA1}, |
|||
{Name: "sha256", Feature: &S390X.HasSHA256}, |
|||
{Name: "sha3", Feature: &S390X.HasSHA3}, |
|||
{Name: "sha512", Feature: &S390X.HasSHA512}, |
|||
{Name: "vx", Feature: &S390X.HasVX}, |
|||
{Name: "vxe", Feature: &S390X.HasVXE}, |
|||
} |
|||
} |
|||
|
|||
// bitIsSet reports whether bit number index is set in bits. Bits are counted
// in big endian order: index 0 is the most significant bit of bits[0],
// index 64 the most significant bit of bits[1], and so on.
func bitIsSet(bits []uint64, index uint) bool {
	word := bits[index/64]
	mask := uint64(1) << (63 - index%64)
	return word&mask != 0
}
|||
|
|||
// facility is the bit index of a named facility, as used with
// facilityList.Has.
type facility uint8

const (
	// Mandatory facilities.
	zarch  facility = 1  // z architecture mode is active
	stflef facility = 7  // store-facility-list-extended
	ldisp  facility = 18 // long-displacement
	eimm   facility = 21 // extended-immediate

	// Miscellaneous facilities.
	dfp    facility = 42 // decimal-floating-point
	etf3eh facility = 30 // extended-translation 3 enhancement

	// Cryptography facilities.
	msa  facility = 17  // message-security-assist
	msa3 facility = 76  // message-security-assist extension 3
	msa4 facility = 77  // message-security-assist extension 4
	msa5 facility = 57  // message-security-assist extension 5
	msa8 facility = 146 // message-security-assist extension 8
	msa9 facility = 155 // message-security-assist extension 9

	// Vector facilities.
	vx   facility = 129 // vector facility
	vxe  facility = 135 // vector-enhancements 1
	vxe2 facility = 148 // vector-enhancements 2
)
|||
|
|||
// facilityList contains the result of an STFLE call.
|
|||
// Bits are numbered in big endian order so the
|
|||
// leftmost bit (the MSB) is at index 0.
|
|||
type facilityList struct { |
|||
bits [4]uint64 |
|||
} |
|||
|
|||
// Has reports whether the given facilities are present.
|
|||
func (s *facilityList) Has(fs ...facility) bool { |
|||
if len(fs) == 0 { |
|||
panic("no facility bits provided") |
|||
} |
|||
for _, f := range fs { |
|||
if !bitIsSet(s.bits[:], uint(f)) { |
|||
return false |
|||
} |
|||
} |
|||
return true |
|||
} |
|||
|
|||
// function is the code of a named cryptographic function, as used with
// queryResult.Has.
type function uint8

const (
	// KM{,A,C,CTR} function codes.
	aes128 function = 18 // AES-128
	aes192 function = 19 // AES-192
	aes256 function = 20 // AES-256

	// K{I,L}MD function codes.
	sha1     function = 1  // SHA-1
	sha256   function = 2  // SHA-256
	sha512   function = 3  // SHA-512
	sha3_224 function = 32 // SHA3-224
	sha3_256 function = 33 // SHA3-256
	sha3_384 function = 34 // SHA3-384
	sha3_512 function = 35 // SHA3-512
	shake128 function = 36 // SHAKE-128
	shake256 function = 37 // SHAKE-256

	// KIMD-only function code: doinit checks it against the KIMD query
	// result and notes that KLMD-GHASH does not exist.
	ghash function = 65 // GHASH
)
|||
|
|||
// queryResult contains the result of a Query function
|
|||
// call. Bits are numbered in big endian order so the
|
|||
// leftmost bit (the MSB) is at index 0.
|
|||
type queryResult struct { |
|||
bits [2]uint64 |
|||
} |
|||
|
|||
// Has reports whether the given functions are present.
|
|||
func (q *queryResult) Has(fns ...function) bool { |
|||
if len(fns) == 0 { |
|||
panic("no function codes provided") |
|||
} |
|||
for _, f := range fns { |
|||
if !bitIsSet(q.bits[:], uint(f)) { |
|||
return false |
|||
} |
|||
} |
|||
return true |
|||
} |
|||
|
|||
func doinit() { |
|||
initS390Xbase() |
|||
|
|||
// We need implementations of stfle, km and so on
|
|||
// to detect cryptographic features.
|
|||
if !haveAsmFunctions() { |
|||
return |
|||
} |
|||
|
|||
// optional cryptographic functions
|
|||
if S390X.HasMSA { |
|||
aes := []function{aes128, aes192, aes256} |
|||
|
|||
// cipher message
|
|||
km, kmc := kmQuery(), kmcQuery() |
|||
S390X.HasAES = km.Has(aes...) |
|||
S390X.HasAESCBC = kmc.Has(aes...) |
|||
if S390X.HasSTFLE { |
|||
facilities := stfle() |
|||
if facilities.Has(msa4) { |
|||
kmctr := kmctrQuery() |
|||
S390X.HasAESCTR = kmctr.Has(aes...) |
|||
} |
|||
if facilities.Has(msa8) { |
|||
kma := kmaQuery() |
|||
S390X.HasAESGCM = kma.Has(aes...) |
|||
} |
|||
} |
|||
|
|||
// compute message digest
|
|||
kimd := kimdQuery() // intermediate (no padding)
|
|||
klmd := klmdQuery() // last (padding)
|
|||
S390X.HasSHA1 = kimd.Has(sha1) && klmd.Has(sha1) |
|||
S390X.HasSHA256 = kimd.Has(sha256) && klmd.Has(sha256) |
|||
S390X.HasSHA512 = kimd.Has(sha512) && klmd.Has(sha512) |
|||
S390X.HasGHASH = kimd.Has(ghash) // KLMD-GHASH does not exist
|
|||
sha3 := []function{ |
|||
sha3_224, sha3_256, sha3_384, sha3_512, |
|||
shake128, shake256, |
|||
} |
|||
S390X.HasSHA3 = kimd.Has(sha3...) && klmd.Has(sha3...) |
|||
} |
|||
} |
@ -0,0 +1,57 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build gc |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// func stfle() facilityList |
// stfle clears the 32-byte return value and then executes STFLE with R0=3
// and R1 pointing at it.
|||
TEXT ·stfle(SB), NOSPLIT|NOFRAME, $0-32 |
|||
MOVD $ret+0(FP), R1 |
|||
MOVD $3, R0 // last doubleword index to store |
|||
XC $32, (R1), (R1) // clear 4 doublewords (32 bytes) |
|||
WORD $0xb2b01000 // store facility list extended (STFLE) |
|||
RET |
|||
|
|||
// func kmQuery() queryResult |
// kmQuery runs KM with function code 0 (query); R1 points at the 16-byte
// return value.
|||
TEXT ·kmQuery(SB), NOSPLIT|NOFRAME, $0-16 |
|||
MOVD $0, R0 // set function code to 0 (KM-Query) |
|||
MOVD $ret+0(FP), R1 // address of 16-byte return value |
|||
WORD $0xB92E0024 // cipher message (KM) |
|||
RET |
|||
|
|||
// func kmcQuery() queryResult |
// kmcQuery runs KMC with function code 0 (query); R1 points at the 16-byte
// return value.
|||
TEXT ·kmcQuery(SB), NOSPLIT|NOFRAME, $0-16 |
|||
MOVD $0, R0 // set function code to 0 (KMC-Query) |
|||
MOVD $ret+0(FP), R1 // address of 16-byte return value |
|||
WORD $0xB92F0024 // cipher message with chaining (KMC) |
|||
RET |
|||
|
|||
// func kmctrQuery() queryResult |
// kmctrQuery runs KMCTR with function code 0 (query); R1 points at the
// 16-byte return value.
|||
TEXT ·kmctrQuery(SB), NOSPLIT|NOFRAME, $0-16 |
|||
MOVD $0, R0 // set function code to 0 (KMCTR-Query) |
|||
MOVD $ret+0(FP), R1 // address of 16-byte return value |
|||
WORD $0xB92D4024 // cipher message with counter (KMCTR) |
|||
RET |
|||
|
|||
// func kmaQuery() queryResult |
// kmaQuery runs KMA with function code 0 (query); R1 points at the 16-byte
// return value.
|||
TEXT ·kmaQuery(SB), NOSPLIT|NOFRAME, $0-16 |
|||
MOVD $0, R0 // set function code to 0 (KMA-Query) |
|||
MOVD $ret+0(FP), R1 // address of 16-byte return value |
|||
WORD $0xb9296024 // cipher message with authentication (KMA) |
|||
RET |
|||
|
|||
// func kimdQuery() queryResult |
// kimdQuery runs KIMD with function code 0 (query); R1 points at the
// 16-byte return value.
|||
TEXT ·kimdQuery(SB), NOSPLIT|NOFRAME, $0-16 |
|||
MOVD $0, R0 // set function code to 0 (KIMD-Query) |
|||
MOVD $ret+0(FP), R1 // address of 16-byte return value |
|||
WORD $0xB93E0024 // compute intermediate message digest (KIMD) |
|||
RET |
|||
|
|||
// func klmdQuery() queryResult |
// klmdQuery runs KLMD with function code 0 (query); R1 points at the
// 16-byte return value.
|||
TEXT ·klmdQuery(SB), NOSPLIT|NOFRAME, $0-16 |
|||
MOVD $0, R0 // set function code to 0 (KLMD-Query) |
|||
MOVD $ret+0(FP), R1 // address of 16-byte return value |
|||
WORD $0xB93F0024 // compute last message digest (KLMD) |
|||
RET |
@ -0,0 +1,17 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build wasm
|
|||
|
|||
package cpu |
|||
|
|||
// The cpu package is being compiled for an unknown (software-abstracted)
// CPU. cacheLineSize is 0 so that CacheLinePad becomes an empty struct, in
// the hope that the usual struct alignment rules are good enough.
const cacheLineSize = 0

// initOptions registers no feature options on wasm.
func initOptions() {
}

// archInit does nothing on wasm.
func archInit() {
}
@ -0,0 +1,151 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build 386 || amd64 || amd64p32
|
|||
|
|||
package cpu |
|||
|
|||
import "runtime" |
|||
|
|||
const cacheLineSize = 64 |
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "adx", Feature: &X86.HasADX}, |
|||
{Name: "aes", Feature: &X86.HasAES}, |
|||
{Name: "avx", Feature: &X86.HasAVX}, |
|||
{Name: "avx2", Feature: &X86.HasAVX2}, |
|||
{Name: "avx512", Feature: &X86.HasAVX512}, |
|||
{Name: "avx512f", Feature: &X86.HasAVX512F}, |
|||
{Name: "avx512cd", Feature: &X86.HasAVX512CD}, |
|||
{Name: "avx512er", Feature: &X86.HasAVX512ER}, |
|||
{Name: "avx512pf", Feature: &X86.HasAVX512PF}, |
|||
{Name: "avx512vl", Feature: &X86.HasAVX512VL}, |
|||
{Name: "avx512bw", Feature: &X86.HasAVX512BW}, |
|||
{Name: "avx512dq", Feature: &X86.HasAVX512DQ}, |
|||
{Name: "avx512ifma", Feature: &X86.HasAVX512IFMA}, |
|||
{Name: "avx512vbmi", Feature: &X86.HasAVX512VBMI}, |
|||
{Name: "avx512vnniw", Feature: &X86.HasAVX5124VNNIW}, |
|||
{Name: "avx5124fmaps", Feature: &X86.HasAVX5124FMAPS}, |
|||
{Name: "avx512vpopcntdq", Feature: &X86.HasAVX512VPOPCNTDQ}, |
|||
{Name: "avx512vpclmulqdq", Feature: &X86.HasAVX512VPCLMULQDQ}, |
|||
{Name: "avx512vnni", Feature: &X86.HasAVX512VNNI}, |
|||
{Name: "avx512gfni", Feature: &X86.HasAVX512GFNI}, |
|||
{Name: "avx512vaes", Feature: &X86.HasAVX512VAES}, |
|||
{Name: "avx512vbmi2", Feature: &X86.HasAVX512VBMI2}, |
|||
{Name: "avx512bitalg", Feature: &X86.HasAVX512BITALG}, |
|||
{Name: "avx512bf16", Feature: &X86.HasAVX512BF16}, |
|||
{Name: "amxtile", Feature: &X86.HasAMXTile}, |
|||
{Name: "amxint8", Feature: &X86.HasAMXInt8}, |
|||
{Name: "amxbf16", Feature: &X86.HasAMXBF16}, |
|||
{Name: "bmi1", Feature: &X86.HasBMI1}, |
|||
{Name: "bmi2", Feature: &X86.HasBMI2}, |
|||
{Name: "cx16", Feature: &X86.HasCX16}, |
|||
{Name: "erms", Feature: &X86.HasERMS}, |
|||
{Name: "fma", Feature: &X86.HasFMA}, |
|||
{Name: "osxsave", Feature: &X86.HasOSXSAVE}, |
|||
{Name: "pclmulqdq", Feature: &X86.HasPCLMULQDQ}, |
|||
{Name: "popcnt", Feature: &X86.HasPOPCNT}, |
|||
{Name: "rdrand", Feature: &X86.HasRDRAND}, |
|||
{Name: "rdseed", Feature: &X86.HasRDSEED}, |
|||
{Name: "sse3", Feature: &X86.HasSSE3}, |
|||
{Name: "sse41", Feature: &X86.HasSSE41}, |
|||
{Name: "sse42", Feature: &X86.HasSSE42}, |
|||
{Name: "ssse3", Feature: &X86.HasSSSE3}, |
|||
|
|||
// These capabilities should always be enabled on amd64:
|
|||
{Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"}, |
|||
} |
|||
} |
|||
|
|||
func archInit() { |
|||
|
|||
Initialized = true |
|||
|
|||
maxID, _, _, _ := cpuid(0, 0) |
|||
|
|||
if maxID < 1 { |
|||
return |
|||
} |
|||
|
|||
_, _, ecx1, edx1 := cpuid(1, 0) |
|||
X86.HasSSE2 = isSet(26, edx1) |
|||
|
|||
X86.HasSSE3 = isSet(0, ecx1) |
|||
X86.HasPCLMULQDQ = isSet(1, ecx1) |
|||
X86.HasSSSE3 = isSet(9, ecx1) |
|||
X86.HasFMA = isSet(12, ecx1) |
|||
X86.HasCX16 = isSet(13, ecx1) |
|||
X86.HasSSE41 = isSet(19, ecx1) |
|||
X86.HasSSE42 = isSet(20, ecx1) |
|||
X86.HasPOPCNT = isSet(23, ecx1) |
|||
X86.HasAES = isSet(25, ecx1) |
|||
X86.HasOSXSAVE = isSet(27, ecx1) |
|||
X86.HasRDRAND = isSet(30, ecx1) |
|||
|
|||
var osSupportsAVX, osSupportsAVX512 bool |
|||
// For XGETBV, OSXSAVE bit is required and sufficient.
|
|||
if X86.HasOSXSAVE { |
|||
eax, _ := xgetbv() |
|||
// Check if XMM and YMM registers have OS support.
|
|||
osSupportsAVX = isSet(1, eax) && isSet(2, eax) |
|||
|
|||
if runtime.GOOS == "darwin" { |
|||
// Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers.
|
|||
// Since users can't rely on mask register contents, let's not advertise AVX-512 support.
|
|||
// See issue 49233.
|
|||
osSupportsAVX512 = false |
|||
} else { |
|||
// Check if OPMASK and ZMM registers have OS support.
|
|||
osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax) |
|||
} |
|||
} |
|||
|
|||
X86.HasAVX = isSet(28, ecx1) && osSupportsAVX |
|||
|
|||
if maxID < 7 { |
|||
return |
|||
} |
|||
|
|||
_, ebx7, ecx7, edx7 := cpuid(7, 0) |
|||
X86.HasBMI1 = isSet(3, ebx7) |
|||
X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX |
|||
X86.HasBMI2 = isSet(8, ebx7) |
|||
X86.HasERMS = isSet(9, ebx7) |
|||
X86.HasRDSEED = isSet(18, ebx7) |
|||
X86.HasADX = isSet(19, ebx7) |
|||
|
|||
X86.HasAVX512 = isSet(16, ebx7) && osSupportsAVX512 // Because avx-512 foundation is the core required extension
|
|||
if X86.HasAVX512 { |
|||
X86.HasAVX512F = true |
|||
X86.HasAVX512CD = isSet(28, ebx7) |
|||
X86.HasAVX512ER = isSet(27, ebx7) |
|||
X86.HasAVX512PF = isSet(26, ebx7) |
|||
X86.HasAVX512VL = isSet(31, ebx7) |
|||
X86.HasAVX512BW = isSet(30, ebx7) |
|||
X86.HasAVX512DQ = isSet(17, ebx7) |
|||
X86.HasAVX512IFMA = isSet(21, ebx7) |
|||
X86.HasAVX512VBMI = isSet(1, ecx7) |
|||
X86.HasAVX5124VNNIW = isSet(2, edx7) |
|||
X86.HasAVX5124FMAPS = isSet(3, edx7) |
|||
X86.HasAVX512VPOPCNTDQ = isSet(14, ecx7) |
|||
X86.HasAVX512VPCLMULQDQ = isSet(10, ecx7) |
|||
X86.HasAVX512VNNI = isSet(11, ecx7) |
|||
X86.HasAVX512GFNI = isSet(8, ecx7) |
|||
X86.HasAVX512VAES = isSet(9, ecx7) |
|||
X86.HasAVX512VBMI2 = isSet(6, ecx7) |
|||
X86.HasAVX512BITALG = isSet(12, ecx7) |
|||
|
|||
eax71, _, _, _ := cpuid(7, 1) |
|||
X86.HasAVX512BF16 = isSet(5, eax71) |
|||
} |
|||
|
|||
X86.HasAMXTile = isSet(24, edx7) |
|||
X86.HasAMXInt8 = isSet(25, edx7) |
|||
X86.HasAMXBF16 = isSet(22, edx7) |
|||
} |
|||
|
|||
// isSet reports whether bit number bitpos (0 is the least significant bit)
// is set in value. Positions of 32 and above are never set.
func isSet(bitpos uint, value uint32) bool {
	shifted := value >> bitpos
	return shifted&1 == 1
}
@ -0,0 +1,26 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build (386 || amd64 || amd64p32) && gc |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) |
// cpuid loads AX and CX from the arguments, executes CPUID and returns the
// resulting AX, BX, CX and DX.
|||
TEXT ·cpuid(SB), NOSPLIT, $0-24 |
|||
MOVL eaxArg+0(FP), AX |
|||
MOVL ecxArg+4(FP), CX |
|||
CPUID |
|||
MOVL AX, eax+8(FP) |
|||
MOVL BX, ebx+12(FP) |
|||
MOVL CX, ecx+16(FP) |
|||
MOVL DX, edx+20(FP) |
|||
RET |
|||
|
|||
// func xgetbv() (eax, edx uint32) |
// xgetbv executes XGETBV with CX set to 0 and returns the resulting AX and DX.
|||
TEXT ·xgetbv(SB),NOSPLIT,$0-8 |
|||
MOVL $0, CX |
|||
XGETBV |
|||
MOVL AX, eax+0(FP) |
|||
MOVL DX, edx+4(FP) |
|||
RET |
@ -0,0 +1,10 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
func archInit() { |
|||
doinit() |
|||
Initialized = true |
|||
} |
@ -0,0 +1,25 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
func initS390Xbase() { |
|||
// get the facilities list
|
|||
facilities := stfle() |
|||
|
|||
// mandatory
|
|||
S390X.HasZARCH = facilities.Has(zarch) |
|||
S390X.HasSTFLE = facilities.Has(stflef) |
|||
S390X.HasLDISP = facilities.Has(ldisp) |
|||
S390X.HasEIMM = facilities.Has(eimm) |
|||
|
|||
// optional
|
|||
S390X.HasETF3EH = facilities.Has(etf3eh) |
|||
S390X.HasDFP = facilities.Has(dfp) |
|||
S390X.HasMSA = facilities.Has(msa) |
|||
S390X.HasVX = facilities.Has(vx) |
|||
if S390X.HasVX { |
|||
S390X.HasVXE = facilities.Has(vxe) |
|||
} |
|||
} |
@ -0,0 +1,10 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64
|
|||
|
|||
package cpu |
|||
|
|||
// IsBigEndian records whether the GOARCH's byte order is big endian.
|
|||
const IsBigEndian = true |
@ -0,0 +1,10 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm
|
|||
|
|||
package cpu |
|||
|
|||
// IsBigEndian records whether the GOARCH's byte order is big endian.
|
|||
const IsBigEndian = false |
@ -0,0 +1,71 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"os" |
|||
) |
|||
|
|||
const (
	// auxv tags whose values readHWCAP stores in hwCap and hwCap2.
	_AT_HWCAP  = 16
	_AT_HWCAP2 = 26

	// procAuxv is the file readHWCAP falls back to when the runtime
	// provides no auxv.
	procAuxv = "/proc/self/auxv"

	// uintSize is the size of a uint in bits: 32 or 64.
	uintSize = int(32 << (^uint(0) >> 63))
)

// Platforms that have no 'cpuid' equivalent use HWCAP/HWCAP2 instead.
// These are initialized in cpu_$GOARCH.go
// and should not be changed after they are initialized.
var (
	hwCap  uint
	hwCap2 uint
)
|||
|
|||
func readHWCAP() error { |
|||
// For Go 1.21+, get auxv from the Go runtime.
|
|||
if a := getAuxv(); len(a) > 0 { |
|||
for len(a) >= 2 { |
|||
tag, val := a[0], uint(a[1]) |
|||
a = a[2:] |
|||
switch tag { |
|||
case _AT_HWCAP: |
|||
hwCap = val |
|||
case _AT_HWCAP2: |
|||
hwCap2 = val |
|||
} |
|||
} |
|||
return nil |
|||
} |
|||
|
|||
buf, err := os.ReadFile(procAuxv) |
|||
if err != nil { |
|||
// e.g. on android /proc/self/auxv is not accessible, so silently
|
|||
// ignore the error and leave Initialized = false. On some
|
|||
// architectures (e.g. arm64) doinit() implements a fallback
|
|||
// readout and will set Initialized = true again.
|
|||
return err |
|||
} |
|||
bo := hostByteOrder() |
|||
for len(buf) >= 2*(uintSize/8) { |
|||
var tag, val uint |
|||
switch uintSize { |
|||
case 32: |
|||
tag = uint(bo.Uint32(buf[0:])) |
|||
val = uint(bo.Uint32(buf[4:])) |
|||
buf = buf[8:] |
|||
case 64: |
|||
tag = uint(bo.Uint64(buf[0:])) |
|||
val = uint(bo.Uint64(buf[8:])) |
|||
buf = buf[16:] |
|||
} |
|||
switch tag { |
|||
case _AT_HWCAP: |
|||
hwCap = val |
|||
case _AT_HWCAP2: |
|||
hwCap2 = val |
|||
} |
|||
} |
|||
return nil |
|||
} |
@ -0,0 +1,43 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import "strconv" |
|||
|
|||
// parseRelease parses a dot-separated version number. It follows the semver
// syntax, but allows the minor and patch versions to be elided. Anything
// from the first '-' or '+' onwards is ignored, as is anything after the
// patch component.
//
// ok is false if a component that is present is not a valid integer; the
// components parsed before it are still returned.
//
// This is a copy of the Go runtime's parseRelease from
// https://golang.org/cl/209597.
func parseRelease(rel string) (major, minor, patch int, ok bool) {
	// Strip anything after a dash or plus.
	rest := rel
	for i := 0; i < len(rest); i++ {
		if c := rest[i]; c == '-' || c == '+' {
			rest = rest[:i]
			break
		}
	}

	// Consume up to three components, stopping early on a parse failure or
	// when the input is exhausted.
	for _, dst := range [3]*int{&major, &minor, &patch} {
		field := rest
		rest = ""
		for i := 0; i < len(field); i++ {
			if field[i] == '.' {
				field, rest = field[:i], field[i+1:]
				break
			}
		}
		n, err := strconv.Atoi(field)
		*dst, ok = n, err == nil
		if !ok || rest == "" {
			return
		}
	}
	return
}
@ -0,0 +1,53 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux && arm64
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"errors" |
|||
"io" |
|||
"os" |
|||
"strings" |
|||
) |
|||
|
|||
func readLinuxProcCPUInfo() error { |
|||
f, err := os.Open("/proc/cpuinfo") |
|||
if err != nil { |
|||
return err |
|||
} |
|||
defer f.Close() |
|||
|
|||
var buf [1 << 10]byte // enough for first CPU
|
|||
n, err := io.ReadFull(f, buf[:]) |
|||
if err != nil && err != io.ErrUnexpectedEOF { |
|||
return err |
|||
} |
|||
in := string(buf[:n]) |
|||
const features = "\nFeatures : " |
|||
i := strings.Index(in, features) |
|||
if i == -1 { |
|||
return errors.New("no CPU features found") |
|||
} |
|||
in = in[i+len(features):] |
|||
if i := strings.Index(in, "\n"); i != -1 { |
|||
in = in[:i] |
|||
} |
|||
m := map[string]*bool{} |
|||
|
|||
initOptions() // need it early here; it's harmless to call twice
|
|||
for _, o := range options { |
|||
m[o.Name] = o.Feature |
|||
} |
|||
// The EVTSTRM field has alias "evstrm" in Go, but Linux calls it "evtstrm".
|
|||
m["evtstrm"] = &ARM64.HasEVTSTRM |
|||
|
|||
for _, f := range strings.Fields(in) { |
|||
if p, ok := m[f]; ok { |
|||
*p = true |
|||
} |
|||
} |
|||
return nil |
|||
} |
@ -0,0 +1,16 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
// getAuxvFn is the hook used to obtain the auxiliary vector. It is non-nil
// on Go 1.21+ (set by the init function in runtime_auxv_go121.go) on
// platforms that use auxv, and nil otherwise.
var getAuxvFn func() []uintptr

// getAuxv returns whatever the getAuxvFn hook reports, or nil when no hook
// has been installed.
func getAuxv() []uintptr {
	if fn := getAuxvFn; fn != nil {
		return fn()
	}
	return nil
}
@ -0,0 +1,18 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build go1.21
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
_ "unsafe" // for linkname
|
|||
) |
|||
|
|||
// runtime_getAuxv has no body in this package; the linkname directive below
// binds it to the symbol runtime.getAuxv.
//
//go:linkname runtime_getAuxv runtime.getAuxv
func runtime_getAuxv() []uintptr

// init installs runtime_getAuxv as the getAuxvFn hook. This file is only
// built under the go1.21 constraint, so the hook stays nil on older
// toolchains.
func init() {
	getAuxvFn = runtime_getAuxv
}
@ -0,0 +1,26 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Recreate a getsystemcfg syscall handler instead of
|
|||
// using the one provided by x/sys/unix to avoid having
|
|||
// the dependency between them. (See golang.org/issue/32102)
|
|||
// Moreover, this file will be used during the building of
|
|||
// gccgo's libgo and thus must not use a CGo method.
|
|||
|
|||
//go:build aix && gccgo
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"syscall" |
|||
) |
|||
|
|||
//extern getsystemcfg
|
|||
func gccgoGetsystemcfg(label uint32) (r uint64) |
|||
|
|||
func callgetsystemcfg(label int) (r1 uintptr, e1 syscall.Errno) { |
|||
r1 = uintptr(gccgoGetsystemcfg(uint32(label))) |
|||
e1 = syscall.GetErrno() |
|||
return |
|||
} |
@ -0,0 +1,35 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Minimal copy of x/sys/unix so the cpu package can make a
|
|||
// system call on AIX without depending on x/sys/unix.
|
|||
// (See golang.org/issue/32102)
|
|||
|
|||
//go:build aix && ppc64 && gc
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"syscall" |
|||
"unsafe" |
|||
) |
|||
|
|||
// The directive below names getsystemcfg from "libc.a/shr_64.o" as the
// dynamic import behind the local symbol libc_getsystemcfg.
//go:cgo_import_dynamic libc_getsystemcfg getsystemcfg "libc.a/shr_64.o"

//go:linkname libc_getsystemcfg libc_getsystemcfg

// syscallFunc is the type of libc_getsystemcfg, whose address is handed to
// syscall6 as the trap argument.
type syscallFunc uintptr

// libc_getsystemcfg is the variable the directives above refer to; only its
// address is used by this file.
var libc_getsystemcfg syscallFunc

// errno is an alias for syscall.Errno, used in the signatures below.
type errno = syscall.Errno

// Implemented in runtime/syscall_aix.go.
func rawSyscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
func syscall6(trap, nargs, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err errno)
|||
|
|||
func callgetsystemcfg(label int) (r1 uintptr, e1 errno) { |
|||
r1, _, e1 = syscall6(uintptr(unsafe.Pointer(&libc_getsystemcfg)), 1, uintptr(label), 0, 0, 0, 0, 0) |
|||
return |
|||
} |
@ -0,0 +1,13 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build aix || darwin || dragonfly || freebsd || openbsd || solaris
|
|||
|
|||
package unix |
|||
|
|||
var mapper = &mmapper{ |
|||
active: make(map[*byte][]byte), |
|||
mmap: mmap, |
|||
munmap: munmap, |
|||
} |
@ -0,0 +1,52 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux || netbsd
|
|||
|
|||
package unix |
|||
|
|||
import "unsafe" |
|||
|
|||
type mremapMmapper struct { |
|||
mmapper |
|||
mremap func(oldaddr uintptr, oldlength uintptr, newlength uintptr, flags int, newaddr uintptr) (xaddr uintptr, err error) |
|||
} |
|||
|
|||
var mapper = &mremapMmapper{ |
|||
mmapper: mmapper{ |
|||
active: make(map[*byte][]byte), |
|||
mmap: mmap, |
|||
munmap: munmap, |
|||
}, |
|||
mremap: mremap, |
|||
} |
|||
|
|||
func (m *mremapMmapper) Mremap(oldData []byte, newLength int, flags int) (data []byte, err error) { |
|||
if newLength <= 0 || len(oldData) == 0 || len(oldData) != cap(oldData) || flags&mremapFixed != 0 { |
|||
return nil, EINVAL |
|||
} |
|||
|
|||
pOld := &oldData[cap(oldData)-1] |
|||
m.Lock() |
|||
defer m.Unlock() |
|||
bOld := m.active[pOld] |
|||
if bOld == nil || &bOld[0] != &oldData[0] { |
|||
return nil, EINVAL |
|||
} |
|||
newAddr, errno := m.mremap(uintptr(unsafe.Pointer(&bOld[0])), uintptr(len(bOld)), uintptr(newLength), flags, 0) |
|||
if errno != nil { |
|||
return nil, errno |
|||
} |
|||
bNew := unsafe.Slice((*byte)(unsafe.Pointer(newAddr)), newLength) |
|||
pNew := &bNew[cap(bNew)-1] |
|||
if flags&mremapDontunmap == 0 { |
|||
delete(m.active, pOld) |
|||
} |
|||
m.active[pNew] = bNew |
|||
return bNew, nil |
|||
} |
|||
|
|||
func Mremap(oldData []byte, newLength int, flags int) (data []byte, err error) { |
|||
return mapper.Mremap(oldData, newLength, flags) |
|||
} |
File diff suppressed because it is too large
File diff suppressed because it is too large
Loading…
Reference in new issue