Wmuga
3 weeks ago
457 changed files with 26573 additions and 4843 deletions
@ -1,184 +0,0 @@ |
|||
package lib |
|||
|
|||
import ( |
|||
"fmt" |
|||
"sync" |
|||
"time" |
|||
|
|||
"github.com/ReneKroon/ttlcache" |
|||
|
|||
"github.com/pkg/errors" |
|||
) |
|||
|
|||
// cacheKeyPrefix is prepended to the item key when values are stored in or
// read from the underlying ttlcache instances.
const cacheKeyPrefix = "cache."

var (
	// cacheCollection is the package-level cache registry returned by Cache().
	cacheCollection cache
)
|||
|
|||
// cache is a registry of named cache items.
type cache struct {
	// items maps a registration key to its cache item.
	items map[string]*cacheItem
	// mx is held by Register and Unregister while they modify items.
	mx sync.RWMutex
}
|||
|
|||
// cacheItem holds the state of a single registered cache entry.
type cacheItem struct {
	// reader defines how data is obtained from any source that implements
	// the Reader interface.
	reader Reader
	// cache holds the current value; it is written with cacheTTL.
	cache *ttlcache.Cache
	// persistentCache holds the last fetched value, written via Set without
	// an explicit TTL; it is read while another caller is refreshing cache.
	persistentCache *ttlcache.Cache
	// locks records which keys are currently being refreshed.
	locks locks
	// cacheTTL is the TTL passed to cache.SetWithTTL for this item.
	cacheTTL time.Duration
}
|||
|
|||
// Reader is the data source behind a cache item.
type Reader interface {
	// ReadSource returns the raw bytes to be cached, or an error.
	ReadSource() (res []byte, err error)
}
|||
|
|||
func Cache() *cache { |
|||
if &cacheCollection == nil { |
|||
panic("cache has not been initialized, call CacheRegister() before use") |
|||
} |
|||
|
|||
return &cacheCollection |
|||
} |
|||
|
|||
// Register регистрируем новый кеш (указываем фукнцию, кр будет возвращать нужное значение)
|
|||
func (c *cache) Register(key string, source Reader, ttl time.Duration) (err error) { |
|||
c.mx.Lock() |
|||
defer c.mx.Unlock() |
|||
|
|||
cache := ttlcache.NewCache() |
|||
cache.SkipTtlExtensionOnHit(true) |
|||
|
|||
ci := cacheItem{ |
|||
cache: cache, |
|||
persistentCache: ttlcache.NewCache(), |
|||
locks: locks{keys: map[string]bool{}}, |
|||
reader: source, |
|||
cacheTTL: ttl, |
|||
} |
|||
c.items[key] = &ci |
|||
return err |
|||
} |
|||
|
|||
// Unregister
|
|||
func (c *cache) Unregister(key string) (err error) { |
|||
c.mx.Lock() |
|||
defer c.mx.Unlock() |
|||
|
|||
delete(c.items, key) |
|||
return err |
|||
} |
|||
|
|||
// Get возвращает текущее значение параметра в сервисе keeper.
|
|||
// Нужно учитывать, что значения на время кешируются и обновляются с заданной периодичностью.
|
|||
func (c *cache) Get(key string) (value interface{}, err error) { |
|||
var item *cacheItem |
|||
var found bool |
|||
|
|||
item, found = c.items[key] |
|||
if !found { |
|||
return nil, fmt.Errorf("error. key is not found") |
|||
} |
|||
|
|||
if item.cache == nil { |
|||
return nil, fmt.Errorf("cache is not inited") |
|||
} |
|||
|
|||
if item.persistentCache == nil { |
|||
return nil, fmt.Errorf("persistent cache is not inited") |
|||
} |
|||
|
|||
if cachedValue, ok := item.cache.Get(cacheKeyPrefix + key); ok { |
|||
return cachedValue, nil |
|||
} |
|||
|
|||
// Если стоит блокировка, значит кто-то уже обновляет кеш. В этом случае
|
|||
// пытаемся отдать предыдущее значение.
|
|||
if item.locks.Get(key) { |
|||
return c.tryToGetOldValue(key) |
|||
} |
|||
|
|||
// Значение не найдено. Первый из запросов блокирует за собой обновление (на самом деле
|
|||
// может возникнуть ситуация когда несколько запросов поставят блокировку и начнут
|
|||
// обновлять кеш - пока считаем это некритичным).
|
|||
item.locks.Set(key, true) |
|||
defer item.locks.Set(key, false) |
|||
|
|||
var values []byte |
|||
values, err = item.reader.ReadSource() |
|||
if err != nil { |
|||
return nil, errors.Wrap(err, "could not get value from getter") |
|||
} |
|||
|
|||
value = values |
|||
|
|||
item.cache.SetWithTTL(cacheKeyPrefix+key, value, item.cacheTTL) |
|||
item.persistentCache.Set(cacheKeyPrefix+key, value) |
|||
|
|||
return value, nil |
|||
} |
|||
|
|||
// tryToGetOldValue пытается получить старое значение, если в момент запроса на актуальном стоит блокировка.
|
|||
func (c *cache) tryToGetOldValue(key string) (interface{}, error) { |
|||
var item *cacheItem |
|||
var found bool |
|||
|
|||
item, found = c.items[key] |
|||
if !found { |
|||
return nil, fmt.Errorf("error. key is not found") |
|||
} |
|||
|
|||
fnGetPersistentCacheValue := func() (interface{}, error) { |
|||
if cachedValue, ok := item.persistentCache.Get(cacheKeyPrefix + key); ok { |
|||
return cachedValue, nil |
|||
} |
|||
|
|||
return nil, fmt.Errorf("persinstent cache is empty") |
|||
} |
|||
|
|||
oldValue, err := fnGetPersistentCacheValue() |
|||
|
|||
// Повторяем попытку получить значение. При старте сервиса может возникнуть блокировка
|
|||
// обновления ключа, но при этом в постоянном кеше еще может не быть значения.
|
|||
if err != nil { |
|||
time.Sleep(100 * time.Millisecond) |
|||
|
|||
oldValue, err = fnGetPersistentCacheValue() |
|||
} |
|||
|
|||
return oldValue, err |
|||
} |
|||
|
|||
// CacheInit инициализировали глобальную переменную defaultCache
|
|||
// source - источник, откуда мы получаем значения для кеширования
|
|||
func CacheRegister() { |
|||
d := cache{ |
|||
items: map[string]*cacheItem{}, |
|||
mx: sync.RWMutex{}, |
|||
} |
|||
cacheCollection = d |
|||
} |
|||
|
|||
// locks tracks which cache keys are currently being refreshed, so that
// concurrent callers can tell a refresh is already in progress.
type locks struct {
	// keys holds the lock flag for each individual key. A true value means
	// the refresh of that key is currently claimed by some goroutine.
	keys map[string]bool
	mx   sync.RWMutex
}

// Get reports whether the given key is currently being refreshed.
func (l *locks) Get(key string) bool {
	l.mx.RLock()
	locked := l.keys[key]
	l.mx.RUnlock()

	return locked
}

// Set sets or clears the refresh flag for the given key.
func (l *locks) Set(key string, value bool) {
	l.mx.Lock()
	defer l.mx.Unlock()

	l.keys[key] = value
}
@ -0,0 +1,18 @@ |
|||
package models |
|||
|
|||
const (
	// HTTP header names.
	headerXRequestID      = "X-Request-ID"
	headerXUserID         = "X-User-ID"
	headerXRequestUnit    = "X-Request-Unit"
	headerXRequestService = "X-Request-Service"

	// Field names; requestIDField and userIDField are the keys of
	// ProxiedHeaders.
	requestIDField = "request-id"
	userIDField    = "user-id"
	serviceIDField = "service-id"
	configIDField  = "config-id"
)

// ProxiedHeaders maps a field name to the HTTP header name paired with it.
// NOTE(review): presumably these are the headers forwarded with proxied
// requests; confirm against the callers.
var ProxiedHeaders = map[string]string{
	requestIDField: headerXRequestID,
	userIDField:    headerXUserID,
}
@ -0,0 +1,31 @@ |
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects) |
|||
*.o |
|||
*.a |
|||
*.so |
|||
|
|||
# Folders |
|||
_obj |
|||
_test |
|||
|
|||
# Architecture specific extensions/prefixes |
|||
*.[568vq] |
|||
[568vq].out |
|||
|
|||
*.cgo1.go |
|||
*.cgo2.c |
|||
_cgo_defun.c |
|||
_cgo_gotypes.go |
|||
_cgo_export.* |
|||
|
|||
_testmain.go |
|||
|
|||
*.exe |
|||
*.test |
|||
*.prof |
|||
/ksuid |
|||
|
|||
# Emacs |
|||
*~ |
|||
|
|||
# govendor |
|||
/vendor/*/ |
@ -0,0 +1,21 @@ |
|||
MIT License |
|||
|
|||
Copyright (c) 2017 Segment.io |
|||
|
|||
Permission is hereby granted, free of charge, to any person obtaining a copy |
|||
of this software and associated documentation files (the "Software"), to deal |
|||
in the Software without restriction, including without limitation the rights |
|||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|||
copies of the Software, and to permit persons to whom the Software is |
|||
furnished to do so, subject to the following conditions: |
|||
|
|||
The above copyright notice and this permission notice shall be included in all |
|||
copies or substantial portions of the Software. |
|||
|
|||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|||
SOFTWARE. |
@ -0,0 +1,234 @@ |
|||
# ksuid [![Go Report Card](https://goreportcard.com/badge/github.com/segmentio/ksuid)](https://goreportcard.com/report/github.com/segmentio/ksuid) [![GoDoc](https://godoc.org/github.com/segmentio/ksuid?status.svg)](https://godoc.org/github.com/segmentio/ksuid) [![Circle CI](https://circleci.com/gh/segmentio/ksuid.svg?style=shield)](https://circleci.com/gh/segmentio/ksuid.svg?style=shield) |
|||
|
|||
ksuid is an efficient, comprehensive, battle-tested Go library for |
|||
generating and parsing a specific kind of globally unique identifier |
|||
called a *KSUID*. This library serves as its reference implementation. |
|||
|
|||
## Install |
|||
```sh |
|||
go get -u github.com/segmentio/ksuid |
|||
``` |
|||
|
|||
## What is a KSUID? |
|||
|
|||
KSUID is for K-Sortable Unique IDentifier. It is a kind of globally |
|||
unique identifier similar to a [RFC 4122 UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier), built from the ground-up to be "naturally" |
|||
sorted by generation timestamp without any special type-aware logic. |
|||
|
|||
In short, running a set of KSUIDs through the UNIX `sort` command will result |
|||
in a list ordered by generation time. |
|||
|
|||
## Why use KSUIDs? |
|||
|
|||
There are numerous methods for generating unique identifiers, so why KSUID? |
|||
|
|||
1. Naturally ordered by generation time |
|||
2. Collision-free, coordination-free, dependency-free |
|||
3. Highly portable representations |
|||
|
|||
Even if only one of these properties is important to you, KSUID is a great |
|||
choice! :) Many projects chose to use KSUIDs *just* because the text |
|||
representation is copy-and-paste friendly. |
|||
|
|||
### 1. Naturally Ordered By Generation Time |
|||
|
|||
Unlike the more ubiquitous UUIDv4, a KSUID contains a timestamp component |
|||
that allows them to be loosely sorted by generation time. This is not a strong |
|||
guarantee (an invariant) as it depends on wall clocks, but is still incredibly |
|||
useful in practice. Both the binary and text representations will sort by |
|||
creation time without any special sorting logic. |
|||
|
|||
### 2. Collision-free, Coordination-free, Dependency-free |
|||
|
|||
While RFC 4122 UUIDv1s *do* include a time component, there aren't enough |
|||
bytes of randomness to provide strong protection against collisions |
|||
(duplicates). With such a low amount of entropy, it is feasible for a |
|||
malicious party to guess generated IDs, creating a problem for systems whose |
|||
security is, implicitly or explicitly, sensitive to an adversary guessing |
|||
identifiers. |
|||
|
|||
To fit into a 64-bit number space, [Snowflake IDs](https://blog.twitter.com/2010/announcing-snowflake) |
|||
and its derivatives require coordination to avoid collisions, which |
|||
significantly increases the deployment complexity and operational burden. |
|||
|
|||
A KSUID includes 128 bits of pseudorandom data ("entropy"). This number space |
|||
is 64 times larger than the 122 bits used by the well-accepted RFC 4122 UUIDv4 |
|||
standard. The additional timestamp component can be considered "bonus entropy" |
|||
which further decreases the probability of collisions, to the point of physical |
|||
infeasibility in any practical implementation. |
|||
|
|||
### 3. Highly Portable Representations |
|||
|
|||
The text *and* binary representations are lexicographically sortable, which |
|||
allows them to be dropped into systems which do not natively support KSUIDs |
|||
and retain their time-ordered property. |
|||
|
|||
The text representation is an alphanumeric base62 encoding, so it "fits" |
|||
anywhere alphanumeric strings are accepted. No delimiters are used, so |
|||
stringified KSUIDs won't be inadvertently truncated or tokenized when |
|||
interpreted by software that is designed for human-readable text, a common |
|||
problem for the text representation of RFC 4122 UUIDs. |
|||
|
|||
## How do KSUIDs work? |
|||
|
|||
Binary KSUIDs are 20-bytes: a 32-bit unsigned integer UTC timestamp and |
|||
a 128-bit randomly generated payload. The timestamp uses big-endian |
|||
encoding, to support lexicographic sorting. The timestamp epoch is adjusted |
|||
to May 13th, 2014, providing over 100 years of life. The payload is |
|||
generated by a cryptographically-strong pseudorandom number generator. |
|||
|
|||
The text representation is always 27 characters, encoded in alphanumeric |
|||
base62 that will lexicographically sort by timestamp. |
|||
|
|||
## High Performance |
|||
|
|||
This library is designed to be used in code paths that are performance |
|||
critical. Its code has been tuned to eliminate all non-essential |
|||
overhead. The `KSUID` type is derived from a fixed-size array, which |
|||
eliminates the additional reference chasing and allocation involved in |
|||
a variable-width type. |
|||
|
|||
The API provides an interface for use in code paths which are sensitive |
|||
to allocation. For example, the `Append` method can be used to write the |
|||
text representation of a `KSUID` value into a caller-supplied buffer |
|||
without additional heap allocation. |
|||
|
|||
All public package level "pure" functions are concurrency-safe, protected |
|||
by a global mutex. For hot loops that generate a large amount of KSUIDs |
|||
from a single Goroutine, the `Sequence` type is provided to elide the |
|||
potential contention. |
|||
|
|||
By default, out of an abundance of caution, the cryptographically-secure |
|||
PRNG is used to generate the random bits of a KSUID. This can be relaxed |
|||
in extremely performance-critical code using the included `FastRander` |
|||
type. `FastRander` uses the standard PRNG with a seed generated by the |
|||
cryptographically-secure PRNG. |
|||
|
|||
*_NOTE:_ While there is no evidence that `FastRander` will increase the |
|||
probability of a collision, it shouldn't be used in scenarios where |
|||
uniqueness is important to security, as there is an increased chance |
|||
the generated IDs can be predicted by an adversary.* |
|||
|
|||
## Battle Tested |
|||
|
|||
This code has been used in production at Segment for several years, |
|||
across a diverse array of projects. Trillions upon trillions of |
|||
KSUIDs have been generated in some of Segment's most |
|||
performance-critical, large-scale distributed systems. |
|||
|
|||
## Plays Well With Others |
|||
|
|||
Designed to be integrated with other libraries, the `KSUID` type |
|||
implements many standard library interfaces, including: |
|||
|
|||
* `Stringer` |
|||
* `database/sql.Scanner` and `database/sql/driver.Valuer` |
|||
* `encoding.BinaryMarshaler` and `encoding.BinaryUnmarshaler` |
|||
* `encoding.TextMarshaler` and `encoding.TextUnmarshaler` |
|||
(`encoding/json` friendly!) |
|||
|
|||
## Command Line Tool |
|||
|
|||
This package comes with a command-line tool `ksuid`, useful for |
|||
generating KSUIDs as well as inspecting the internal components of |
|||
existing KSUIDs. Machine-friendly output is provided for scripting |
|||
use cases. |
|||
|
|||
Given a Go build environment, it can be installed with the command: |
|||
|
|||
```sh |
|||
$ go install github.com/segmentio/ksuid/cmd/ksuid |
|||
``` |
|||
|
|||
## CLI Usage Examples |
|||
|
|||
### Generate a KSUID |
|||
|
|||
```sh |
|||
$ ksuid |
|||
0ujsswThIGTUYm2K8FjOOfXtY1K |
|||
``` |
|||
|
|||
### Generate 4 KSUIDs |
|||
|
|||
```sh |
|||
$ ksuid -n 4 |
|||
0ujsszwN8NRY24YaXiTIE2VWDTS |
|||
0ujsswThIGTUYm2K8FjOOfXtY1K |
|||
0ujssxh0cECutqzMgbtXSGnjorm |
|||
0ujsszgFvbiEr7CDgE3z8MAUPFt |
|||
``` |
|||
|
|||
### Inspect the components of a KSUID |
|||
|
|||
```sh |
|||
$ ksuid -f inspect 0ujtsYcgvSTl8PAuAdqWYSMnLOv |
|||
|
|||
REPRESENTATION: |
|||
|
|||
String: 0ujtsYcgvSTl8PAuAdqWYSMnLOv |
|||
Raw: 0669F7EFB5A1CD34B5F99D1154FB6853345C9735 |
|||
|
|||
COMPONENTS: |
|||
|
|||
Time: 2017-10-09 21:00:47 -0700 PDT |
|||
Timestamp: 107608047 |
|||
Payload: B5A1CD34B5F99D1154FB6853345C9735 |
|||
``` |
|||
|
|||
### Generate a KSUID and inspect its components |
|||
|
|||
```sh |
|||
$ ksuid -f inspect |
|||
|
|||
REPRESENTATION: |
|||
|
|||
String: 0ujzPyRiIAffKhBux4PvQdDqMHY |
|||
Raw: 066A029C73FC1AA3B2446246D6E89FCD909E8FE8 |
|||
|
|||
COMPONENTS: |
|||
|
|||
Time: 2017-10-09 21:46:20 -0700 PDT |
|||
Timestamp: 107610780 |
|||
Payload: 73FC1AA3B2446246D6E89FCD909E8FE8 |
|||
|
|||
``` |
|||
|
|||
### Inspect a KSUID with template formatted inspection output |
|||
|
|||
```sh |
|||
$ ksuid -f template -t '{{ .Time }}: {{ .Payload }}' 0ujtsYcgvSTl8PAuAdqWYSMnLOv |
|||
2017-10-09 21:00:47 -0700 PDT: B5A1CD34B5F99D1154FB6853345C9735 |
|||
``` |
|||
|
|||
### Inspect multiple KSUIDs with template formatted output |
|||
|
|||
```sh |
|||
$ ksuid -f template -t '{{ .Time }}: {{ .Payload }}' $(ksuid -n 4) |
|||
2017-10-09 21:05:37 -0700 PDT: 304102BC687E087CC3A811F21D113CCF |
|||
2017-10-09 21:05:37 -0700 PDT: EAF0B240A9BFA55E079D887120D962F0 |
|||
2017-10-09 21:05:37 -0700 PDT: DF0761769909ABB0C7BB9D66F79FC041 |
|||
2017-10-09 21:05:37 -0700 PDT: 1A8F0E3D0BDEB84A5FAD702876F46543 |
|||
``` |
|||
|
|||
### Generate KSUIDs and output JSON using template formatting |
|||
|
|||
```sh |
|||
$ ksuid -f template -t '{ "timestamp": "{{ .Timestamp }}", "payload": "{{ .Payload }}", "ksuid": "{{.String}}"}' -n 4 |
|||
{ "timestamp": "107611700", "payload": "9850EEEC191BF4FF26F99315CE43B0C8", "ksuid": "0uk1Hbc9dQ9pxyTqJ93IUrfhdGq"} |
|||
{ "timestamp": "107611700", "payload": "CC55072555316F45B8CA2D2979D3ED0A", "ksuid": "0uk1HdCJ6hUZKDgcxhpJwUl5ZEI"} |
|||
{ "timestamp": "107611700", "payload": "BA1C205D6177F0992D15EE606AE32238", "ksuid": "0uk1HcdvF0p8C20KtTfdRSB9XIm"} |
|||
{ "timestamp": "107611700", "payload": "67517BA309EA62AE7991B27BB6F2FCAC", "ksuid": "0uk1Ha7hGJ1Q9Xbnkt0yZgNwg3g"} |
|||
``` |
|||
|
|||
## Implementations for other languages |
|||
|
|||
- Python: [svix-ksuid](https://github.com/svixhq/python-ksuid/) |
|||
- Ruby: [ksuid-ruby](https://github.com/michaelherold/ksuid-ruby) |
|||
- Java: [ksuid](https://github.com/ksuid/ksuid) |
|||
- Rust: [rksuid](https://github.com/nharring/rksuid) |
|||
- dotNet: [Ksuid.Net](https://github.com/JoyMoe/Ksuid.Net) |
|||
|
|||
## License |
|||
|
|||
ksuid source code is available under an MIT [License](/LICENSE.md). |
@ -0,0 +1,202 @@ |
|||
package ksuid |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"errors" |
|||
) |
|||
|
|||
const (
	// base62Characters is the encoding alphabet; its lexicographic ordering
	// (based on the Unicode table) is 0-9A-Za-z.
	base62Characters = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	// zeroString is 27 '0' characters, used to left-pad encoded output.
	zeroString = "000000000000000000000000000"
	// offsetUppercase is the digit value of 'A'.
	offsetUppercase = 10
	// offsetLowercase is the digit value of 'a'.
	offsetLowercase = 36
)
|||
|
|||
var (
	// errShortBuffer is returned by fastDecodeBase62 when the decoded value
	// does not fit into the destination buffer.
	errShortBuffer = errors.New("the output buffer is too small to hold the decoded value")
)
|||
|
|||
// Converts a base 62 byte into the number value that it represents.
|
|||
func base62Value(digit byte) byte { |
|||
switch { |
|||
case digit >= '0' && digit <= '9': |
|||
return digit - '0' |
|||
case digit >= 'A' && digit <= 'Z': |
|||
return offsetUppercase + (digit - 'A') |
|||
default: |
|||
return offsetLowercase + (digit - 'a') |
|||
} |
|||
} |
|||
|
|||
// This function encodes the base 62 representation of the src KSUID in binary
|
|||
// form into dst.
|
|||
//
|
|||
// In order to support a couple of optimizations the function assumes that src
|
|||
// is 20 bytes long and dst is 27 bytes long.
|
|||
//
|
|||
// Any unused bytes in dst will be set to the padding '0' byte.
|
|||
func fastEncodeBase62(dst []byte, src []byte) { |
|||
const srcBase = 4294967296 |
|||
const dstBase = 62 |
|||
|
|||
// Split src into 5 4-byte words, this is where most of the efficiency comes
|
|||
// from because this is a O(N^2) algorithm, and we make N = N / 4 by working
|
|||
// on 32 bits at a time.
|
|||
parts := [5]uint32{ |
|||
binary.BigEndian.Uint32(src[0:4]), |
|||
binary.BigEndian.Uint32(src[4:8]), |
|||
binary.BigEndian.Uint32(src[8:12]), |
|||
binary.BigEndian.Uint32(src[12:16]), |
|||
binary.BigEndian.Uint32(src[16:20]), |
|||
} |
|||
|
|||
n := len(dst) |
|||
bp := parts[:] |
|||
bq := [5]uint32{} |
|||
|
|||
for len(bp) != 0 { |
|||
quotient := bq[:0] |
|||
remainder := uint64(0) |
|||
|
|||
for _, c := range bp { |
|||
value := uint64(c) + uint64(remainder)*srcBase |
|||
digit := value / dstBase |
|||
remainder = value % dstBase |
|||
|
|||
if len(quotient) != 0 || digit != 0 { |
|||
quotient = append(quotient, uint32(digit)) |
|||
} |
|||
} |
|||
|
|||
// Writes at the end of the destination buffer because we computed the
|
|||
// lowest bits first.
|
|||
n-- |
|||
dst[n] = base62Characters[remainder] |
|||
bp = quotient |
|||
} |
|||
|
|||
// Add padding at the head of the destination buffer for all bytes that were
|
|||
// not set.
|
|||
copy(dst[:n], zeroString) |
|||
} |
|||
|
|||
// This function appends the base 62 representation of the KSUID in src to dst,
|
|||
// and returns the extended byte slice.
|
|||
// The result is left-padded with '0' bytes to always append 27 bytes to the
|
|||
// destination buffer.
|
|||
func fastAppendEncodeBase62(dst []byte, src []byte) []byte { |
|||
dst = reserve(dst, stringEncodedLength) |
|||
n := len(dst) |
|||
fastEncodeBase62(dst[n:n+stringEncodedLength], src) |
|||
return dst[:n+stringEncodedLength] |
|||
} |
|||
|
|||
// This function decodes the base 62 representation of the src KSUID to the
|
|||
// binary form into dst.
|
|||
//
|
|||
// In order to support a couple of optimizations the function assumes that src
|
|||
// is 27 bytes long and dst is 20 bytes long.
|
|||
//
|
|||
// Any unused bytes in dst will be set to zero.
|
|||
func fastDecodeBase62(dst []byte, src []byte) error { |
|||
const srcBase = 62 |
|||
const dstBase = 4294967296 |
|||
|
|||
// This line helps BCE (Bounds Check Elimination).
|
|||
// It may be safely removed.
|
|||
_ = src[26] |
|||
|
|||
parts := [27]byte{ |
|||
base62Value(src[0]), |
|||
base62Value(src[1]), |
|||
base62Value(src[2]), |
|||
base62Value(src[3]), |
|||
base62Value(src[4]), |
|||
base62Value(src[5]), |
|||
base62Value(src[6]), |
|||
base62Value(src[7]), |
|||
base62Value(src[8]), |
|||
base62Value(src[9]), |
|||
|
|||
base62Value(src[10]), |
|||
base62Value(src[11]), |
|||
base62Value(src[12]), |
|||
base62Value(src[13]), |
|||
base62Value(src[14]), |
|||
base62Value(src[15]), |
|||
base62Value(src[16]), |
|||
base62Value(src[17]), |
|||
base62Value(src[18]), |
|||
base62Value(src[19]), |
|||
|
|||
base62Value(src[20]), |
|||
base62Value(src[21]), |
|||
base62Value(src[22]), |
|||
base62Value(src[23]), |
|||
base62Value(src[24]), |
|||
base62Value(src[25]), |
|||
base62Value(src[26]), |
|||
} |
|||
|
|||
n := len(dst) |
|||
bp := parts[:] |
|||
bq := [stringEncodedLength]byte{} |
|||
|
|||
for len(bp) > 0 { |
|||
quotient := bq[:0] |
|||
remainder := uint64(0) |
|||
|
|||
for _, c := range bp { |
|||
value := uint64(c) + uint64(remainder)*srcBase |
|||
digit := value / dstBase |
|||
remainder = value % dstBase |
|||
|
|||
if len(quotient) != 0 || digit != 0 { |
|||
quotient = append(quotient, byte(digit)) |
|||
} |
|||
} |
|||
|
|||
if n < 4 { |
|||
return errShortBuffer |
|||
} |
|||
|
|||
dst[n-4] = byte(remainder >> 24) |
|||
dst[n-3] = byte(remainder >> 16) |
|||
dst[n-2] = byte(remainder >> 8) |
|||
dst[n-1] = byte(remainder) |
|||
n -= 4 |
|||
bp = quotient |
|||
} |
|||
|
|||
var zero [20]byte |
|||
copy(dst[:n], zero[:]) |
|||
return nil |
|||
} |
|||
|
|||
// This function appends the base 62 decoded version of src into dst.
|
|||
func fastAppendDecodeBase62(dst []byte, src []byte) []byte { |
|||
dst = reserve(dst, byteLength) |
|||
n := len(dst) |
|||
fastDecodeBase62(dst[n:n+byteLength], src) |
|||
return dst[:n+byteLength] |
|||
} |
|||
|
|||
// reserve makes sure that at least nbytes are available in the spare
// capacity of dst. When they are not, the contents are copied into a new,
// larger slice, which is returned; otherwise dst is returned unchanged.
func reserve(dst []byte, nbytes int) []byte {
	length, capacity := len(dst), cap(dst)
	if capacity-length >= nbytes {
		return dst
	}

	// Double the capacity, or grow exactly to fit when doubling is not enough.
	grown := capacity * 2
	if grown-length < nbytes {
		grown = length + nbytes
	}

	out := make([]byte, length, grown)
	copy(out, dst)
	return out
}
@ -0,0 +1,352 @@ |
|||
package ksuid |
|||
|
|||
import ( |
|||
"bytes" |
|||
"crypto/rand" |
|||
"database/sql/driver" |
|||
"encoding/binary" |
|||
"fmt" |
|||
"io" |
|||
"math" |
|||
"sync" |
|||
"time" |
|||
) |
|||
|
|||
const (
	// KSUID's epoch starts more recently than the Unix epoch so that the
	// 32-bit timestamp space gives a useful lifetime of around 136 years
	// counted from the epoch below (May 2014).
	// This number (14e8) was picked to be easy to remember.
	epochStamp int64 = 1400000000

	// Timestamp is a uint32
	timestampLengthInBytes = 4

	// Payload is 16-bytes
	payloadLengthInBytes = 16

	// KSUIDs are 20 bytes when binary encoded
	byteLength = timestampLengthInBytes + payloadLengthInBytes

	// The length of a KSUID when string (base62) encoded
	stringEncodedLength = 27

	// A string-encoded minimum value for a KSUID
	minStringEncoded = "000000000000000000000000000"

	// A string-encoded maximum value for a KSUID
	maxStringEncoded = "aWgEPTl1tmebfsQzFP4bxwgy80V"
)
|||
|
|||
// KSUID is a 20-byte identifier:
//
//	00-03 byte: uint32 BE UTC timestamp with custom epoch
//	04-19 byte: random "payload"
type KSUID [byteLength]byte
|||
|
|||
var (
	// rander is the source of random payload bytes; it can be replaced with
	// SetRand.
	rander = rand.Reader
	// randMutex guards the use of rander and randBuffer in NewRandomWithTime.
	randMutex = sync.Mutex{}
	// randBuffer is the shared scratch buffer random payloads are read into.
	randBuffer = [payloadLengthInBytes]byte{}

	// Errors reported for inputs of the wrong size or out of range.
	errSize        = fmt.Errorf("Valid KSUIDs are %v bytes", byteLength)
	errStrSize     = fmt.Errorf("Valid encoded KSUIDs are %v characters", stringEncodedLength)
	errStrValue    = fmt.Errorf("Valid encoded KSUIDs are bounded by %s and %s", minStringEncoded, maxStringEncoded)
	errPayloadSize = fmt.Errorf("Valid KSUID payloads are %v bytes", payloadLengthInBytes)

	// Represents a completely empty (invalid) KSUID
	Nil KSUID
	// Represents the highest value a KSUID can have
	Max = KSUID{255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}
)
|||
|
|||
// Append appends the string (base 62) representation of i to b, returning a
// slice to a potentially larger memory area.
func (i KSUID) Append(b []byte) []byte {
	return fastAppendEncodeBase62(b, i[:])
}
|||
|
|||
// Time returns the timestamp portion of the ID as a time.Time, corrected for
// KSUID's custom epoch.
func (i KSUID) Time() time.Time {
	return correctedUTCTimestampToTime(i.Timestamp())
}
|||
|
|||
// Timestamp returns the timestamp portion of the ID as a bare integer, read
// big-endian from the first four bytes. It is uncorrected for KSUID's special
// epoch.
func (i KSUID) Timestamp() uint32 {
	return binary.BigEndian.Uint32(i[:timestampLengthInBytes])
}
|||
|
|||
// Payload returns the 16-byte random payload without the timestamp. The
// receiver is passed by value, so the returned slice refers to a copy of the
// ID.
func (i KSUID) Payload() []byte {
	return i[timestampLengthInBytes:]
}
|||
|
|||
// String returns the string-encoded representation, which can be passed
// through Parse().
func (i KSUID) String() string {
	return string(i.Append(make([]byte, 0, stringEncodedLength)))
}
|||
|
|||
// Bytes returns the raw byte representation of the KSUID.
func (i KSUID) Bytes() []byte {
	// Safe because this is by-value: the slice refers to the method's own
	// copy of the array.
	return i[:]
}
|||
|
|||
// IsNil returns true if this is a "nil" KSUID, i.e. it equals Nil.
func (i KSUID) IsNil() bool {
	return i == Nil
}
|||
|
|||
// Get satisfies the flag.Getter interface, making it possible to use KSUIDs as
// part of the command line options of a program. It returns the KSUID itself.
func (i KSUID) Get() interface{} {
	return i
}
|||
|
|||
// Set satisfies the flag.Value interface, making it possible to use KSUIDs as
// part of the command line options of a program. The string is parsed with
// UnmarshalText.
func (i *KSUID) Set(s string) error {
	return i.UnmarshalText([]byte(s))
}
|||
|
|||
// MarshalText returns the string representation of the KSUID as bytes. The
// error is always nil.
func (i KSUID) MarshalText() ([]byte, error) {
	return []byte(i.String()), nil
}
|||
|
|||
// MarshalBinary returns the raw bytes of the KSUID. The error is always nil.
func (i KSUID) MarshalBinary() ([]byte, error) {
	return i.Bytes(), nil
}
|||
|
|||
func (i *KSUID) UnmarshalText(b []byte) error { |
|||
id, err := Parse(string(b)) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
*i = id |
|||
return nil |
|||
} |
|||
|
|||
func (i *KSUID) UnmarshalBinary(b []byte) error { |
|||
id, err := FromBytes(b) |
|||
if err != nil { |
|||
return err |
|||
} |
|||
*i = id |
|||
return nil |
|||
} |
|||
|
|||
// Value converts the KSUID into a SQL driver value which can be used to
|
|||
// directly use the KSUID as parameter to a SQL query.
|
|||
func (i KSUID) Value() (driver.Value, error) { |
|||
if i.IsNil() { |
|||
return nil, nil |
|||
} |
|||
return i.String(), nil |
|||
} |
|||
|
|||
// Scan implements the sql.Scanner interface. It supports converting from
|
|||
// string, []byte, or nil into a KSUID value. Attempting to convert from
|
|||
// another type will return an error.
|
|||
func (i *KSUID) Scan(src interface{}) error { |
|||
switch v := src.(type) { |
|||
case nil: |
|||
return i.scan(nil) |
|||
case []byte: |
|||
return i.scan(v) |
|||
case string: |
|||
return i.scan([]byte(v)) |
|||
default: |
|||
return fmt.Errorf("Scan: unable to scan type %T into KSUID", v) |
|||
} |
|||
} |
|||
|
|||
func (i *KSUID) scan(b []byte) error { |
|||
switch len(b) { |
|||
case 0: |
|||
*i = Nil |
|||
return nil |
|||
case byteLength: |
|||
return i.UnmarshalBinary(b) |
|||
case stringEncodedLength: |
|||
return i.UnmarshalText(b) |
|||
default: |
|||
return errSize |
|||
} |
|||
} |
|||
|
|||
// Parse decodes a string-encoded representation of a KSUID object
|
|||
func Parse(s string) (KSUID, error) { |
|||
if len(s) != stringEncodedLength { |
|||
return Nil, errStrSize |
|||
} |
|||
|
|||
src := [stringEncodedLength]byte{} |
|||
dst := [byteLength]byte{} |
|||
|
|||
copy(src[:], s[:]) |
|||
|
|||
if err := fastDecodeBase62(dst[:], src[:]); err != nil { |
|||
return Nil, errStrValue |
|||
} |
|||
|
|||
return FromBytes(dst[:]) |
|||
} |
|||
|
|||
// timeToCorrectedUTCTimestamp converts t into seconds since the KSUID epoch
// (epochStamp), converted to uint32.
func timeToCorrectedUTCTimestamp(t time.Time) uint32 {
	return uint32(t.Unix() - epochStamp)
}
|||
|
|||
// correctedUTCTimestampToTime converts a KSUID timestamp (seconds since
// epochStamp) back into a time.Time.
func correctedUTCTimestampToTime(ts uint32) time.Time {
	return time.Unix(int64(ts)+epochStamp, 0)
}
|||
|
|||
// Generates a new KSUID. In the strange case that random bytes
|
|||
// can't be read, it will panic.
|
|||
func New() KSUID { |
|||
ksuid, err := NewRandom() |
|||
if err != nil { |
|||
panic(fmt.Sprintf("Couldn't generate KSUID, inconceivable! error: %v", err)) |
|||
} |
|||
return ksuid |
|||
} |
|||
|
|||
// NewRandom generates a new KSUID stamped with the current time.
func NewRandom() (ksuid KSUID, err error) {
	return NewRandomWithTime(time.Now())
}
|||
|
|||
func NewRandomWithTime(t time.Time) (ksuid KSUID, err error) { |
|||
// Go's default random number generators are not safe for concurrent use by
|
|||
// multiple goroutines, the use of the rander and randBuffer are explicitly
|
|||
// synchronized here.
|
|||
randMutex.Lock() |
|||
|
|||
_, err = io.ReadAtLeast(rander, randBuffer[:], len(randBuffer)) |
|||
copy(ksuid[timestampLengthInBytes:], randBuffer[:]) |
|||
|
|||
randMutex.Unlock() |
|||
|
|||
if err != nil { |
|||
ksuid = Nil // don't leak random bytes on error
|
|||
return |
|||
} |
|||
|
|||
ts := timeToCorrectedUTCTimestamp(t) |
|||
binary.BigEndian.PutUint32(ksuid[:timestampLengthInBytes], ts) |
|||
return |
|||
} |
|||
|
|||
// Constructs a KSUID from constituent parts
|
|||
func FromParts(t time.Time, payload []byte) (KSUID, error) { |
|||
if len(payload) != payloadLengthInBytes { |
|||
return Nil, errPayloadSize |
|||
} |
|||
|
|||
var ksuid KSUID |
|||
|
|||
ts := timeToCorrectedUTCTimestamp(t) |
|||
binary.BigEndian.PutUint32(ksuid[:timestampLengthInBytes], ts) |
|||
|
|||
copy(ksuid[timestampLengthInBytes:], payload) |
|||
|
|||
return ksuid, nil |
|||
} |
|||
|
|||
// Constructs a KSUID from a 20-byte binary representation
|
|||
func FromBytes(b []byte) (KSUID, error) { |
|||
var ksuid KSUID |
|||
|
|||
if len(b) != byteLength { |
|||
return Nil, errSize |
|||
} |
|||
|
|||
copy(ksuid[:], b) |
|||
return ksuid, nil |
|||
} |
|||
|
|||
// Sets the global source of random bytes for KSUID generation. This
|
|||
// should probably only be set once globally. While this is technically
|
|||
// thread-safe as in it won't cause corruption, there's no guarantee
|
|||
// on ordering.
|
|||
func SetRand(r io.Reader) { |
|||
if r == nil { |
|||
rander = rand.Reader |
|||
return |
|||
} |
|||
rander = r |
|||
} |
|||
|
|||
// Implements comparison for KSUID type
|
|||
func Compare(a, b KSUID) int { |
|||
return bytes.Compare(a[:], b[:]) |
|||
} |
|||
|
|||
// Sorts the given slice of KSUIDs
|
|||
func Sort(ids []KSUID) { |
|||
quickSort(ids, 0, len(ids)-1) |
|||
} |
|||
|
|||
// IsSorted checks whether a slice of KSUIDs is sorted
|
|||
func IsSorted(ids []KSUID) bool { |
|||
if len(ids) != 0 { |
|||
min := ids[0] |
|||
for _, id := range ids[1:] { |
|||
if bytes.Compare(min[:], id[:]) > 0 { |
|||
return false |
|||
} |
|||
min = id |
|||
} |
|||
} |
|||
return true |
|||
} |
|||
|
|||
func quickSort(a []KSUID, lo int, hi int) { |
|||
if lo < hi { |
|||
pivot := a[hi] |
|||
i := lo - 1 |
|||
|
|||
for j, n := lo, hi; j != n; j++ { |
|||
if bytes.Compare(a[j][:], pivot[:]) < 0 { |
|||
i++ |
|||
a[i], a[j] = a[j], a[i] |
|||
} |
|||
} |
|||
|
|||
i++ |
|||
if bytes.Compare(a[hi][:], a[i][:]) < 0 { |
|||
a[i], a[hi] = a[hi], a[i] |
|||
} |
|||
|
|||
quickSort(a, lo, i-1) |
|||
quickSort(a, i+1, hi) |
|||
} |
|||
} |
|||
|
|||
// Next returns the next KSUID after id.
|
|||
func (id KSUID) Next() KSUID { |
|||
zero := makeUint128(0, 0) |
|||
|
|||
t := id.Timestamp() |
|||
u := uint128Payload(id) |
|||
v := add128(u, makeUint128(0, 1)) |
|||
|
|||
if v == zero { // overflow
|
|||
t++ |
|||
} |
|||
|
|||
return v.ksuid(t) |
|||
} |
|||
|
|||
// Prev returns the previoud KSUID before id.
|
|||
func (id KSUID) Prev() KSUID { |
|||
max := makeUint128(math.MaxUint64, math.MaxUint64) |
|||
|
|||
t := id.Timestamp() |
|||
u := uint128Payload(id) |
|||
v := sub128(u, makeUint128(0, 1)) |
|||
|
|||
if v == max { // overflow
|
|||
t-- |
|||
} |
|||
|
|||
return v.ksuid(t) |
|||
} |
@ -0,0 +1,55 @@ |
|||
package ksuid |
|||
|
|||
import ( |
|||
cryptoRand "crypto/rand" |
|||
"encoding/binary" |
|||
"io" |
|||
"math/rand" |
|||
) |
|||
|
|||
// FastRander is an io.Reader that uses math/rand and is optimized for
|
|||
// generating 16 bytes KSUID payloads. It is intended to be used as a
|
|||
// performance improvements for programs that have no need for
|
|||
// cryptographically secure KSUIDs and are generating a lot of them.
|
|||
var FastRander = newRBG() |
|||
|
|||
func newRBG() io.Reader { |
|||
r, err := newRandomBitsGenerator() |
|||
if err != nil { |
|||
panic(err) |
|||
} |
|||
return r |
|||
} |
|||
|
|||
func newRandomBitsGenerator() (r io.Reader, err error) { |
|||
var seed int64 |
|||
|
|||
if seed, err = readCryptoRandomSeed(); err != nil { |
|||
return |
|||
} |
|||
|
|||
r = &randSourceReader{source: rand.NewSource(seed).(rand.Source64)} |
|||
return |
|||
} |
|||
|
|||
// readCryptoRandomSeed reads 8 bytes from crypto/rand and decodes them as a
// little-endian int64 seed. On a read failure it returns 0 and the error.
func readCryptoRandomSeed() (seed int64, err error) {
	var raw [8]byte

	if _, err = io.ReadFull(cryptoRand.Reader, raw[:]); err != nil {
		return 0, err
	}

	return int64(binary.LittleEndian.Uint64(raw[:])), nil
}
|||
|
|||
// randSourceReader adapts a rand.Source64 to the io.Reader interface.
type randSourceReader struct {
	source rand.Source64
}

// Read fills b with bytes drawn from the underlying source.
//
// The reader is optimized for generating 16 bytes payloads: when b holds at
// least 16 bytes, exactly 16 bytes are written (two little-endian 64-bit
// words) and 16 is returned, leaving any remaining bytes of b untouched.
// Buffers shorter than 16 bytes are filled completely instead of causing an
// out-of-range panic. The returned error is always nil.
func (r *randSourceReader) Read(b []byte) (int, error) {
	if len(b) >= 16 {
		binary.LittleEndian.PutUint64(b[:8], r.source.Uint64())
		binary.LittleEndian.PutUint64(b[8:], r.source.Uint64())
		return 16, nil
	}

	// Short buffer: draw one word at a time and copy only what still fits.
	n := 0
	for n < len(b) {
		var word [8]byte
		binary.LittleEndian.PutUint64(word[:], r.source.Uint64())
		n += copy(b[n:], word[:])
	}
	return n, nil
}
@ -0,0 +1,55 @@ |
|||
package ksuid |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"errors" |
|||
"math" |
|||
) |
|||
|
|||
// Sequence is a KSUID generator which produces a sequence of ordered KSUIDs
|
|||
// from a seed.
|
|||
//
|
|||
// Up to 65536 KSUIDs can be generated for a single seed.
|
|||
//
|
|||
// A typical usage of a Sequence looks like this:
|
|||
//
|
|||
// seq := ksuid.Sequence{
|
|||
// Seed: ksuid.New(),
|
|||
// }
|
|||
// id, err := seq.Next()
|
|||
//
|
|||
// Sequence values are not safe to use concurrently from multiple goroutines.
|
|||
type Sequence struct { |
|||
// The seed is used as base for the KSUID generator, all generated KSUIDs
|
|||
// share the same leading 18 bytes of the seed.
|
|||
Seed KSUID |
|||
count uint32 // uint32 for overflow, only 2 bytes are used
|
|||
} |
|||
|
|||
// Next produces the next KSUID in the sequence, or returns an error if the
|
|||
// sequence has been exhausted.
|
|||
func (seq *Sequence) Next() (KSUID, error) { |
|||
id := seq.Seed // copy
|
|||
count := seq.count |
|||
if count > math.MaxUint16 { |
|||
return Nil, errors.New("too many IDs were generated") |
|||
} |
|||
seq.count++ |
|||
return withSequenceNumber(id, uint16(count)), nil |
|||
} |
|||
|
|||
// Bounds returns the inclusive min and max bounds of the KSUIDs that may be
|
|||
// generated by the sequence. If all ids have been generated already then the
|
|||
// returned min value is equal to the max.
|
|||
func (seq *Sequence) Bounds() (min KSUID, max KSUID) { |
|||
count := seq.count |
|||
if count > math.MaxUint16 { |
|||
count = math.MaxUint16 |
|||
} |
|||
return withSequenceNumber(seq.Seed, uint16(count)), withSequenceNumber(seq.Seed, math.MaxUint16) |
|||
} |
|||
|
|||
func withSequenceNumber(id KSUID, n uint16) KSUID { |
|||
binary.BigEndian.PutUint16(id[len(id)-2:], n) |
|||
return id |
|||
} |
@ -0,0 +1,343 @@ |
|||
package ksuid |
|||
|
|||
import ( |
|||
"bytes" |
|||
"encoding/binary" |
|||
) |
|||
|
|||
// CompressedSet is an immutable data type which stores a set of KSUIDs.
|
|||
type CompressedSet []byte |
|||
|
|||
// Iter returns an iterator that produces all KSUIDs in the set.
|
|||
func (set CompressedSet) Iter() CompressedSetIter { |
|||
return CompressedSetIter{ |
|||
content: []byte(set), |
|||
} |
|||
} |
|||
|
|||
// String satisfies the fmt.Stringer interface, returns a human-readable string
|
|||
// representation of the set.
|
|||
func (set CompressedSet) String() string { |
|||
b := bytes.Buffer{} |
|||
b.WriteByte('[') |
|||
set.writeTo(&b) |
|||
b.WriteByte(']') |
|||
return b.String() |
|||
} |
|||
|
|||
// String satisfies the fmt.GoStringer interface, returns a Go representation of
|
|||
// the set.
|
|||
func (set CompressedSet) GoString() string { |
|||
b := bytes.Buffer{} |
|||
b.WriteString("ksuid.CompressedSet{") |
|||
set.writeTo(&b) |
|||
b.WriteByte('}') |
|||
return b.String() |
|||
} |
|||
|
|||
func (set CompressedSet) writeTo(b *bytes.Buffer) { |
|||
a := [27]byte{} |
|||
|
|||
for i, it := 0, set.Iter(); it.Next(); i++ { |
|||
if i != 0 { |
|||
b.WriteString(", ") |
|||
} |
|||
b.WriteByte('"') |
|||
it.KSUID.Append(a[:0]) |
|||
b.Write(a[:]) |
|||
b.WriteByte('"') |
|||
} |
|||
} |
|||
|
|||
// Compress creates and returns a compressed set of KSUIDs from the list given
|
|||
// as arguments.
|
|||
func Compress(ids ...KSUID) CompressedSet { |
|||
c := 1 + byteLength + (len(ids) / 5) |
|||
b := make([]byte, 0, c) |
|||
return AppendCompressed(b, ids...) |
|||
} |
|||
|
|||
// AppendCompressed uses the given byte slice as pre-allocated storage space to
|
|||
// build a KSUID set.
|
|||
//
|
|||
// Note that the set uses a compression technique to store the KSUIDs, so the
|
|||
// resuling length is not 20 x len(ids). The rule of thumb here is for the given
|
|||
// byte slice to reserve the amount of memory that the application would be OK
|
|||
// to waste.
|
|||
func AppendCompressed(set []byte, ids ...KSUID) CompressedSet { |
|||
if len(ids) != 0 { |
|||
if !IsSorted(ids) { |
|||
Sort(ids) |
|||
} |
|||
one := makeUint128(0, 1) |
|||
|
|||
// The first KSUID is always written to the set, this is the starting
|
|||
// point for all deltas.
|
|||
set = append(set, byte(rawKSUID)) |
|||
set = append(set, ids[0][:]...) |
|||
|
|||
timestamp := ids[0].Timestamp() |
|||
lastKSUID := ids[0] |
|||
lastValue := uint128Payload(ids[0]) |
|||
|
|||
for i := 1; i != len(ids); i++ { |
|||
id := ids[i] |
|||
|
|||
if id == lastKSUID { |
|||
continue |
|||
} |
|||
|
|||
t := id.Timestamp() |
|||
v := uint128Payload(id) |
|||
|
|||
if t != timestamp { |
|||
d := t - timestamp |
|||
n := varintLength32(d) |
|||
|
|||
set = append(set, timeDelta|byte(n)) |
|||
set = appendVarint32(set, d, n) |
|||
set = append(set, id[timestampLengthInBytes:]...) |
|||
|
|||
timestamp = t |
|||
} else { |
|||
d := sub128(v, lastValue) |
|||
|
|||
if d != one { |
|||
n := varintLength128(d) |
|||
|
|||
set = append(set, payloadDelta|byte(n)) |
|||
set = appendVarint128(set, d, n) |
|||
} else { |
|||
l, c := rangeLength(ids[i+1:], t, id, v) |
|||
m := uint64(l + 1) |
|||
n := varintLength64(m) |
|||
|
|||
set = append(set, payloadRange|byte(n)) |
|||
set = appendVarint64(set, m, n) |
|||
|
|||
i += c |
|||
id = ids[i] |
|||
v = uint128Payload(id) |
|||
} |
|||
} |
|||
|
|||
lastKSUID = id |
|||
lastValue = v |
|||
} |
|||
} |
|||
return CompressedSet(set) |
|||
} |
|||
|
|||
func rangeLength(ids []KSUID, timestamp uint32, lastKSUID KSUID, lastValue uint128) (length int, count int) { |
|||
one := makeUint128(0, 1) |
|||
|
|||
for i := range ids { |
|||
id := ids[i] |
|||
|
|||
if id == lastKSUID { |
|||
continue |
|||
} |
|||
|
|||
if id.Timestamp() != timestamp { |
|||
count = i |
|||
return |
|||
} |
|||
|
|||
v := uint128Payload(id) |
|||
|
|||
if sub128(v, lastValue) != one { |
|||
count = i |
|||
return |
|||
} |
|||
|
|||
lastKSUID = id |
|||
lastValue = v |
|||
length++ |
|||
} |
|||
|
|||
count = len(ids) |
|||
return |
|||
} |
|||
|
|||
func appendVarint128(b []byte, v uint128, n int) []byte { |
|||
c := v.bytes() |
|||
return append(b, c[len(c)-n:]...) |
|||
} |
|||
|
|||
// appendVarint64 appends the n least significant bytes of v to b in
// big-endian order.
func appendVarint64(b []byte, v uint64, n int) []byte {
	var encoded [8]byte
	binary.BigEndian.PutUint64(encoded[:], v)
	tail := encoded[len(encoded)-n:]
	return append(b, tail...)
}
|||
|
|||
// appendVarint32 appends the n least significant bytes of v to b in
// big-endian order.
func appendVarint32(b []byte, v uint32, n int) []byte {
	var encoded [4]byte
	binary.BigEndian.PutUint32(encoded[:], v)
	tail := encoded[len(encoded)-n:]
	return append(b, tail...)
}
|||
|
|||
func varint128(b []byte) uint128 { |
|||
a := [16]byte{} |
|||
copy(a[16-len(b):], b) |
|||
return makeUint128FromPayload(a[:]) |
|||
} |
|||
|
|||
// varint64 decodes b (at most 8 bytes, big-endian) into a uint64 by
// right-aligning it in a zeroed 8-byte buffer.
func varint64(b []byte) uint64 {
	var padded [8]byte
	copy(padded[len(padded)-len(b):], b)
	return binary.BigEndian.Uint64(padded[:])
}
|||
|
|||
// varint32 decodes b (at most 4 bytes, big-endian) into a uint32 by
// right-aligning it in a zeroed 4-byte buffer.
func varint32(b []byte) uint32 {
	var padded [4]byte
	copy(padded[len(padded)-len(b):], b)
	return binary.BigEndian.Uint32(padded[:])
}
|||
|
|||
func varintLength128(v uint128) int { |
|||
if v[1] != 0 { |
|||
return 8 + varintLength64(v[1]) |
|||
} |
|||
return varintLength64(v[0]) |
|||
} |
|||
|
|||
// varintLength64 returns the number of bytes needed to encode v without
// leading zero bytes. The result is between 1 and 8; zero needs one byte.
func varintLength64(v uint64) int {
	n := 1
	// Every non-zero byte group above the lowest one adds a byte.
	for v >>= 8; v != 0; v >>= 8 {
		n++
	}
	return n
}
|||
|
|||
// varintLength32 returns the number of bytes needed to encode v without
// leading zero bytes. The result is between 1 and 4; zero needs one byte.
func varintLength32(v uint32) int {
	n := 1
	// Every non-zero byte group above the lowest one adds a byte.
	for v >>= 8; v != 0; v >>= 8 {
		n++
	}
	return n
}
|||
|
|||
const ( |
|||
rawKSUID = 0 |
|||
timeDelta = (1 << 6) |
|||
payloadDelta = (1 << 7) |
|||
payloadRange = (1 << 6) | (1 << 7) |
|||
) |
|||
|
|||
// CompressedSetIter is an iterator type returned by CompressedSet.Iter to produce the
|
|||
// list of KSUIDs stored in a set.
|
|||
//
|
|||
// Here is how the iterator type is commonly used:
|
|||
//
|
|||
// for it := set.Iter(); it.Next(); {
|
|||
// id := it.KSUID
|
|||
// // ...
|
|||
// }
|
|||
//
|
|||
// CompressedSetIter values are not safe to use concurrently from multiple
|
|||
// goroutines.
|
|||
type CompressedSetIter struct { |
|||
// KSUID is modified by calls to the Next method to hold the KSUID loaded
|
|||
// by the iterator.
|
|||
KSUID KSUID |
|||
|
|||
content []byte |
|||
offset int |
|||
|
|||
seqlength uint64 |
|||
timestamp uint32 |
|||
lastValue uint128 |
|||
} |
|||
|
|||
// Next moves the iterator forward, returning true if there a KSUID was found,
|
|||
// or false if the iterator as reached the end of the set it was created from.
|
|||
func (it *CompressedSetIter) Next() bool { |
|||
if it.seqlength != 0 { |
|||
value := incr128(it.lastValue) |
|||
it.KSUID = value.ksuid(it.timestamp) |
|||
it.seqlength-- |
|||
it.lastValue = value |
|||
return true |
|||
} |
|||
|
|||
if it.offset == len(it.content) { |
|||
return false |
|||
} |
|||
|
|||
b := it.content[it.offset] |
|||
it.offset++ |
|||
|
|||
const mask = rawKSUID | timeDelta | payloadDelta | payloadRange |
|||
tag := int(b) & mask |
|||
cnt := int(b) & ^mask |
|||
|
|||
switch tag { |
|||
case rawKSUID: |
|||
off0 := it.offset |
|||
off1 := off0 + byteLength |
|||
|
|||
copy(it.KSUID[:], it.content[off0:off1]) |
|||
|
|||
it.offset = off1 |
|||
it.timestamp = it.KSUID.Timestamp() |
|||
it.lastValue = uint128Payload(it.KSUID) |
|||
|
|||
case timeDelta: |
|||
off0 := it.offset |
|||
off1 := off0 + cnt |
|||
off2 := off1 + payloadLengthInBytes |
|||
|
|||
it.timestamp += varint32(it.content[off0:off1]) |
|||
|
|||
binary.BigEndian.PutUint32(it.KSUID[:timestampLengthInBytes], it.timestamp) |
|||
copy(it.KSUID[timestampLengthInBytes:], it.content[off1:off2]) |
|||
|
|||
it.offset = off2 |
|||
it.lastValue = uint128Payload(it.KSUID) |
|||
|
|||
case payloadDelta: |
|||
off0 := it.offset |
|||
off1 := off0 + cnt |
|||
|
|||
delta := varint128(it.content[off0:off1]) |
|||
value := add128(it.lastValue, delta) |
|||
|
|||
it.KSUID = value.ksuid(it.timestamp) |
|||
it.offset = off1 |
|||
it.lastValue = value |
|||
|
|||
case payloadRange: |
|||
off0 := it.offset |
|||
off1 := off0 + cnt |
|||
|
|||
value := incr128(it.lastValue) |
|||
it.KSUID = value.ksuid(it.timestamp) |
|||
it.seqlength = varint64(it.content[off0:off1]) |
|||
it.offset = off1 |
|||
it.seqlength-- |
|||
it.lastValue = value |
|||
|
|||
default: |
|||
panic("KSUID set iterator is reading malformed data") |
|||
} |
|||
|
|||
return true |
|||
} |
@ -0,0 +1,141 @@ |
|||
package ksuid |
|||
|
|||
import "fmt" |
|||
|
|||
// uint128 represents an unsigned 128 bits little endian integer.
|
|||
type uint128 [2]uint64 |
|||
|
|||
func uint128Payload(ksuid KSUID) uint128 { |
|||
return makeUint128FromPayload(ksuid[timestampLengthInBytes:]) |
|||
} |
|||
|
|||
func makeUint128(high uint64, low uint64) uint128 { |
|||
return uint128{low, high} |
|||
} |
|||
|
|||
func makeUint128FromPayload(payload []byte) uint128 { |
|||
return uint128{ |
|||
// low
|
|||
uint64(payload[8])<<56 | |
|||
uint64(payload[9])<<48 | |
|||
uint64(payload[10])<<40 | |
|||
uint64(payload[11])<<32 | |
|||
uint64(payload[12])<<24 | |
|||
uint64(payload[13])<<16 | |
|||
uint64(payload[14])<<8 | |
|||
uint64(payload[15]), |
|||
// high
|
|||
uint64(payload[0])<<56 | |
|||
uint64(payload[1])<<48 | |
|||
uint64(payload[2])<<40 | |
|||
uint64(payload[3])<<32 | |
|||
uint64(payload[4])<<24 | |
|||
uint64(payload[5])<<16 | |
|||
uint64(payload[6])<<8 | |
|||
uint64(payload[7]), |
|||
} |
|||
} |
|||
|
|||
func (v uint128) ksuid(timestamp uint32) KSUID { |
|||
return KSUID{ |
|||
// time
|
|||
byte(timestamp >> 24), |
|||
byte(timestamp >> 16), |
|||
byte(timestamp >> 8), |
|||
byte(timestamp), |
|||
|
|||
// high
|
|||
byte(v[1] >> 56), |
|||
byte(v[1] >> 48), |
|||
byte(v[1] >> 40), |
|||
byte(v[1] >> 32), |
|||
byte(v[1] >> 24), |
|||
byte(v[1] >> 16), |
|||
byte(v[1] >> 8), |
|||
byte(v[1]), |
|||
|
|||
// low
|
|||
byte(v[0] >> 56), |
|||
byte(v[0] >> 48), |
|||
byte(v[0] >> 40), |
|||
byte(v[0] >> 32), |
|||
byte(v[0] >> 24), |
|||
byte(v[0] >> 16), |
|||
byte(v[0] >> 8), |
|||
byte(v[0]), |
|||
} |
|||
} |
|||
|
|||
func (v uint128) bytes() [16]byte { |
|||
return [16]byte{ |
|||
// high
|
|||
byte(v[1] >> 56), |
|||
byte(v[1] >> 48), |
|||
byte(v[1] >> 40), |
|||
byte(v[1] >> 32), |
|||
byte(v[1] >> 24), |
|||
byte(v[1] >> 16), |
|||
byte(v[1] >> 8), |
|||
byte(v[1]), |
|||
|
|||
// low
|
|||
byte(v[0] >> 56), |
|||
byte(v[0] >> 48), |
|||
byte(v[0] >> 40), |
|||
byte(v[0] >> 32), |
|||
byte(v[0] >> 24), |
|||
byte(v[0] >> 16), |
|||
byte(v[0] >> 8), |
|||
byte(v[0]), |
|||
} |
|||
} |
|||
|
|||
func (v uint128) String() string { |
|||
return fmt.Sprintf("0x%016X%016X", v[0], v[1]) |
|||
} |
|||
|
|||
const wordBitSize = 64 |
|||
|
|||
func cmp128(x, y uint128) int { |
|||
if x[1] < y[1] { |
|||
return -1 |
|||
} |
|||
if x[1] > y[1] { |
|||
return 1 |
|||
} |
|||
if x[0] < y[0] { |
|||
return -1 |
|||
} |
|||
if x[0] > y[0] { |
|||
return 1 |
|||
} |
|||
return 0 |
|||
} |
|||
|
|||
func add128(x, y uint128) (z uint128) { |
|||
x0 := x[0] |
|||
y0 := y[0] |
|||
z0 := x0 + y0 |
|||
z[0] = z0 |
|||
|
|||
c := (x0&y0 | (x0|y0)&^z0) >> (wordBitSize - 1) |
|||
|
|||
z[1] = x[1] + y[1] + c |
|||
return |
|||
} |
|||
|
|||
func sub128(x, y uint128) (z uint128) { |
|||
x0 := x[0] |
|||
y0 := y[0] |
|||
z0 := x0 - y0 |
|||
z[0] = z0 |
|||
|
|||
c := (y0&^x0 | (y0|^x0)&z0) >> (wordBitSize - 1) |
|||
|
|||
z[1] = x[1] - y[1] - c |
|||
return |
|||
} |
|||
|
|||
func incr128(x uint128) uint128 { |
|||
return add128(x, uint128{1, 0}) |
|||
} |
@ -0,0 +1,27 @@ |
|||
Copyright (c) 2009 The Go Authors. All rights reserved. |
|||
|
|||
Redistribution and use in source and binary forms, with or without |
|||
modification, are permitted provided that the following conditions are |
|||
met: |
|||
|
|||
* Redistributions of source code must retain the above copyright |
|||
notice, this list of conditions and the following disclaimer. |
|||
* Redistributions in binary form must reproduce the above |
|||
copyright notice, this list of conditions and the following disclaimer |
|||
in the documentation and/or other materials provided with the |
|||
distribution. |
|||
* Neither the name of Google Inc. nor the names of its |
|||
contributors may be used to endorse or promote products derived from |
|||
this software without specific prior written permission. |
|||
|
|||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@ -0,0 +1,22 @@ |
|||
Additional IP Rights Grant (Patents) |
|||
|
|||
"This implementation" means the copyrightable works distributed by |
|||
Google as part of the Go project. |
|||
|
|||
Google hereby grants to You a perpetual, worldwide, non-exclusive, |
|||
no-charge, royalty-free, irrevocable (except as stated in this section) |
|||
patent license to make, have made, use, offer to sell, sell, import, |
|||
transfer and otherwise run, modify and propagate the contents of this |
|||
implementation of Go, where such license applies only to those patent |
|||
claims, both currently owned or controlled by Google and acquired in |
|||
the future, licensable by Google that are necessarily infringed by this |
|||
implementation of Go. This grant does not include claims that would be |
|||
infringed only as a consequence of further modification of this |
|||
implementation. If you or your agent or exclusive licensee institute or |
|||
order or agree to the institution of patent litigation against any |
|||
entity (including a cross-claim or counterclaim in a lawsuit) alleging |
|||
that this implementation of Go or any code incorporated within this |
|||
implementation of Go constitutes direct or contributory patent |
|||
infringement, or inducement of patent infringement, then any patent |
|||
rights granted to you under this License for this implementation of Go |
|||
shall terminate as of the date such litigation is filed. |
@ -0,0 +1,283 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Package argon2 implements the key derivation function Argon2.
|
|||
// Argon2 was selected as the winner of the Password Hashing Competition and can
|
|||
// be used to derive cryptographic keys from passwords.
|
|||
//
|
|||
// For a detailed specification of Argon2 see [1].
|
|||
//
|
|||
// If you aren't sure which function you need, use Argon2id (IDKey) and
|
|||
// the parameter recommendations for your scenario.
|
|||
//
|
|||
// # Argon2i
|
|||
//
|
|||
// Argon2i (implemented by Key) is the side-channel resistant version of Argon2.
|
|||
// It uses data-independent memory access, which is preferred for password
|
|||
// hashing and password-based key derivation. Argon2i requires more passes over
|
|||
// memory than Argon2id to protect from trade-off attacks. The recommended
|
|||
// parameters (taken from [2]) for non-interactive operations are time=3 and to
|
|||
// use the maximum available memory.
|
|||
//
|
|||
// # Argon2id
|
|||
//
|
|||
// Argon2id (implemented by IDKey) is a hybrid version of Argon2 combining
|
|||
// Argon2i and Argon2d. It uses data-independent memory access for the first
|
|||
// half of the first iteration over the memory and data-dependent memory access
|
|||
// for the rest. Argon2id is side-channel resistant and provides better brute-
|
|||
// force cost savings due to time-memory tradeoffs than Argon2i. The recommended
|
|||
// parameters for non-interactive operations (taken from [2]) are time=1 and to
|
|||
// use the maximum available memory.
|
|||
//
|
|||
// [1] https://github.com/P-H-C/phc-winner-argon2/blob/master/argon2-specs.pdf
|
|||
// [2] https://tools.ietf.org/html/draft-irtf-cfrg-argon2-03#section-9.3
|
|||
package argon2 |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"sync" |
|||
|
|||
"golang.org/x/crypto/blake2b" |
|||
) |
|||
|
|||
// The Argon2 version implemented by this package.
|
|||
const Version = 0x13 |
|||
|
|||
const ( |
|||
argon2d = iota |
|||
argon2i |
|||
argon2id |
|||
) |
|||
|
|||
// Key derives a key from the password, salt, and cost parameters using Argon2i
|
|||
// returning a byte slice of length keyLen that can be used as cryptographic
|
|||
// key. The CPU cost and parallelism degree must be greater than zero.
|
|||
//
|
|||
// For example, you can get a derived key for e.g. AES-256 (which needs a
|
|||
// 32-byte key) by doing:
|
|||
//
|
|||
// key := argon2.Key([]byte("some password"), salt, 3, 32*1024, 4, 32)
|
|||
//
|
|||
// The draft RFC recommends[2] time=3, and memory=32*1024 is a sensible number.
|
|||
// If using that amount of memory (32 MB) is not possible in some contexts then
|
|||
// the time parameter can be increased to compensate.
|
|||
//
|
|||
// The time parameter specifies the number of passes over the memory and the
|
|||
// memory parameter specifies the size of the memory in KiB. For example
|
|||
// memory=32*1024 sets the memory cost to ~32 MB. The number of threads can be
|
|||
// adjusted to the number of available CPUs. The cost parameters should be
|
|||
// increased as memory latency and CPU parallelism increases. Remember to get a
|
|||
// good random salt.
|
|||
func Key(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { |
|||
return deriveKey(argon2i, password, salt, nil, nil, time, memory, threads, keyLen) |
|||
} |
|||
|
|||
// IDKey derives a key from the password, salt, and cost parameters using
|
|||
// Argon2id returning a byte slice of length keyLen that can be used as
|
|||
// cryptographic key. The CPU cost and parallelism degree must be greater than
|
|||
// zero.
|
|||
//
|
|||
// For example, you can get a derived key for e.g. AES-256 (which needs a
|
|||
// 32-byte key) by doing:
|
|||
//
|
|||
// key := argon2.IDKey([]byte("some password"), salt, 1, 64*1024, 4, 32)
|
|||
//
|
|||
// The draft RFC recommends[2] time=1, and memory=64*1024 is a sensible number.
|
|||
// If using that amount of memory (64 MB) is not possible in some contexts then
|
|||
// the time parameter can be increased to compensate.
|
|||
//
|
|||
// The time parameter specifies the number of passes over the memory and the
|
|||
// memory parameter specifies the size of the memory in KiB. For example
|
|||
// memory=64*1024 sets the memory cost to ~64 MB. The number of threads can be
|
|||
// adjusted to the numbers of available CPUs. The cost parameters should be
|
|||
// increased as memory latency and CPU parallelism increases. Remember to get a
|
|||
// good random salt.
|
|||
func IDKey(password, salt []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { |
|||
return deriveKey(argon2id, password, salt, nil, nil, time, memory, threads, keyLen) |
|||
} |
|||
|
|||
func deriveKey(mode int, password, salt, secret, data []byte, time, memory uint32, threads uint8, keyLen uint32) []byte { |
|||
if time < 1 { |
|||
panic("argon2: number of rounds too small") |
|||
} |
|||
if threads < 1 { |
|||
panic("argon2: parallelism degree too low") |
|||
} |
|||
h0 := initHash(password, salt, secret, data, time, memory, uint32(threads), keyLen, mode) |
|||
|
|||
memory = memory / (syncPoints * uint32(threads)) * (syncPoints * uint32(threads)) |
|||
if memory < 2*syncPoints*uint32(threads) { |
|||
memory = 2 * syncPoints * uint32(threads) |
|||
} |
|||
B := initBlocks(&h0, memory, uint32(threads)) |
|||
processBlocks(B, time, memory, uint32(threads), mode) |
|||
return extractKey(B, memory, uint32(threads), keyLen) |
|||
} |
|||
|
|||
const ( |
|||
blockLength = 128 |
|||
syncPoints = 4 |
|||
) |
|||
|
|||
type block [blockLength]uint64 |
|||
|
|||
func initHash(password, salt, key, data []byte, time, memory, threads, keyLen uint32, mode int) [blake2b.Size + 8]byte { |
|||
var ( |
|||
h0 [blake2b.Size + 8]byte |
|||
params [24]byte |
|||
tmp [4]byte |
|||
) |
|||
|
|||
b2, _ := blake2b.New512(nil) |
|||
binary.LittleEndian.PutUint32(params[0:4], threads) |
|||
binary.LittleEndian.PutUint32(params[4:8], keyLen) |
|||
binary.LittleEndian.PutUint32(params[8:12], memory) |
|||
binary.LittleEndian.PutUint32(params[12:16], time) |
|||
binary.LittleEndian.PutUint32(params[16:20], uint32(Version)) |
|||
binary.LittleEndian.PutUint32(params[20:24], uint32(mode)) |
|||
b2.Write(params[:]) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(password))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(password) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(salt))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(salt) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(key))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(key) |
|||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(data))) |
|||
b2.Write(tmp[:]) |
|||
b2.Write(data) |
|||
b2.Sum(h0[:0]) |
|||
return h0 |
|||
} |
|||
|
|||
func initBlocks(h0 *[blake2b.Size + 8]byte, memory, threads uint32) []block { |
|||
var block0 [1024]byte |
|||
B := make([]block, memory) |
|||
for lane := uint32(0); lane < threads; lane++ { |
|||
j := lane * (memory / threads) |
|||
binary.LittleEndian.PutUint32(h0[blake2b.Size+4:], lane) |
|||
|
|||
binary.LittleEndian.PutUint32(h0[blake2b.Size:], 0) |
|||
blake2bHash(block0[:], h0[:]) |
|||
for i := range B[j+0] { |
|||
B[j+0][i] = binary.LittleEndian.Uint64(block0[i*8:]) |
|||
} |
|||
|
|||
binary.LittleEndian.PutUint32(h0[blake2b.Size:], 1) |
|||
blake2bHash(block0[:], h0[:]) |
|||
for i := range B[j+1] { |
|||
B[j+1][i] = binary.LittleEndian.Uint64(block0[i*8:]) |
|||
} |
|||
} |
|||
return B |
|||
} |
|||
|
|||
func processBlocks(B []block, time, memory, threads uint32, mode int) { |
|||
lanes := memory / threads |
|||
segments := lanes / syncPoints |
|||
|
|||
processSegment := func(n, slice, lane uint32, wg *sync.WaitGroup) { |
|||
var addresses, in, zero block |
|||
if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { |
|||
in[0] = uint64(n) |
|||
in[1] = uint64(lane) |
|||
in[2] = uint64(slice) |
|||
in[3] = uint64(memory) |
|||
in[4] = uint64(time) |
|||
in[5] = uint64(mode) |
|||
} |
|||
|
|||
index := uint32(0) |
|||
if n == 0 && slice == 0 { |
|||
index = 2 // we have already generated the first two blocks
|
|||
if mode == argon2i || mode == argon2id { |
|||
in[6]++ |
|||
processBlock(&addresses, &in, &zero) |
|||
processBlock(&addresses, &addresses, &zero) |
|||
} |
|||
} |
|||
|
|||
offset := lane*lanes + slice*segments + index |
|||
var random uint64 |
|||
for index < segments { |
|||
prev := offset - 1 |
|||
if index == 0 && slice == 0 { |
|||
prev += lanes // last block in lane
|
|||
} |
|||
if mode == argon2i || (mode == argon2id && n == 0 && slice < syncPoints/2) { |
|||
if index%blockLength == 0 { |
|||
in[6]++ |
|||
processBlock(&addresses, &in, &zero) |
|||
processBlock(&addresses, &addresses, &zero) |
|||
} |
|||
random = addresses[index%blockLength] |
|||
} else { |
|||
random = B[prev][0] |
|||
} |
|||
newOffset := indexAlpha(random, lanes, segments, threads, n, slice, lane, index) |
|||
processBlockXOR(&B[offset], &B[prev], &B[newOffset]) |
|||
index, offset = index+1, offset+1 |
|||
} |
|||
wg.Done() |
|||
} |
|||
|
|||
for n := uint32(0); n < time; n++ { |
|||
for slice := uint32(0); slice < syncPoints; slice++ { |
|||
var wg sync.WaitGroup |
|||
for lane := uint32(0); lane < threads; lane++ { |
|||
wg.Add(1) |
|||
go processSegment(n, slice, lane, &wg) |
|||
} |
|||
wg.Wait() |
|||
} |
|||
} |
|||
|
|||
} |
|||
|
|||
func extractKey(B []block, memory, threads, keyLen uint32) []byte { |
|||
lanes := memory / threads |
|||
for lane := uint32(0); lane < threads-1; lane++ { |
|||
for i, v := range B[(lane*lanes)+lanes-1] { |
|||
B[memory-1][i] ^= v |
|||
} |
|||
} |
|||
|
|||
var block [1024]byte |
|||
for i, v := range B[memory-1] { |
|||
binary.LittleEndian.PutUint64(block[i*8:], v) |
|||
} |
|||
key := make([]byte, keyLen) |
|||
blake2bHash(key, block[:]) |
|||
return key |
|||
} |
|||
|
|||
func indexAlpha(rand uint64, lanes, segments, threads, n, slice, lane, index uint32) uint32 { |
|||
refLane := uint32(rand>>32) % threads |
|||
if n == 0 && slice == 0 { |
|||
refLane = lane |
|||
} |
|||
m, s := 3*segments, ((slice+1)%syncPoints)*segments |
|||
if lane == refLane { |
|||
m += index |
|||
} |
|||
if n == 0 { |
|||
m, s = slice*segments, 0 |
|||
if slice == 0 || lane == refLane { |
|||
m += index |
|||
} |
|||
} |
|||
if index == 0 || lane == refLane { |
|||
m-- |
|||
} |
|||
return phi(rand, uint64(m), uint64(s), refLane, lanes) |
|||
} |
|||
|
|||
// phi maps the lower 32 bits of rand onto a block index inside the given
// lane. The position is taken from a window of m candidates starting at s
// and reduced modulo lanes.
func phi(rand, m, s uint64, lane, lanes uint32) uint32 {
	j1 := rand & 0xFFFFFFFF
	x := (j1 * j1) >> 32
	y := (x * m) >> 32
	relative := (s + m - (y + 1)) % uint64(lanes)
	return lane*lanes + uint32(relative)
}
@ -0,0 +1,53 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package argon2 |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"hash" |
|||
|
|||
"golang.org/x/crypto/blake2b" |
|||
) |
|||
|
|||
// blake2bHash computes an arbitrary long hash value of in
|
|||
// and writes the hash to out.
|
|||
func blake2bHash(out []byte, in []byte) { |
|||
var b2 hash.Hash |
|||
if n := len(out); n < blake2b.Size { |
|||
b2, _ = blake2b.New(n, nil) |
|||
} else { |
|||
b2, _ = blake2b.New512(nil) |
|||
} |
|||
|
|||
var buffer [blake2b.Size]byte |
|||
binary.LittleEndian.PutUint32(buffer[:4], uint32(len(out))) |
|||
b2.Write(buffer[:4]) |
|||
b2.Write(in) |
|||
|
|||
if len(out) <= blake2b.Size { |
|||
b2.Sum(out[:0]) |
|||
return |
|||
} |
|||
|
|||
outLen := len(out) |
|||
b2.Sum(buffer[:0]) |
|||
b2.Reset() |
|||
copy(out, buffer[:32]) |
|||
out = out[32:] |
|||
for len(out) > blake2b.Size { |
|||
b2.Write(buffer[:]) |
|||
b2.Sum(buffer[:0]) |
|||
copy(out, buffer[:32]) |
|||
out = out[32:] |
|||
b2.Reset() |
|||
} |
|||
|
|||
if outLen%blake2b.Size > 0 { // outLen > 64
|
|||
r := ((outLen + 31) / 32) - 2 // ⌈τ /32⌉-2
|
|||
b2, _ = blake2b.New(outLen-32*r, nil) |
|||
} |
|||
b2.Write(buffer[:]) |
|||
b2.Sum(out[:0]) |
|||
} |
@ -0,0 +1,60 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build amd64 && gc && !purego
|
|||
|
|||
package argon2 |
|||
|
|||
import "golang.org/x/sys/cpu" |
|||
|
|||
// init records whether the CPU reports SSE4.1 support. processBlockSSE reads
// useSSE4 to choose between blamkaSSE4 and the blamkaGeneric fallback.
func init() { |
|||
useSSE4 = cpu.X86.HasSSE41 |
|||
} |
|||
|
|||
// mixBlocksSSE2 has no Go body; the assembly implementation stores
// a XOR b XOR c into out for every 16-byte chunk of the block.
//go:noescape
|
|||
func mixBlocksSSE2(out, a, b, c *block) |
|||
|
|||
// xorBlocksSSE2 has no Go body; the assembly implementation XORs
// a XOR b XOR c into the existing contents of out.
//go:noescape
|
|||
func xorBlocksSSE2(out, a, b, c *block) |
|||
|
|||
// blamkaSSE4 has no Go body; the assembly implementation transforms the
// block b in place. processBlockSSE only calls it when useSSE4 is set.
//go:noescape
|
|||
func blamkaSSE4(b *block) |
|||
|
|||
func processBlockSSE(out, in1, in2 *block, xor bool) { |
|||
var t block |
|||
mixBlocksSSE2(&t, in1, in2, &t) |
|||
if useSSE4 { |
|||
blamkaSSE4(&t) |
|||
} else { |
|||
for i := 0; i < blockLength; i += 16 { |
|||
blamkaGeneric( |
|||
&t[i+0], &t[i+1], &t[i+2], &t[i+3], |
|||
&t[i+4], &t[i+5], &t[i+6], &t[i+7], |
|||
&t[i+8], &t[i+9], &t[i+10], &t[i+11], |
|||
&t[i+12], &t[i+13], &t[i+14], &t[i+15], |
|||
) |
|||
} |
|||
for i := 0; i < blockLength/8; i += 2 { |
|||
blamkaGeneric( |
|||
&t[i], &t[i+1], &t[16+i], &t[16+i+1], |
|||
&t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1], |
|||
&t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1], |
|||
&t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1], |
|||
) |
|||
} |
|||
} |
|||
if xor { |
|||
xorBlocksSSE2(out, in1, in2, &t) |
|||
} else { |
|||
mixBlocksSSE2(out, in1, in2, &t) |
|||
} |
|||
} |
|||
|
|||
// processBlock compresses in1 and in2 with processBlockSSE and overwrites
// out with the result (xor = false).
func processBlock(out, in1, in2 *block) { |
|||
processBlockSSE(out, in1, in2, false) |
|||
} |
|||
|
|||
// processBlockXOR compresses in1 and in2 with processBlockSSE and XORs the
// result into the existing contents of out (xor = true).
func processBlockXOR(out, in1, in2 *block) { |
|||
processBlockSSE(out, in1, in2, true) |
|||
} |
@ -0,0 +1,243 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build amd64 && gc && !purego |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// c40 and c48 are 16-byte PSHUFB masks used by HALF_ROUND.
// c40 selects source byte 3 as byte 0 of each 64-bit lane, i.e. it rotates
// each lane right by 24 bits (left by 40). c48 selects source byte 2 as
// byte 0, i.e. it rotates each lane right by 16 bits (left by 48).
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 |
|||
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b |
|||
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 |
|||
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a |
|||
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// SHUFFLE swaps v4 with v5 and recombines the 64-bit halves of (v6, v7) and
// (v2, v3) with PUNPCKLQDQ/PUNPCKHQDQ, using t1 and t2 as scratch registers.
// The BLAMKA_ROUND macros run it between their two HALF_ROUND invocations.
// NOTE(review): presumably this lines the registers up so that the second
// HALF_ROUND mixes the diagonals, like the second half of blamkaGeneric - confirm.
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ |
|||
MOVO v4, t1; \ |
|||
MOVO v5, v4; \ |
|||
MOVO t1, v5; \ |
|||
MOVO v6, t1; \ |
|||
PUNPCKLQDQ v6, t2; \ |
|||
PUNPCKHQDQ v7, v6; \ |
|||
PUNPCKHQDQ t2, v6; \ |
|||
PUNPCKLQDQ v7, t2; \ |
|||
MOVO t1, v7; \ |
|||
MOVO v2, t1; \ |
|||
PUNPCKHQDQ t2, v7; \ |
|||
PUNPCKLQDQ v3, t2; \ |
|||
PUNPCKHQDQ t2, v2; \ |
|||
PUNPCKLQDQ t1, t2; \ |
|||
PUNPCKHQDQ t2, v3 |
|||
|
|||
// SHUFFLE_INV is the counterpart of SHUFFLE: it swaps v4 with v5 again and
// applies the same unpack sequence with the roles of (v2, v3) and (v6, v7)
// exchanged. The BLAMKA_ROUND macros run it after the second HALF_ROUND,
// before the registers are stored back.
// NOTE(review): presumably restores the register layout SHUFFLE changed - confirm.
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ |
|||
MOVO v4, t1; \ |
|||
MOVO v5, v4; \ |
|||
MOVO t1, v5; \ |
|||
MOVO v2, t1; \ |
|||
PUNPCKLQDQ v2, t2; \ |
|||
PUNPCKHQDQ v3, v2; \ |
|||
PUNPCKHQDQ t2, v2; \ |
|||
PUNPCKLQDQ v3, t2; \ |
|||
MOVO t1, v3; \ |
|||
MOVO v6, t1; \ |
|||
PUNPCKHQDQ t2, v3; \ |
|||
PUNPCKLQDQ v7, t2; \ |
|||
PUNPCKHQDQ t2, v6; \ |
|||
PUNPCKLQDQ t1, t2; \ |
|||
PUNPCKHQDQ t2, v7 |
|||
|
|||
// HALF_ROUND applies the same mixing sequence as one half of blamkaGeneric to
// two register sets: first (v0, v2, v4, v6), then (v1, v3, v5, v7). t0 is scratch.
// Each "x += y + 2*lo32(x)*lo32(y)" step is MOVO/PMULULQ followed by three PADDQ.
// The rotations are: PSHUFD $0xB1 (swap 32-bit halves, i.e. rotate by 32),
// PSHUFB c40 (right by 24), PSHUFB c48 (right by 16), and the
// PADDQ/PSRLQ $63/PXOR triple (left by 1, i.e. right by 63).
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \ |
|||
MOVO v0, t0; \ |
|||
PMULULQ v2, t0; \ |
|||
PADDQ v2, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PXOR v0, v6; \ |
|||
PSHUFD $0xB1, v6, v6; \ |
|||
MOVO v4, t0; \ |
|||
PMULULQ v6, t0; \ |
|||
PADDQ v6, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PXOR v4, v2; \ |
|||
PSHUFB c40, v2; \ |
|||
MOVO v0, t0; \ |
|||
PMULULQ v2, t0; \ |
|||
PADDQ v2, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PADDQ t0, v0; \ |
|||
PXOR v0, v6; \ |
|||
PSHUFB c48, v6; \ |
|||
MOVO v4, t0; \ |
|||
PMULULQ v6, t0; \ |
|||
PADDQ v6, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PADDQ t0, v4; \ |
|||
PXOR v4, v2; \ |
|||
MOVO v2, t0; \ |
|||
PADDQ v2, t0; \ |
|||
PSRLQ $63, v2; \ |
|||
PXOR t0, v2; \ |
|||
MOVO v1, t0; \ |
|||
PMULULQ v3, t0; \ |
|||
PADDQ v3, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PXOR v1, v7; \ |
|||
PSHUFD $0xB1, v7, v7; \ |
|||
MOVO v5, t0; \ |
|||
PMULULQ v7, t0; \ |
|||
PADDQ v7, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PXOR v5, v3; \ |
|||
PSHUFB c40, v3; \ |
|||
MOVO v1, t0; \ |
|||
PMULULQ v3, t0; \ |
|||
PADDQ v3, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PADDQ t0, v1; \ |
|||
PXOR v1, v7; \ |
|||
PSHUFB c48, v7; \ |
|||
MOVO v5, t0; \ |
|||
PMULULQ v7, t0; \ |
|||
PADDQ v7, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PADDQ t0, v5; \ |
|||
PXOR v5, v3; \ |
|||
MOVO v3, t0; \ |
|||
PADDQ v3, t0; \ |
|||
PSRLQ $63, v3; \ |
|||
PXOR t0, v3 |
|||
|
|||
// LOAD_MSG_0 loads 16 consecutive 64-bit words, starting at word index off of
// the memory at block, into X0..X7 (two words per register, unaligned loads).
#define LOAD_MSG_0(block, off) \ |
|||
MOVOU 8*(off+0)(block), X0; \ |
|||
MOVOU 8*(off+2)(block), X1; \ |
|||
MOVOU 8*(off+4)(block), X2; \ |
|||
MOVOU 8*(off+6)(block), X3; \ |
|||
MOVOU 8*(off+8)(block), X4; \ |
|||
MOVOU 8*(off+10)(block), X5; \ |
|||
MOVOU 8*(off+12)(block), X6; \ |
|||
MOVOU 8*(off+14)(block), X7 |
|||
|
|||
// STORE_MSG_0 writes X0..X7 back to the 16 consecutive 64-bit words starting
// at word index off of the memory at block; it mirrors LOAD_MSG_0.
#define STORE_MSG_0(block, off) \ |
|||
MOVOU X0, 8*(off+0)(block); \ |
|||
MOVOU X1, 8*(off+2)(block); \ |
|||
MOVOU X2, 8*(off+4)(block); \ |
|||
MOVOU X3, 8*(off+6)(block); \ |
|||
MOVOU X4, 8*(off+8)(block); \ |
|||
MOVOU X5, 8*(off+10)(block); \ |
|||
MOVOU X6, 8*(off+12)(block); \ |
|||
MOVOU X7, 8*(off+14)(block) |
|||
|
|||
// LOAD_MSG_1 loads eight pairs of adjacent 64-bit words into X0..X7. The
// pairs start at word index off and are spaced 16 words (128 bytes) apart.
#define LOAD_MSG_1(block, off) \ |
|||
MOVOU 8*off+0*8(block), X0; \ |
|||
MOVOU 8*off+16*8(block), X1; \ |
|||
MOVOU 8*off+32*8(block), X2; \ |
|||
MOVOU 8*off+48*8(block), X3; \ |
|||
MOVOU 8*off+64*8(block), X4; \ |
|||
MOVOU 8*off+80*8(block), X5; \ |
|||
MOVOU 8*off+96*8(block), X6; \ |
|||
MOVOU 8*off+112*8(block), X7 |
|||
|
|||
// STORE_MSG_1 writes X0..X7 back to the eight word pairs that start at word
// index off and are spaced 16 words apart; it mirrors LOAD_MSG_1.
#define STORE_MSG_1(block, off) \ |
|||
MOVOU X0, 8*off+0*8(block); \ |
|||
MOVOU X1, 8*off+16*8(block); \ |
|||
MOVOU X2, 8*off+32*8(block); \ |
|||
MOVOU X3, 8*off+48*8(block); \ |
|||
MOVOU X4, 8*off+64*8(block); \ |
|||
MOVOU X5, 8*off+80*8(block); \ |
|||
MOVOU X6, 8*off+96*8(block); \ |
|||
MOVOU X7, 8*off+112*8(block) |
|||
|
|||
// BLAMKA_ROUND_0 processes 16 consecutive words in place: load with
// LOAD_MSG_0, HALF_ROUND, SHUFFLE, HALF_ROUND, SHUFFLE_INV, then store with
// STORE_MSG_0. t0 and t1 are scratch registers; c40 and c48 hold the masks.
#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \ |
|||
LOAD_MSG_0(block, off); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
STORE_MSG_0(block, off) |
|||
|
|||
// BLAMKA_ROUND_1 is BLAMKA_ROUND_0 with the strided access pattern: it uses
// LOAD_MSG_1/STORE_MSG_1, so it works on eight word pairs spaced 16 words apart.
#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \ |
|||
LOAD_MSG_1(block, off); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \ |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \ |
|||
STORE_MSG_1(block, off) |
|||
|
|||
// blamkaSSE4 permutes the block at b in place. It loads the c40/c48 masks
// into X10/X11, runs BLAMKA_ROUND_0 on the eight runs of 16 consecutive words
// (offsets 0, 16, ..., 112) and then BLAMKA_ROUND_1 on the eight strided
// groups (offsets 0, 2, ..., 14). These are the same two passes that
// processBlockSSE performs with blamkaGeneric when SSE4.1 is unavailable.
// X8 and X9 serve as scratch registers.
// func blamkaSSE4(b *block) |
|||
TEXT ·blamkaSSE4(SB), 4, $0-8 |
|||
MOVQ b+0(FP), AX |
|||
|
|||
MOVOU ·c40<>(SB), X10 |
|||
MOVOU ·c48<>(SB), X11 |
|||
|
|||
BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11) |
|||
|
|||
BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11) |
|||
BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11) |
|||
RET |
|||
|
|||
// mixBlocksSSE2 stores a XOR b XOR c into out.
// DI starts at 128 and drops by 2 per iteration while every pointer advances
// by 16 bytes, so the loop covers 128 64-bit words (1024 bytes) and ends when
// DI reaches zero. Each chunk of c is read before the matching chunk of out
// is written, which is why processBlockSSE may pass the same block for both.
// func mixBlocksSSE2(out, a, b, c *block) |
|||
TEXT ·mixBlocksSSE2(SB), 4, $0-32 |
|||
MOVQ out+0(FP), DX |
|||
MOVQ a+8(FP), AX |
|||
MOVQ b+16(FP), BX |
|||
MOVQ c+24(FP), CX |
|||
MOVQ $128, DI |
|||
|
|||
loop: |
|||
MOVOU 0(AX), X0 |
|||
MOVOU 0(BX), X1 |
|||
MOVOU 0(CX), X2 |
|||
PXOR X1, X0 |
|||
PXOR X2, X0 |
|||
MOVOU X0, 0(DX) |
|||
ADDQ $16, AX |
|||
ADDQ $16, BX |
|||
ADDQ $16, CX |
|||
ADDQ $16, DX |
|||
SUBQ $2, DI |
|||
JA loop |
|||
RET |
|||
|
|||
// xorBlocksSSE2 XORs a XOR b XOR c into the existing contents of out.
// It has the same loop structure as mixBlocksSSE2, with one extra load of the
// current out chunk (X3) that is folded into the result before the store.
// func xorBlocksSSE2(out, a, b, c *block) |
|||
TEXT ·xorBlocksSSE2(SB), 4, $0-32 |
|||
MOVQ out+0(FP), DX |
|||
MOVQ a+8(FP), AX |
|||
MOVQ b+16(FP), BX |
|||
MOVQ c+24(FP), CX |
|||
MOVQ $128, DI |
|||
|
|||
loop: |
|||
MOVOU 0(AX), X0 |
|||
MOVOU 0(BX), X1 |
|||
MOVOU 0(CX), X2 |
|||
MOVOU 0(DX), X3 |
|||
PXOR X1, X0 |
|||
PXOR X2, X0 |
|||
PXOR X3, X0 |
|||
MOVOU X0, 0(DX) |
|||
ADDQ $16, AX |
|||
ADDQ $16, BX |
|||
ADDQ $16, CX |
|||
ADDQ $16, DX |
|||
SUBQ $2, DI |
|||
JA loop |
|||
RET |
@ -0,0 +1,163 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package argon2 |
|||
|
|||
// useSSE4 reports whether the SSE4.1 assembly routine may be used. It keeps
// its zero value (false) unless an architecture-specific init function sets it.
var useSSE4 bool |
|||
|
|||
func processBlockGeneric(out, in1, in2 *block, xor bool) { |
|||
var t block |
|||
for i := range t { |
|||
t[i] = in1[i] ^ in2[i] |
|||
} |
|||
for i := 0; i < blockLength; i += 16 { |
|||
blamkaGeneric( |
|||
&t[i+0], &t[i+1], &t[i+2], &t[i+3], |
|||
&t[i+4], &t[i+5], &t[i+6], &t[i+7], |
|||
&t[i+8], &t[i+9], &t[i+10], &t[i+11], |
|||
&t[i+12], &t[i+13], &t[i+14], &t[i+15], |
|||
) |
|||
} |
|||
for i := 0; i < blockLength/8; i += 2 { |
|||
blamkaGeneric( |
|||
&t[i], &t[i+1], &t[16+i], &t[16+i+1], |
|||
&t[32+i], &t[32+i+1], &t[48+i], &t[48+i+1], |
|||
&t[64+i], &t[64+i+1], &t[80+i], &t[80+i+1], |
|||
&t[96+i], &t[96+i+1], &t[112+i], &t[112+i+1], |
|||
) |
|||
} |
|||
if xor { |
|||
for i := range t { |
|||
out[i] ^= in1[i] ^ in2[i] ^ t[i] |
|||
} |
|||
} else { |
|||
for i := range t { |
|||
out[i] = in1[i] ^ in2[i] ^ t[i] |
|||
} |
|||
} |
|||
} |
|||
|
|||
// blamkaGeneric applies one BlaMka round to the 16 words referenced by
// t00..t15, updating them in place.
//
// The words are read into locals first. The quarter-round is applied to the
// four columns (t00,t04,t08,t12), (t01,t05,t09,t13), (t02,t06,t10,t14) and
// (t03,t07,t11,t15), then to the four diagonals (t00,t05,t10,t15),
// (t01,t06,t11,t12), (t02,t07,t08,t13) and (t03,t04,t09,t14). Only then are
// the results written back through the pointers.
func blamkaGeneric(t00, t01, t02, t03, t04, t05, t06, t07, t08, t09, t10, t11, t12, t13, t14, t15 *uint64) {
	a0, a1, a2, a3 := *t00, *t01, *t02, *t03
	b0, b1, b2, b3 := *t04, *t05, *t06, *t07
	c0, c1, c2, c3 := *t08, *t09, *t10, *t11
	d0, d1, d2, d3 := *t12, *t13, *t14, *t15

	// Column step.
	a0, b0, c0, d0 = blamkaQuarter(a0, b0, c0, d0)
	a1, b1, c1, d1 = blamkaQuarter(a1, b1, c1, d1)
	a2, b2, c2, d2 = blamkaQuarter(a2, b2, c2, d2)
	a3, b3, c3, d3 = blamkaQuarter(a3, b3, c3, d3)

	// Diagonal step.
	a0, b1, c2, d3 = blamkaQuarter(a0, b1, c2, d3)
	a1, b2, c3, d0 = blamkaQuarter(a1, b2, c3, d0)
	a2, b3, c0, d1 = blamkaQuarter(a2, b3, c0, d1)
	a3, b0, c1, d2 = blamkaQuarter(a3, b0, c1, d2)

	*t00, *t01, *t02, *t03 = a0, a1, a2, a3
	*t04, *t05, *t06, *t07 = b0, b1, b2, b3
	*t08, *t09, *t10, *t11 = c0, c1, c2, c3
	*t12, *t13, *t14, *t15 = d0, d1, d2, d3
}

// blamkaQuarter mixes four words. Each addition is x += y + 2*lo32(x)*lo32(y);
// the XORed words are rotated right by 32, 24, 16 and 63 bits in turn.
func blamkaQuarter(a, b, c, d uint64) (uint64, uint64, uint64, uint64) {
	a += b + 2*uint64(uint32(a))*uint64(uint32(b))
	d ^= a
	d = d>>32 | d<<32
	c += d + 2*uint64(uint32(c))*uint64(uint32(d))
	b ^= c
	b = b>>24 | b<<40

	a += b + 2*uint64(uint32(a))*uint64(uint32(b))
	d ^= a
	d = d>>16 | d<<48
	c += d + 2*uint64(uint32(c))*uint64(uint32(d))
	b ^= c
	b = b>>63 | b<<1

	return a, b, c, d
}
@ -0,0 +1,15 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !amd64 || purego || !gc
|
|||
|
|||
package argon2 |
|||
|
|||
// processBlock compresses in1 and in2 with processBlockGeneric and overwrites
// out with the result (xor = false).
func processBlock(out, in1, in2 *block) { |
|||
processBlockGeneric(out, in1, in2, false) |
|||
} |
|||
|
|||
// processBlockXOR compresses in1 and in2 with processBlockGeneric and XORs
// the result into the existing contents of out (xor = true).
func processBlockXOR(out, in1, in2 *block) { |
|||
processBlockGeneric(out, in1, in2, true) |
|||
} |
@ -0,0 +1,291 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Package blake2b implements the BLAKE2b hash algorithm defined by RFC 7693
|
|||
// and the extendable output function (XOF) BLAKE2Xb.
|
|||
//
|
|||
// BLAKE2b is optimized for 64-bit platforms—including NEON-enabled ARMs—and
|
|||
// produces digests of any size between 1 and 64 bytes.
|
|||
// For a detailed specification of BLAKE2b see https://blake2.net/blake2.pdf
|
|||
// and for BLAKE2Xb see https://blake2.net/blake2x.pdf
|
|||
//
|
|||
// If you aren't sure which function you need, use BLAKE2b (Sum512 or New512).
|
|||
// If you need a secret-key MAC (message authentication code), use the New512
|
|||
// function with a non-nil key.
|
|||
//
|
|||
// BLAKE2X is a construction to compute hash values larger than 64 bytes. It
|
|||
// can produce hash values between 0 and 4 GiB.
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"errors" |
|||
"hash" |
|||
) |
|||
|
|||
// Block and digest sizes, in bytes.
const ( |
|||
// BlockSize is the block size of BLAKE2b in bytes.
|
|||
BlockSize = 128 |
|||
// Size is the hash size of BLAKE2b-512 in bytes.
|
|||
Size = 64 |
|||
// Size384 is the hash size of BLAKE2b-384 in bytes.
|
|||
Size384 = 48 |
|||
// Size256 is the hash size of BLAKE2b-256 in bytes.
|
|||
Size256 = 32 |
|||
) |
|||
|
|||
// CPU feature flags. On amd64 builds an init function fills them from
// cpu.X86, and hashBlocks checks them in the order AVX2, AVX, SSE4 before
// falling back to hashBlocksGeneric.
var ( |
|||
useAVX2 bool |
|||
useAVX bool |
|||
useSSE4 bool |
|||
) |
|||
|
|||
// Errors returned by newDigest: errKeySize when the key is longer than Size
// bytes, errHashSize when the requested hash size is outside 1..Size.
var ( |
|||
errKeySize = errors.New("blake2b: invalid key size") |
|||
errHashSize = errors.New("blake2b: invalid hash size") |
|||
) |
|||
|
|||
// iv is the initial chaining value. checkSum and digest.Reset start from a
// copy of it and XOR the parameter word into element 0.
var iv = [8]uint64{ |
|||
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1, |
|||
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179, |
|||
} |
|||
|
|||
// Sum512 returns the BLAKE2b-512 checksum of the data.
// It is a one-shot helper: checkSum is run with a digest length of Size and
// the full 64-byte buffer is returned.
|
|||
func Sum512(data []byte) [Size]byte { |
|||
var sum [Size]byte |
|||
checkSum(&sum, Size, data) |
|||
return sum |
|||
} |
|||
|
|||
// Sum384 returns the BLAKE2b-384 checksum of the data.
// checkSum writes into a Size-byte scratch buffer; only its first Size384
// bytes are copied into the returned array.
|
|||
func Sum384(data []byte) [Size384]byte { |
|||
var sum [Size]byte |
|||
var sum384 [Size384]byte |
|||
checkSum(&sum, Size384, data) |
|||
copy(sum384[:], sum[:Size384]) |
|||
return sum384 |
|||
} |
|||
|
|||
// Sum256 returns the BLAKE2b-256 checksum of the data.
// checkSum writes into a Size-byte scratch buffer; only its first Size256
// bytes are copied into the returned array.
|
|||
func Sum256(data []byte) [Size256]byte { |
|||
var sum [Size]byte |
|||
var sum256 [Size256]byte |
|||
checkSum(&sum, Size256, data) |
|||
copy(sum256[:], sum[:Size256]) |
|||
return sum256 |
|||
} |
|||
|
|||
// New512 returns a new hash.Hash computing the BLAKE2b-512 checksum. A non-nil
|
|||
// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
func New512(key []byte) (hash.Hash, error) { return newDigest(Size, key) } |
|||
|
|||
// New384 returns a new hash.Hash computing the BLAKE2b-384 checksum. A non-nil
|
|||
// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
func New384(key []byte) (hash.Hash, error) { return newDigest(Size384, key) } |
|||
|
|||
// New256 returns a new hash.Hash computing the BLAKE2b-256 checksum. A non-nil
|
|||
// key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
func New256(key []byte) (hash.Hash, error) { return newDigest(Size256, key) } |
|||
|
|||
// New returns a new hash.Hash computing the BLAKE2b checksum with a custom length.
|
|||
// A non-nil key turns the hash into a MAC. The key must be between zero and 64 bytes long.
|
|||
// The hash size can be a value between 1 and 64 but it is highly recommended to use
|
|||
// values equal or greater than:
|
|||
// - 32 if BLAKE2b is used as a hash function (The key is zero bytes long).
|
|||
// - 16 if BLAKE2b is used as a MAC function (The key is at least 16 bytes long).
|
|||
// When the key is nil, the returned hash.Hash implements BinaryMarshaler
|
|||
// and BinaryUnmarshaler for state (de)serialization as documented by hash.Hash.
|
|||
func New(size int, key []byte) (hash.Hash, error) { return newDigest(size, key) } |
|||
|
|||
func newDigest(hashSize int, key []byte) (*digest, error) { |
|||
if hashSize < 1 || hashSize > Size { |
|||
return nil, errHashSize |
|||
} |
|||
if len(key) > Size { |
|||
return nil, errKeySize |
|||
} |
|||
d := &digest{ |
|||
size: hashSize, |
|||
keyLen: len(key), |
|||
} |
|||
copy(d.key[:], key) |
|||
d.Reset() |
|||
return d, nil |
|||
} |
|||
|
|||
func checkSum(sum *[Size]byte, hashSize int, data []byte) { |
|||
h := iv |
|||
h[0] ^= uint64(hashSize) | (1 << 16) | (1 << 24) |
|||
var c [2]uint64 |
|||
|
|||
if length := len(data); length > BlockSize { |
|||
n := length &^ (BlockSize - 1) |
|||
if length == n { |
|||
n -= BlockSize |
|||
} |
|||
hashBlocks(&h, &c, 0, data[:n]) |
|||
data = data[n:] |
|||
} |
|||
|
|||
var block [BlockSize]byte |
|||
offset := copy(block[:], data) |
|||
remaining := uint64(BlockSize - offset) |
|||
if c[0] < remaining { |
|||
c[1]-- |
|||
} |
|||
c[0] -= remaining |
|||
|
|||
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) |
|||
|
|||
for i, v := range h[:(hashSize+7)/8] { |
|||
binary.LittleEndian.PutUint64(sum[8*i:], v) |
|||
} |
|||
} |
|||
|
|||
// digest is the streaming BLAKE2b state behind the hash.Hash values returned
// by the New* constructors.
type digest struct { |
|||
// h is the chaining value; Reset sets it to iv with the parameter word
// XORed into element 0.
h [8]uint64 |
|||
// c is the two-word counter handed to hashBlocks.
c [2]uint64 |
|||
// size is the digest length in bytes, as reported by Size.
size int |
|||
// block buffers input that has not been compressed yet.
block [BlockSize]byte |
|||
// offset is the number of buffered bytes in block.
offset int |
|||
|
|||
// key holds the MAC key copied in by newDigest; Reset loads it into block
// when keyLen > 0.
key [BlockSize]byte |
|||
// keyLen is the length of the key in bytes; zero means unkeyed.
keyLen int |
|||
} |
|||
|
|||
// Serialization constants used by MarshalBinary and UnmarshalBinary.
// magic is the identifying prefix. marshaledSize is the exact encoded length:
// the prefix, eight 8-byte state words, two 8-byte counter words, one byte
// for size, the BlockSize-byte buffer and one byte for offset.
const ( |
|||
magic = "b2b" |
|||
marshaledSize = len(magic) + 8*8 + 2*8 + 1 + BlockSize + 1 |
|||
) |
|||
|
|||
func (d *digest) MarshalBinary() ([]byte, error) { |
|||
if d.keyLen != 0 { |
|||
return nil, errors.New("crypto/blake2b: cannot marshal MACs") |
|||
} |
|||
b := make([]byte, 0, marshaledSize) |
|||
b = append(b, magic...) |
|||
for i := 0; i < 8; i++ { |
|||
b = appendUint64(b, d.h[i]) |
|||
} |
|||
b = appendUint64(b, d.c[0]) |
|||
b = appendUint64(b, d.c[1]) |
|||
// Maximum value for size is 64
|
|||
b = append(b, byte(d.size)) |
|||
b = append(b, d.block[:]...) |
|||
b = append(b, byte(d.offset)) |
|||
return b, nil |
|||
} |
|||
|
|||
func (d *digest) UnmarshalBinary(b []byte) error { |
|||
if len(b) < len(magic) || string(b[:len(magic)]) != magic { |
|||
return errors.New("crypto/blake2b: invalid hash state identifier") |
|||
} |
|||
if len(b) != marshaledSize { |
|||
return errors.New("crypto/blake2b: invalid hash state size") |
|||
} |
|||
b = b[len(magic):] |
|||
for i := 0; i < 8; i++ { |
|||
b, d.h[i] = consumeUint64(b) |
|||
} |
|||
b, d.c[0] = consumeUint64(b) |
|||
b, d.c[1] = consumeUint64(b) |
|||
d.size = int(b[0]) |
|||
b = b[1:] |
|||
copy(d.block[:], b[:BlockSize]) |
|||
b = b[BlockSize:] |
|||
d.offset = int(b[0]) |
|||
return nil |
|||
} |
|||
|
|||
// BlockSize returns the BLAKE2b block size in bytes (the BlockSize constant).
func (d *digest) BlockSize() int { return BlockSize } |
|||
|
|||
// Size returns the digest length in bytes chosen when d was created.
func (d *digest) Size() int { return d.size } |
|||
|
|||
func (d *digest) Reset() { |
|||
d.h = iv |
|||
d.h[0] ^= uint64(d.size) | (uint64(d.keyLen) << 8) | (1 << 16) | (1 << 24) |
|||
d.offset, d.c[0], d.c[1] = 0, 0, 0 |
|||
if d.keyLen > 0 { |
|||
d.block = d.key |
|||
d.offset = BlockSize |
|||
} |
|||
} |
|||
|
|||
func (d *digest) Write(p []byte) (n int, err error) { |
|||
n = len(p) |
|||
|
|||
if d.offset > 0 { |
|||
remaining := BlockSize - d.offset |
|||
if n <= remaining { |
|||
d.offset += copy(d.block[d.offset:], p) |
|||
return |
|||
} |
|||
copy(d.block[d.offset:], p[:remaining]) |
|||
hashBlocks(&d.h, &d.c, 0, d.block[:]) |
|||
d.offset = 0 |
|||
p = p[remaining:] |
|||
} |
|||
|
|||
if length := len(p); length > BlockSize { |
|||
nn := length &^ (BlockSize - 1) |
|||
if length == nn { |
|||
nn -= BlockSize |
|||
} |
|||
hashBlocks(&d.h, &d.c, 0, p[:nn]) |
|||
p = p[nn:] |
|||
} |
|||
|
|||
if len(p) > 0 { |
|||
d.offset += copy(d.block[:], p) |
|||
} |
|||
|
|||
return |
|||
} |
|||
|
|||
// Sum appends the current digest (d.size bytes) to sum and returns the
// result. finalize works on copies of the state, so d itself is not modified
// and more data can be written afterwards.
func (d *digest) Sum(sum []byte) []byte { |
|||
var hash [Size]byte |
|||
d.finalize(&hash) |
|||
return append(sum, hash[:d.size]...) |
|||
} |
|||
|
|||
func (d *digest) finalize(hash *[Size]byte) { |
|||
var block [BlockSize]byte |
|||
copy(block[:], d.block[:d.offset]) |
|||
remaining := uint64(BlockSize - d.offset) |
|||
|
|||
c := d.c |
|||
if c[0] < remaining { |
|||
c[1]-- |
|||
} |
|||
c[0] -= remaining |
|||
|
|||
h := d.h |
|||
hashBlocks(&h, &c, 0xFFFFFFFFFFFFFFFF, block[:]) |
|||
|
|||
for i, v := range h { |
|||
binary.LittleEndian.PutUint64(hash[8*i:], v) |
|||
} |
|||
} |
|||
|
|||
// appendUint64 appends x to b as 8 big-endian bytes and returns the
// extended slice.
func appendUint64(b []byte, x uint64) []byte {
	start := len(b)
	b = append(b, 0, 0, 0, 0, 0, 0, 0, 0)
	binary.BigEndian.PutUint64(b[start:], x)
	return b
}
|||
|
|||
// appendUint32 appends x to b as 4 big-endian bytes and returns the
// extended slice.
func appendUint32(b []byte, x uint32) []byte {
	start := len(b)
	b = append(b, 0, 0, 0, 0)
	binary.BigEndian.PutUint32(b[start:], x)
	return b
}
|||
|
|||
// consumeUint64 decodes a big-endian uint64 from the first 8 bytes of b and
// returns the remainder of b together with the decoded value.
func consumeUint64(b []byte) ([]byte, uint64) {
	value := binary.BigEndian.Uint64(b)
	rest := b[8:]
	return rest, value
}
|||
|
|||
// consumeUint32 decodes a big-endian uint32 from the first 4 bytes of b and
// returns the remainder of b together with the decoded value.
func consumeUint32(b []byte) ([]byte, uint32) {
	value := binary.BigEndian.Uint32(b)
	rest := b[4:]
	return rest, value
}
@ -0,0 +1,37 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build amd64 && gc && !purego
|
|||
|
|||
package blake2b |
|||
|
|||
import "golang.org/x/sys/cpu" |
|||
|
|||
// init records which vector extensions the CPU reports. hashBlocks reads
// these flags to choose an implementation.
func init() { |
|||
useAVX2 = cpu.X86.HasAVX2 |
|||
useAVX = cpu.X86.HasAVX |
|||
useSSE4 = cpu.X86.HasSSE41 |
|||
} |
|||
|
|||
// hashBlocksAVX2 has no Go body. hashBlocks calls it when useAVX2 is set.
// NOTE(review): presumably implemented in the accompanying assembly file - confirm.
//go:noescape
|
|||
func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
|
|||
// hashBlocksAVX has no Go body. hashBlocks calls it when useAVX is set and
// useAVX2 is not.
//go:noescape
|
|||
func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
|
|||
// hashBlocksSSE4 has no Go body. hashBlocks calls it when useSSE4 is set and
// neither AVX flag is.
//go:noescape
|
|||
func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
|||
|
|||
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { |
|||
switch { |
|||
case useAVX2: |
|||
hashBlocksAVX2(h, c, flag, blocks) |
|||
case useAVX: |
|||
hashBlocksAVX(h, c, flag, blocks) |
|||
case useSSE4: |
|||
hashBlocksSSE4(h, c, flag, blocks) |
|||
default: |
|||
hashBlocksGeneric(h, c, flag, blocks) |
|||
} |
|||
} |
@ -0,0 +1,744 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build amd64 && gc && !purego |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// 32-byte read-only tables for the AVX2 code path.
// AVX2_iv0 and AVX2_iv1 hold the same eight constants as the Go variable iv
// (words 0-3 and 4-7 respectively).
// AVX2_c40 and AVX2_c48 are byte-shuffle masks repeated for both 128-bit
// lanes: c40 selects source byte 3 as byte 0 of each 64-bit word (rotate
// right by 24 bits), c48 selects source byte 2 (rotate right by 16 bits).
DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 |
|||
DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b |
|||
DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b |
|||
DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1 |
|||
GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1 |
|||
DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f |
|||
DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b |
|||
DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179 |
|||
GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403 |
|||
DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b |
|||
DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403 |
|||
DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b |
|||
GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302 |
|||
DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a |
|||
DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302 |
|||
DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a |
|||
GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32 |
|||
|
|||
// 16-byte read-only tables for the AVX code path.
// AVX_iv0..AVX_iv3 hold the eight constants of the Go variable iv, two words
// per table. AVX_c40 and AVX_c48 are the single-lane versions of the
// byte-shuffle masks (rotate each 64-bit word right by 24 and 16 bits).
DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 |
|||
DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b |
|||
GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b |
|||
DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 |
|||
GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1 |
|||
DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f |
|||
GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b |
|||
DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 |
|||
GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403 |
|||
DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b |
|||
GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302 |
|||
DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a |
|||
GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// Instructions emitted as raw bytes. Going by the macro names, each is
// VPERMQ $imm, Yn, Yn; the last byte of every sequence is the immediate
// named in the macro (0x39, 0x93 or 0x4e).
// NOTE(review): the encodings are presumably hand-assembled because the
// assembler did not know the mnemonic when this was written - confirm.
#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39 |
|||
#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93 |
|||
#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e |
|||
#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93 |
|||
#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39 |
|||
|
|||
// ROUND_AVX2 performs one full round on the state held in Y0..Y3, using
// message vectors m0..m3, scratch register t and the shuffle masks c40/c48.
// Each half adds a message vector and Y1 into Y0, then alternates XOR with a
// rotation: VPSHUFD $-79 (0xB1, swap 32-bit halves), VPSHUFB c40,
// VPSHUFB c48, and the VPADDQ/VPSRLQ $63/VPXOR triple (rotate left by 1).
// Between the halves the 64-bit lanes of Y1, Y2 and Y3 are permuted with the
// VPERMQ macros; the permutation at the end of the macro uses the 0x39 and
// 0x93 immediates on the opposite registers.
// NOTE(review): presumably the two halves are the column and diagonal steps
// of the BLAKE2b round - confirm against hashBlocksGeneric.
#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \ |
|||
VPADDQ m0, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFD $-79, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPSHUFB c40, Y1, Y1; \ |
|||
VPADDQ m1, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFB c48, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPADDQ Y1, Y1, t; \ |
|||
VPSRLQ $63, Y1, Y1; \ |
|||
VPXOR t, Y1, Y1; \ |
|||
VPERMQ_0x39_Y1_Y1; \ |
|||
VPERMQ_0x4E_Y2_Y2; \ |
|||
VPERMQ_0x93_Y3_Y3; \ |
|||
VPADDQ m2, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFD $-79, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPSHUFB c40, Y1, Y1; \ |
|||
VPADDQ m3, Y0, Y0; \ |
|||
VPADDQ Y1, Y0, Y0; \ |
|||
VPXOR Y0, Y3, Y3; \ |
|||
VPSHUFB c48, Y3, Y3; \ |
|||
VPADDQ Y3, Y2, Y2; \ |
|||
VPXOR Y2, Y1, Y1; \ |
|||
VPADDQ Y1, Y1, t; \ |
|||
VPSRLQ $63, Y1, Y1; \ |
|||
VPXOR t, Y1, Y1; \ |
|||
VPERMQ_0x39_Y3_Y3; \ |
|||
VPERMQ_0x4E_Y2_Y2; \ |
|||
VPERMQ_0x93_Y1_Y1 |
|||
|
|||
// Raw-byte encodings which, going by the macro names, are VMOVQ loads from
// memory addressed by SI into X11..X15. The *_0 variants have no
// displacement byte; the (n) variants append n as one extra byte.
// NOTE(review): n is presumably an 8-bit displacement, so it must be a
// non-zero constant that fits in a signed byte - confirm.
#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E |
|||
#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26 |
|||
#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E |
|||
#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36 |
|||
#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E |
|||
|
|||
#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n |
|||
#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n |
|||
#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n |
|||
#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n |
|||
#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n |
|||
|
|||
// Raw-byte encodings which, going by the macro names, are VPINSRQ $1
// instructions that insert a 64-bit word from memory addressed by SI into the
// upper half of X11..X15. Every sequence ends with the immediate byte 0x01.
// The *_0 variants have no displacement byte; the (n) variants insert n as
// one extra byte before the immediate.
#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01 |
|||
|
|||
#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01 |
|||
#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01 |
|||
|
|||
// Raw-byte encodings which, going by the macro names, move R8 into the low
// 64 bits of X15 and insert R9 into its upper 64 bits.
// NOTE(review): their callers are outside this excerpt - confirm usage there.
#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8 |
|||
#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01 |
|||
|
|||
// load msg: Y12 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
|||
#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X12(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X12(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12 |
|||
|
|||
// load msg: Y13 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
|||
#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X13(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X13(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13 |
|||
|
|||
// load msg: Y14 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
|||
#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X14(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X14(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14 |
|||
|
|||
// load msg: Y15 = (i0, i1, i2, i3) |
|||
// i0, i1, i2, i3 must not be 0 |
// (i0, i1) are assembled in X15 and (i2, i3) in the scratch register X11, |
// which VINSERTI128 then places in the upper 128 bits of Y15. Clobbers X11. |
|||
#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \ |
|||
VMOVQ_SI_X15(i0*8); \ |
|||
VMOVQ_SI_X11(i2*8); \ |
|||
VPINSRQ_1_SI_X15(i1*8); \ |
|||
VPINSRQ_1_SI_X11(i3*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
// load msg: Y12 = (0, 2, 4, 6), Y13 = (1, 3, 5, 7), Y14 = (8, 10, 12, 14), Y15 = (9, 11, 13, 15) |
// Y12 is assembled by hand because it contains word 0, which is fetched with |
// the displacement-free VMOVQ_SI_X12_0 helper instead of the generic loader. |
#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \ |
|||
VMOVQ_SI_X12_0; \ |
|||
VMOVQ_SI_X11(4*8); \ |
|||
VPINSRQ_1_SI_X12(2*8); \ |
|||
VPINSRQ_1_SI_X11(6*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \ |
|||
LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \ |
|||
LOAD_MSG_AVX2_Y15(9, 11, 13, 15) |
|||
|
|||
// load msg: Y12 = (14, 4, 9, 13), Y13 = (10, 8, 15, 6), Y14 = (1, 0, 11, 5), Y15 = (12, 2, 7, 3) |
// For Y14, VPSHUFD $0x4E on the 16 bytes at 0(SI) swaps the two quadwords, |
// producing (word 1, word 0) with a single load; X11 supplies (11, 5). |
#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \ |
|||
LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \ |
|||
LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \ |
|||
VMOVQ_SI_X11(11*8); \ |
|||
VPSHUFD $0x4E, 0*8(SI), X14; \ |
|||
VPINSRQ_1_SI_X11(5*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14; \ |
|||
LOAD_MSG_AVX2_Y15(12, 2, 7, 3) |
|||
|
|||
// load msg: Y12 = (11, 12, 5, 15), Y13 = (8, 0, 2, 13), Y14 = (10, 3, 7, 9), Y15 = (14, 6, 1, 4) |
// Words 11 and 12 are adjacent in memory, so one VMOVDQU fills X12. |
// Word 0 in Y13 goes through the displacement-free VPINSRQ_1_SI_X13_0 helper. |
#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \ |
|||
VMOVQ_SI_X11(5*8); \ |
|||
VMOVDQU 11*8(SI), X12; \ |
|||
VPINSRQ_1_SI_X11(15*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
VMOVQ_SI_X13(8*8); \ |
|||
VMOVQ_SI_X11(2*8); \ |
|||
VPINSRQ_1_SI_X13_0; \ |
|||
VPINSRQ_1_SI_X11(13*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13; \ |
|||
LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \ |
|||
LOAD_MSG_AVX2_Y15(14, 6, 1, 4) |
|||
|
|||
// load msg: Y12 = (7, 3, 13, 11), Y13 = (9, 1, 12, 14), Y14 = (2, 5, 4, 15), Y15 = (6, 10, 0, 8) |
// Y15 is assembled by hand: word 0 uses the displacement-free VMOVQ_SI_X11_0 helper. |
#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \ |
|||
LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \ |
|||
LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \ |
|||
LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \ |
|||
VMOVQ_SI_X15(6*8); \ |
|||
VMOVQ_SI_X11_0; \ |
|||
VPINSRQ_1_SI_X15(10*8); \ |
|||
VPINSRQ_1_SI_X11(8*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
// load msg: Y12 = (9, 5, 2, 10), Y13 = (0, 7, 4, 15), Y14 = (14, 11, 6, 3), Y15 = (1, 12, 8, 13) |
// Y13 is assembled by hand: word 0 uses the displacement-free VMOVQ_SI_X13_0 helper. |
#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \ |
|||
LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \ |
|||
VMOVQ_SI_X13_0; \ |
|||
VMOVQ_SI_X11(4*8); \ |
|||
VPINSRQ_1_SI_X13(7*8); \ |
|||
VPINSRQ_1_SI_X11(15*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13; \ |
|||
LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \ |
|||
LOAD_MSG_AVX2_Y15(1, 12, 8, 13) |
|||
|
|||
// load msg: Y12 = (2, 6, 0, 8), Y13 = (12, 10, 11, 3), Y14 = (4, 7, 15, 1), Y15 = (13, 5, 14, 9) |
// Y12 is assembled by hand: word 0 uses the displacement-free VMOVQ_SI_X11_0 helper. |
#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \ |
|||
VMOVQ_SI_X12(2*8); \ |
|||
VMOVQ_SI_X11_0; \ |
|||
VPINSRQ_1_SI_X12(6*8); \ |
|||
VPINSRQ_1_SI_X11(8*8); \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \ |
|||
LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \ |
|||
LOAD_MSG_AVX2_Y15(13, 5, 14, 9) |
|||
|
|||
// load msg: Y12 = (12, 1, 14, 4), Y13 = (5, 15, 13, 10), Y14 = (0, 6, 9, 8), Y15 = (7, 3, 2, 11) |
// For Y14, word 0 uses the displacement-free VMOVQ_SI_X14_0 helper, and |
// VPSHUFD $0x4E on the 16 bytes at 8*8(SI) swaps the quadwords to give (9, 8) in X11. |
#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \ |
|||
LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \ |
|||
LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \ |
|||
VMOVQ_SI_X14_0; \ |
|||
VPSHUFD $0x4E, 8*8(SI), X11; \ |
|||
VPINSRQ_1_SI_X14(6*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14; \ |
|||
LOAD_MSG_AVX2_Y15(7, 3, 2, 11) |
|||
|
|||
// load msg: Y12 = (13, 7, 12, 3), Y13 = (11, 14, 1, 9), Y14 = (5, 15, 8, 2), Y15 = (0, 4, 6, 10) |
// Y15 is assembled by hand: word 0 uses the displacement-free VMOVQ_SI_X15_0 helper. |
#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \ |
|||
LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \ |
|||
LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \ |
|||
LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \ |
|||
VMOVQ_SI_X15_0; \ |
|||
VMOVQ_SI_X11(6*8); \ |
|||
VPINSRQ_1_SI_X15(4*8); \ |
|||
VPINSRQ_1_SI_X11(10*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
// load msg: Y12 = (6, 14, 11, 0), Y13 = (15, 9, 3, 8), Y14 = (12, 13, 1, 10), Y15 = (2, 7, 4, 5) |
// Word 0 in Y12 uses the displacement-free VPINSRQ_1_SI_X11_0 helper. The |
// adjacent pairs (12, 13) and (4, 5) are each fetched with one VMOVDQU. |
#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \ |
|||
VMOVQ_SI_X12(6*8); \ |
|||
VMOVQ_SI_X11(11*8); \ |
|||
VPINSRQ_1_SI_X12(14*8); \ |
|||
VPINSRQ_1_SI_X11_0; \ |
|||
VINSERTI128 $1, X11, Y12, Y12; \ |
|||
LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \ |
|||
VMOVQ_SI_X11(1*8); \ |
|||
VMOVDQU 12*8(SI), X14; \ |
|||
VPINSRQ_1_SI_X11(10*8); \ |
|||
VINSERTI128 $1, X11, Y14, Y14; \ |
|||
VMOVQ_SI_X15(2*8); \ |
|||
VMOVDQU 4*8(SI), X11; \ |
|||
VPINSRQ_1_SI_X15(7*8); \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
// load msg: Y12 = (10, 8, 7, 1), Y13 = (2, 4, 6, 5), Y14 = (15, 9, 3, 13), Y15 = (11, 14, 12, 0) |
// For Y13, VPSHUFD $0x4E on the 16 bytes at 5*8(SI) swaps the quadwords to |
// give (6, 5) in X11. Word 0 in Y15 uses the displacement-free VPINSRQ_1_SI_X11_0 helper. |
#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \ |
|||
LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \ |
|||
VMOVQ_SI_X13(2*8); \ |
|||
VPSHUFD $0x4E, 5*8(SI), X11; \ |
|||
VPINSRQ_1_SI_X13(4*8); \ |
|||
VINSERTI128 $1, X11, Y13, Y13; \ |
|||
LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \ |
|||
VMOVQ_SI_X15(11*8); \ |
|||
VMOVQ_SI_X11(12*8); \ |
|||
VPINSRQ_1_SI_X15(14*8); \ |
|||
VPINSRQ_1_SI_X11_0; \ |
|||
VINSERTI128 $1, X11, Y15, Y15 |
|||
|
|||
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
// |
// Processes blocks 128 bytes at a time: each iteration advances the 128-bit |
// counter c by 128, runs twelve ROUND_AVX2 rounds and folds the result into h. |
// The loop exits only when the remaining length hits exactly zero, so |
// len(blocks) is presumably a non-zero multiple of 128 (not checked here). |
// |
// Registers: AX = h, BX = c, SI = current block, DI = bytes remaining, |
// R8:R9 = counter (low:high), DX = 32-byte aligned scratch area on the stack. |
// Scratch layout: 0(DX) = c0, 8(DX) = c1, 16(DX) = flag, 24(DX) = 0, |
// 32..159(DX) = first message schedule, 160..287(DX) = second message schedule. |
|||
TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment |
|||
MOVQ h+0(FP), AX |
|||
MOVQ c+8(FP), BX |
|||
MOVQ flag+16(FP), CX |
|||
MOVQ blocks_base+24(FP), SI |
|||
MOVQ blocks_len+32(FP), DI |
|||
|
|||
// DX = SP rounded up to the next 32-byte boundary (needed by the VMOVDQA stores below). |
MOVQ SP, DX |
|||
ADDQ $31, DX |
|||
ANDQ $~31, DX |
|||
|
|||
// 16(DX) = flag, 24(DX) = 0; CX is reused as the zero source. |
MOVQ CX, 16(DX) |
|||
XORQ CX, CX |
|||
MOVQ CX, 24(DX) |
|||
|
|||
// Y4, Y5 = the c40 / c48 constants handed to ROUND_AVX2. |
VMOVDQU ·AVX2_c40<>(SB), Y4 |
|||
VMOVDQU ·AVX2_c48<>(SB), Y5 |
|||
|
|||
// Y8 = h[0..3], Y9 = h[4..7]; Y6, Y7 = iv0, iv1 (all kept live across iterations). |
VMOVDQU 0(AX), Y8 |
|||
VMOVDQU 32(AX), Y9 |
|||
VMOVDQU ·AVX2_iv0<>(SB), Y6 |
|||
VMOVDQU ·AVX2_iv1<>(SB), Y7 |
|||
|
|||
// R8:R9 = c[0]:c[1]; the high word is also mirrored at 8(DX). |
MOVQ 0(BX), R8 |
|||
MOVQ 8(BX), R9 |
|||
MOVQ R9, 8(DX) |
|||
|
|||
loop: |
|||
// Counter += 128. If the low word wrapped (it is now below 128), carry into R9. |
// NOTE(review): JGE is a signed comparison, so this matches an unsigned |
// "c0 < 128" check only while the low word stays below 2^63. |
ADDQ $128, R8 |
|||
MOVQ R8, 0(DX) |
|||
CMPQ R8, $128 |
|||
JGE noinc |
|||
INCQ R9 |
|||
MOVQ R9, 8(DX) |
|||
|
|||
noinc: |
|||
// Working state: Y0, Y1 = h; Y2 = iv0; Y3 = iv1 ^ (c0, c1, flag, 0). |
VMOVDQA Y8, Y0 |
|||
VMOVDQA Y9, Y1 |
|||
VMOVDQA Y6, Y2 |
|||
VPXOR 0(DX), Y7, Y3 |
|||
|
|||
// Rounds 1 and 2: the loaded message vectors are also saved to the scratch |
// area so the last two rounds can reuse them without reloading. |
LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() |
|||
VMOVDQA Y12, 32(DX) |
|||
VMOVDQA Y13, 64(DX) |
|||
VMOVDQA Y14, 96(DX) |
|||
VMOVDQA Y15, 128(DX) |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() |
|||
VMOVDQA Y12, 160(DX) |
|||
VMOVDQA Y13, 192(DX) |
|||
VMOVDQA Y14, 224(DX) |
|||
VMOVDQA Y15, 256(DX) |
|||
|
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
// Rounds 3 to 10, each with its own message permutation. |
LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() |
|||
ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5) |
|||
|
|||
// Rounds 11 and 12 reuse the schedules saved during rounds 1 and 2. |
ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5) |
|||
ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5) |
|||
|
|||
// Feed-forward: h[0..3] ^= Y0 ^ Y2, h[4..7] ^= Y1 ^ Y3. |
VPXOR Y0, Y8, Y8 |
|||
VPXOR Y1, Y9, Y9 |
|||
VPXOR Y2, Y8, Y8 |
|||
VPXOR Y3, Y9, Y9 |
|||
|
|||
// Advance to the next 128-byte block; repeat until no bytes remain. |
LEAQ 128(SI), SI |
|||
SUBQ $128, DI |
|||
JNE loop |
|||
|
|||
// Write the updated counter and state back to the caller. |
MOVQ R8, 0(BX) |
|||
MOVQ R9, 8(BX) |
|||
|
|||
VMOVDQU Y8, 0(AX) |
|||
VMOVDQU Y9, 32(AX) |
|||
VZEROUPPER |
|||
|
|||
RET |
|||
|
|||
// Hand-assembled VEX encodings of VPUNPCKLQDQ. The macro name lists the |
// operands in Go assembler order: two sources, then the destination. |
#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA |
|||
#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB |
|||
#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF |
|||
#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD |
|||
#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE |
|||
|
|||
// Hand-assembled VEX encodings of VPUNPCKHQDQ, named the same way |
// (sources first, destination last). |
#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7 |
|||
#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF |
|||
#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7 |
|||
#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF |
|||
#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7 |
|||
#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7 |
|||
#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF |
|||
#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF |
|||
|
|||
// SHUFFLE_AVX permutes the 64-bit words held in X2..X7 between the two half |
// rounds: the four words in X2:X3 rotate by one position ([a,b,c,d] -> [b,c,d,a]), |
// X4 and X5 are swapped (rotation by two), and the four words in X6:X7 rotate |
// by three ([a,b,c,d] -> [d,a,b,c]). X13, X14 and X15 are used as temporaries. |
#define SHUFFLE_AVX() \ |
|||
VMOVDQA X6, X13; \ |
|||
VMOVDQA X2, X14; \ |
|||
VMOVDQA X4, X6; \ |
|||
VPUNPCKLQDQ_X13_X13_X15; \ |
|||
VMOVDQA X5, X4; \ |
|||
VMOVDQA X6, X5; \ |
|||
VPUNPCKHQDQ_X15_X7_X6; \ |
|||
VPUNPCKLQDQ_X7_X7_X15; \ |
|||
VPUNPCKHQDQ_X15_X13_X7; \ |
|||
VPUNPCKLQDQ_X3_X3_X15; \ |
|||
VPUNPCKHQDQ_X15_X2_X2; \ |
|||
VPUNPCKLQDQ_X14_X14_X15; \ |
|||
VPUNPCKHQDQ_X15_X3_X3; \ |
|||
|
|||
// SHUFFLE_AVX_INV undoes SHUFFLE_AVX: the four words in X2:X3 rotate by three |
// positions ([a,b,c,d] -> [d,a,b,c]), X4 and X5 are swapped back, and the four |
// words in X6:X7 rotate by one ([a,b,c,d] -> [b,c,d,a]). X13, X14 and X15 are |
// used as temporaries. |
#define SHUFFLE_AVX_INV() \ |
|||
VMOVDQA X2, X13; \ |
|||
VMOVDQA X4, X14; \ |
|||
VPUNPCKLQDQ_X2_X2_X15; \ |
|||
VMOVDQA X5, X4; \ |
|||
VPUNPCKHQDQ_X15_X3_X2; \ |
|||
VMOVDQA X14, X5; \ |
|||
VPUNPCKLQDQ_X3_X3_X15; \ |
|||
VMOVDQA X6, X14; \ |
|||
VPUNPCKHQDQ_X15_X13_X3; \ |
|||
VPUNPCKLQDQ_X7_X7_X15; \ |
|||
VPUNPCKHQDQ_X15_X6_X6; \ |
|||
VPUNPCKLQDQ_X14_X14_X15; \ |
|||
VPUNPCKHQDQ_X15_X7_X7; \ |
|||
|
|||
// HALF_ROUND_AVX runs the add / xor / rotate mixing sequence on the state held |
// in v0..v7 (two 64-bit words per register), adding message words m0, m1 in |
// the first half and m2, m3 in the second. |
// The four rotations are implemented as: VPSHUFD $-79 (0xB1, swaps the 32-bit |
// halves of each quadword), VPSHUFB with the c40 mask, VPSHUFB with the c48 |
// mask, and (x+x) ^ (x>>63), i.e. a rotate left by one bit. |
// t0 is a scratch register. It may alias m3, as the callers do with X15: |
// m3 is read for the last time before t0 is first written. |
#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ |
|||
VPADDQ m0, v0, v0; \ |
|||
VPADDQ v2, v0, v0; \ |
|||
VPADDQ m1, v1, v1; \ |
|||
VPADDQ v3, v1, v1; \ |
|||
VPXOR v0, v6, v6; \ |
|||
VPXOR v1, v7, v7; \ |
|||
VPSHUFD $-79, v6, v6; \ |
|||
VPSHUFD $-79, v7, v7; \ |
|||
VPADDQ v6, v4, v4; \ |
|||
VPADDQ v7, v5, v5; \ |
|||
VPXOR v4, v2, v2; \ |
|||
VPXOR v5, v3, v3; \ |
|||
VPSHUFB c40, v2, v2; \ |
|||
VPSHUFB c40, v3, v3; \ |
|||
VPADDQ m2, v0, v0; \ |
|||
VPADDQ v2, v0, v0; \ |
|||
VPADDQ m3, v1, v1; \ |
|||
VPADDQ v3, v1, v1; \ |
|||
VPXOR v0, v6, v6; \ |
|||
VPXOR v1, v7, v7; \ |
|||
VPSHUFB c48, v6, v6; \ |
|||
VPSHUFB c48, v7, v7; \ |
|||
VPADDQ v6, v4, v4; \ |
|||
VPADDQ v7, v5, v5; \ |
|||
VPXOR v4, v2, v2; \ |
|||
VPXOR v5, v3, v3; \ |
|||
VPADDQ v2, v2, t0; \ |
|||
VPSRLQ $63, v2, v2; \ |
|||
VPXOR t0, v2, v2; \ |
|||
VPADDQ v3, v3, t0; \ |
|||
VPSRLQ $63, v3, v3; \ |
|||
VPXOR t0, v3, v3 |
|||
|
|||
// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7) |
|||
// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0 |
// Each index selects one 64-bit message word at offset i*8 from SI (going by |
// the byte-encoded helper names): the VMOVQ_SI_* helpers fill the low quadword |
// of each register, then the VPINSRQ_1_SI_* helpers fill the high quadword. |
|||
#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \ |
|||
VMOVQ_SI_X12(i0*8); \ |
|||
VMOVQ_SI_X13(i2*8); \ |
|||
VMOVQ_SI_X14(i4*8); \ |
|||
VMOVQ_SI_X15(i6*8); \ |
|||
VPINSRQ_1_SI_X12(i1*8); \ |
|||
VPINSRQ_1_SI_X13(i3*8); \ |
|||
VPINSRQ_1_SI_X14(i5*8); \ |
|||
VPINSRQ_1_SI_X15(i7*8) |
|||
|
|||
// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7) |
// Word 0 is fetched with the displacement-free VMOVQ_SI_X12_0 helper. |
|||
#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \ |
|||
VMOVQ_SI_X12_0; \ |
|||
VMOVQ_SI_X13(4*8); \ |
|||
VMOVQ_SI_X14(1*8); \ |
|||
VMOVQ_SI_X15(5*8); \ |
|||
VPINSRQ_1_SI_X12(2*8); \ |
|||
VPINSRQ_1_SI_X13(6*8); \ |
|||
VPINSRQ_1_SI_X14(3*8); \ |
|||
VPINSRQ_1_SI_X15(7*8) |
|||
|
|||
// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3) |
// VPSHUFD $0x4E on the 16 bytes at 0(SI) swaps the two quadwords, so X12 |
// receives (word 1, word 0) from a single load. |
|||
#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \ |
|||
VPSHUFD $0x4E, 0*8(SI), X12; \ |
|||
VMOVQ_SI_X13(11*8); \ |
|||
VMOVQ_SI_X14(12*8); \ |
|||
VMOVQ_SI_X15(7*8); \ |
|||
VPINSRQ_1_SI_X13(5*8); \ |
|||
VPINSRQ_1_SI_X14(2*8); \ |
|||
VPINSRQ_1_SI_X15(3*8) |
|||
|
|||
// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13) |
// Words 11 and 12 are adjacent, so one VMOVDQU fills X12. Word 0 goes through |
// the displacement-free VPINSRQ_1_SI_X14_0 helper. |
|||
#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \ |
|||
VMOVDQU 11*8(SI), X12; \ |
|||
VMOVQ_SI_X13(5*8); \ |
|||
VMOVQ_SI_X14(8*8); \ |
|||
VMOVQ_SI_X15(2*8); \ |
|||
VPINSRQ_1_SI_X13(15*8); \ |
|||
VPINSRQ_1_SI_X14_0; \ |
|||
VPINSRQ_1_SI_X15(13*8) |
|||
|
|||
// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8) |
// Word 0 is fetched with the displacement-free VMOVQ_SI_X15_0 helper. |
|||
#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \ |
|||
VMOVQ_SI_X12(2*8); \ |
|||
VMOVQ_SI_X13(4*8); \ |
|||
VMOVQ_SI_X14(6*8); \ |
|||
VMOVQ_SI_X15_0; \ |
|||
VPINSRQ_1_SI_X12(5*8); \ |
|||
VPINSRQ_1_SI_X13(15*8); \ |
|||
VPINSRQ_1_SI_X14(10*8); \ |
|||
VPINSRQ_1_SI_X15(8*8) |
|||
|
|||
// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15) |
// Word 0 is fetched with the displacement-free VMOVQ_SI_X14_0 helper. |
|||
#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \ |
|||
VMOVQ_SI_X12(9*8); \ |
|||
VMOVQ_SI_X13(2*8); \ |
|||
VMOVQ_SI_X14_0; \ |
|||
VMOVQ_SI_X15(4*8); \ |
|||
VPINSRQ_1_SI_X12(5*8); \ |
|||
VPINSRQ_1_SI_X13(10*8); \ |
|||
VPINSRQ_1_SI_X14(7*8); \ |
|||
VPINSRQ_1_SI_X15(15*8) |
|||
|
|||
// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3) |
// Word 0 is fetched with the displacement-free VMOVQ_SI_X13_0 helper. |
|||
#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \ |
|||
VMOVQ_SI_X12(2*8); \ |
|||
VMOVQ_SI_X13_0; \ |
|||
VMOVQ_SI_X14(12*8); \ |
|||
VMOVQ_SI_X15(11*8); \ |
|||
VPINSRQ_1_SI_X12(6*8); \ |
|||
VPINSRQ_1_SI_X13(8*8); \ |
|||
VPINSRQ_1_SI_X14(10*8); \ |
|||
VPINSRQ_1_SI_X15(3*8) |
|||
|
|||
// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11) |
// The low quadwords are loaded with assembler-encoded MOVQ instead of the |
// byte-encoded VMOVQ_SI_* helpers. VPSHUFD $0x4E on the 16 bytes at 8*8(SI) |
// swaps the quadwords, giving (9, 8) in X13 from one load. |
|||
#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \ |
|||
MOVQ 0*8(SI), X12; \ |
|||
VPSHUFD $0x4E, 8*8(SI), X13; \ |
|||
MOVQ 7*8(SI), X14; \ |
|||
MOVQ 2*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(6*8); \ |
|||
VPINSRQ_1_SI_X14(3*8); \ |
|||
VPINSRQ_1_SI_X15(11*8) |
|||
|
|||
// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8) |
// The low quadwords are loaded with assembler-encoded MOVQ. Word 0 goes |
// through the displacement-free VPINSRQ_1_SI_X13_0 helper. |
|||
#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \ |
|||
MOVQ 6*8(SI), X12; \ |
|||
MOVQ 11*8(SI), X13; \ |
|||
MOVQ 15*8(SI), X14; \ |
|||
MOVQ 3*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(14*8); \ |
|||
VPINSRQ_1_SI_X13_0; \ |
|||
VPINSRQ_1_SI_X14(9*8); \ |
|||
VPINSRQ_1_SI_X15(8*8) |
|||
|
|||
// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10) |
// The low quadwords, including word 0, are loaded with assembler-encoded MOVQ. |
|||
#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \ |
|||
MOVQ 5*8(SI), X12; \ |
|||
MOVQ 8*8(SI), X13; \ |
|||
MOVQ 0*8(SI), X14; \ |
|||
MOVQ 6*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(15*8); \ |
|||
VPINSRQ_1_SI_X13(2*8); \ |
|||
VPINSRQ_1_SI_X14(4*8); \ |
|||
VPINSRQ_1_SI_X15(10*8) |
|||
|
|||
// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5) |
// The adjacent pairs (12, 13) and (4, 5) are each fetched with one VMOVDQU. |
|||
#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \ |
|||
VMOVDQU 12*8(SI), X12; \ |
|||
MOVQ 1*8(SI), X13; \ |
|||
MOVQ 2*8(SI), X14; \ |
|||
VPINSRQ_1_SI_X13(10*8); \ |
|||
VPINSRQ_1_SI_X14(7*8); \ |
|||
VMOVDQU 4*8(SI), X15 |
|||
|
|||
// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0) |
// The low quadwords are loaded with assembler-encoded MOVQ. Word 0 goes |
// through the displacement-free VPINSRQ_1_SI_X15_0 helper. |
|||
#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \ |
|||
MOVQ 15*8(SI), X12; \ |
|||
MOVQ 3*8(SI), X13; \ |
|||
MOVQ 11*8(SI), X14; \ |
|||
MOVQ 12*8(SI), X15; \ |
|||
VPINSRQ_1_SI_X12(9*8); \ |
|||
VPINSRQ_1_SI_X13(13*8); \ |
|||
VPINSRQ_1_SI_X14(14*8); \ |
|||
VPINSRQ_1_SI_X15_0 |
|||
|
|||
// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
// |
// Processes blocks 128 bytes at a time using 128-bit AVX registers: each |
// iteration advances the 128-bit counter c by 128, runs twelve rounds (two |
// half rounds each) and folds the result into h. The loop exits only when the |
// remaining length hits exactly zero, so len(blocks) is presumably a non-zero |
// multiple of 128 (not checked here). |
// |
// Registers: AX = h, BX = c, SI = current block, DI = bytes remaining, |
// R8:R9 = counter (low:high), R10 = 16-byte aligned scratch area on the stack, |
// X8 / X9 = c40 / c48 shuffle masks, X10:X11 = h[0..3], X2:X3 = h[4..7]. |
// Scratch layout: 0(R10) = iv3 ^ (flag, 0); 16..271(R10) = the sixteen message |
// vectors of the first two rounds, kept for reuse by the last two rounds. |
|||
TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment |
|||
MOVQ h+0(FP), AX |
|||
MOVQ c+8(FP), BX |
|||
MOVQ flag+16(FP), CX |
|||
MOVQ blocks_base+24(FP), SI |
|||
MOVQ blocks_len+32(FP), DI |
|||
|
|||
// R10 = SP rounded up to the next 16-byte boundary (needed by the VMOVDQA accesses below). |
MOVQ SP, R10 |
|||
ADDQ $15, R10 |
|||
ANDQ $~15, R10 |
|||
|
|||
// X8 = c40 mask, X9 = c48 mask (staged through X0 / X1). |
VMOVDQU ·AVX_c40<>(SB), X0 |
|||
VMOVDQU ·AVX_c48<>(SB), X1 |
|||
VMOVDQA X0, X8 |
|||
VMOVDQA X1, X9 |
|||
|
|||
VMOVDQU ·AVX_iv3<>(SB), X0 |
|||
VMOVDQA X0, 0(R10) |
|||
XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0) |
|||
|
|||
// X10:X11 = h[0..3], X2:X3 = h[4..7]. |
VMOVDQU 0(AX), X10 |
|||
VMOVDQU 16(AX), X11 |
|||
VMOVDQU 32(AX), X2 |
|||
VMOVDQU 48(AX), X3 |
|||
|
|||
MOVQ 0(BX), R8 |
|||
MOVQ 8(BX), R9 |
|||
|
|||
loop: |
|||
// Counter += 128. If the low word wrapped (it is now below 128), carry into R9. |
// NOTE(review): JGE is a signed comparison, so this matches an unsigned |
// "c0 < 128" check only while the low word stays below 2^63. |
ADDQ $128, R8 |
|||
CMPQ R8, $128 |
|||
JGE noinc |
|||
INCQ R9 |
|||
|
|||
noinc: |
|||
// X15 = (R8, R9), the current counter. |
VMOVQ_R8_X15 |
|||
VPINSRQ_1_R9_X15 |
|||
|
|||
// Working state: X0:X1 = h[0..3], X2:X3 = h[4..7] (already live), |
// X4, X5 = iv0, iv1, X6 = iv2 ^ counter, X7 = iv3 ^ (flag, 0). |
VMOVDQA X10, X0 |
|||
VMOVDQA X11, X1 |
|||
VMOVDQU ·AVX_iv0<>(SB), X4 |
|||
VMOVDQU ·AVX_iv1<>(SB), X5 |
|||
VMOVDQU ·AVX_iv2<>(SB), X6 |
|||
|
|||
VPXOR X15, X6, X6 |
|||
VMOVDQA 0(R10), X7 |
|||
|
|||
// Round 1. Message vectors are saved at 16..143(R10) for reuse in round 11. |
// In every HALF_ROUND_AVX call X15 serves as both m3 and the scratch t0. |
LOAD_MSG_AVX_0_2_4_6_1_3_5_7() |
|||
VMOVDQA X12, 16(R10) |
|||
VMOVDQA X13, 32(R10) |
|||
VMOVDQA X14, 48(R10) |
|||
VMOVDQA X15, 64(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15) |
|||
VMOVDQA X12, 80(R10) |
|||
VMOVDQA X13, 96(R10) |
|||
VMOVDQA X14, 112(R10) |
|||
VMOVDQA X15, 128(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Round 2. Message vectors are saved at 144..271(R10) for reuse in round 12. |
LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6) |
|||
VMOVDQA X12, 144(R10) |
|||
VMOVDQA X13, 160(R10) |
|||
VMOVDQA X14, 176(R10) |
|||
VMOVDQA X15, 192(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_1_0_11_5_12_2_7_3() |
|||
VMOVDQA X12, 208(R10) |
|||
VMOVDQA X13, 224(R10) |
|||
VMOVDQA X14, 240(R10) |
|||
VMOVDQA X15, 256(R10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Rounds 3 to 10, each with its own message permutation. |
LOAD_MSG_AVX_11_12_5_15_8_0_2_13() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_2_5_4_15_6_10_0_8() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX_9_5_2_10_0_7_4_15() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX_2_6_0_8_12_10_11_3() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_0_6_9_8_7_3_2_11() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_5_15_8_2_0_4_6_10() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX_6_14_11_0_15_9_3_8() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_12_13_1_10_2_7_4_5() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5) |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
LOAD_MSG_AVX_15_9_3_13_11_14_12_0() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Round 11 reuses the round-1 message vectors saved on the stack. |
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Round 12 reuses the round-2 message vectors saved on the stack. |
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9) |
|||
SHUFFLE_AVX() |
|||
HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9) |
|||
SHUFFLE_AVX_INV() |
|||
|
|||
// Feed-forward: h[i] ^= v[i] ^ v[i+8]. h[4..7] is re-read from memory into |
// X14:X15; the new h[4..7] is stored immediately and also left in X2:X3, |
// where the next iteration expects it. |
VMOVDQU 32(AX), X14 |
|||
VMOVDQU 48(AX), X15 |
|||
VPXOR X0, X10, X10 |
|||
VPXOR X1, X11, X11 |
|||
VPXOR X2, X14, X14 |
|||
VPXOR X3, X15, X15 |
|||
VPXOR X4, X10, X10 |
|||
VPXOR X5, X11, X11 |
|||
VPXOR X6, X14, X2 |
|||
VPXOR X7, X15, X3 |
|||
VMOVDQU X2, 32(AX) |
|||
VMOVDQU X3, 48(AX) |
|||
|
|||
// Advance to the next 128-byte block; repeat until no bytes remain. |
LEAQ 128(SI), SI |
|||
SUBQ $128, DI |
|||
JNE loop |
|||
|
|||
// Write h[0..3] and the updated counter back to the caller. |
VMOVDQU X10, 0(AX) |
|||
VMOVDQU X11, 16(AX) |
|||
|
|||
MOVQ R8, 0(BX) |
|||
MOVQ R9, 8(BX) |
|||
VZEROUPPER |
|||
|
|||
RET |
@ -0,0 +1,278 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build amd64 && gc && !purego |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// iv0..iv3: read-only 16-byte symbols holding two 64-bit initialization-vector |
// words each. hashBlocksSSE4 loads them as the second half of its working state. |
DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908 |
|||
DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b |
|||
GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b |
|||
DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1 |
|||
GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1 |
|||
DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f |
|||
GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b |
|||
DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179 |
|||
GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// c40: PSHUFB mask that rotates each 64-bit lane right by 24 bits (left by 40): |
// result byte i of a lane is taken from source byte (i+3) mod 8. |
DATA ·c40<>+0x00(SB)/8, $0x0201000706050403 |
|||
DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b |
|||
GLOBL ·c40<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// c48: PSHUFB mask that rotates each 64-bit lane right by 16 bits (left by 48): |
// result byte i of a lane is taken from source byte (i+2) mod 8. |
DATA ·c48<>+0x00(SB)/8, $0x0100070605040302 |
|||
DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a |
|||
GLOBL ·c48<>(SB), (NOPTR+RODATA), $16 |
|||
|
|||
// SHUFFLE permutes the 64-bit words held in v2..v7 between the two half |
// rounds: the four words in v2:v3 rotate by one position ([a,b,c,d] -> [b,c,d,a]), |
// v4 and v5 are swapped (rotation by two), and the four words in v6:v7 rotate |
// by three ([a,b,c,d] -> [d,a,b,c]). |
// t1 and t2 are scratch registers. t2 need not be initialised: only the high |
// quadword written by each PUNPCKLQDQ is consumed afterwards. |
#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \ |
|||
MOVO v4, t1; \ |
|||
MOVO v5, v4; \ |
|||
MOVO t1, v5; \ |
|||
MOVO v6, t1; \ |
|||
PUNPCKLQDQ v6, t2; \ |
|||
PUNPCKHQDQ v7, v6; \ |
|||
PUNPCKHQDQ t2, v6; \ |
|||
PUNPCKLQDQ v7, t2; \ |
|||
MOVO t1, v7; \ |
|||
MOVO v2, t1; \ |
|||
PUNPCKHQDQ t2, v7; \ |
|||
PUNPCKLQDQ v3, t2; \ |
|||
PUNPCKHQDQ t2, v2; \ |
|||
PUNPCKLQDQ t1, t2; \ |
|||
PUNPCKHQDQ t2, v3 |
|||
|
|||
// SHUFFLE_INV undoes SHUFFLE: the four words in v2:v3 rotate by three |
// positions ([a,b,c,d] -> [d,a,b,c]), v4 and v5 are swapped back, and the four |
// words in v6:v7 rotate by one ([a,b,c,d] -> [b,c,d,a]). |
// t1 and t2 are scratch registers. t2 need not be initialised: only the high |
// quadword written by each PUNPCKLQDQ is consumed afterwards. |
#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \ |
|||
MOVO v4, t1; \ |
|||
MOVO v5, v4; \ |
|||
MOVO t1, v5; \ |
|||
MOVO v2, t1; \ |
|||
PUNPCKLQDQ v2, t2; \ |
|||
PUNPCKHQDQ v3, v2; \ |
|||
PUNPCKHQDQ t2, v2; \ |
|||
PUNPCKLQDQ v3, t2; \ |
|||
MOVO t1, v3; \ |
|||
MOVO v6, t1; \ |
|||
PUNPCKHQDQ t2, v3; \ |
|||
PUNPCKLQDQ v7, t2; \ |
|||
PUNPCKHQDQ t2, v6; \ |
|||
PUNPCKLQDQ t1, t2; \ |
|||
PUNPCKHQDQ t2, v7 |
|||
|
|||
// HALF_ROUND runs the add / xor / rotate mixing sequence on the state held in |
// v0..v7 (two 64-bit words per register), adding message words m0, m1 in the |
// first half and m2, m3 in the second. |
// The four rotations are implemented as: PSHUFD $0xB1 (swaps the 32-bit halves |
// of each quadword), PSHUFB with the c40 mask, PSHUFB with the c48 mask, and |
// (x+x) ^ (x>>63), i.e. a rotate left by one bit. |
// t0 is a scratch register. It may alias m3, as the callers do with X11: |
// m3 is read for the last time before t0 is first written. |
#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \ |
|||
PADDQ m0, v0; \ |
|||
PADDQ m1, v1; \ |
|||
PADDQ v2, v0; \ |
|||
PADDQ v3, v1; \ |
|||
PXOR v0, v6; \ |
|||
PXOR v1, v7; \ |
|||
PSHUFD $0xB1, v6, v6; \ |
|||
PSHUFD $0xB1, v7, v7; \ |
|||
PADDQ v6, v4; \ |
|||
PADDQ v7, v5; \ |
|||
PXOR v4, v2; \ |
|||
PXOR v5, v3; \ |
|||
PSHUFB c40, v2; \ |
|||
PSHUFB c40, v3; \ |
|||
PADDQ m2, v0; \ |
|||
PADDQ m3, v1; \ |
|||
PADDQ v2, v0; \ |
|||
PADDQ v3, v1; \ |
|||
PXOR v0, v6; \ |
|||
PXOR v1, v7; \ |
|||
PSHUFB c48, v6; \ |
|||
PSHUFB c48, v7; \ |
|||
PADDQ v6, v4; \ |
|||
PADDQ v7, v5; \ |
|||
PXOR v4, v2; \ |
|||
PXOR v5, v3; \ |
|||
MOVOU v2, t0; \ |
|||
PADDQ v2, t0; \ |
|||
PSRLQ $63, v2; \ |
|||
PXOR t0, v2; \ |
|||
MOVOU v3, t0; \ |
|||
PADDQ v3, t0; \ |
|||
PSRLQ $63, v3; \ |
|||
PXOR t0, v3 |
|||
|
|||
// LOAD_MSG gathers eight 64-bit message words from the block at src: |
// m0 = (i0, i1), m1 = (i2, i3), m2 = (i4, i5), m3 = (i6, i7). |
// Each index selects the word at byte offset i*8: MOVQ fills the low quadword |
// and PINSRQ $1 fills the high quadword. |
#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \ |
|||
MOVQ i0*8(src), m0; \ |
|||
PINSRQ $1, i1*8(src), m0; \ |
|||
MOVQ i2*8(src), m1; \ |
|||
PINSRQ $1, i3*8(src), m1; \ |
|||
MOVQ i4*8(src), m2; \ |
|||
PINSRQ $1, i5*8(src), m2; \ |
|||
MOVQ i6*8(src), m3; \ |
|||
PINSRQ $1, i7*8(src), m3 |
|||
|
|||
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) |
// |
// Processes blocks 128 bytes at a time using SSE registers: each iteration |
// advances the 128-bit counter c by 128, runs twelve rounds (two half rounds |
// each) and folds the result into h. The loop exits only when the remaining |
// length hits exactly zero, so len(blocks) is presumably a non-zero multiple |
// of 128 (not checked here). |
// |
// Registers: AX = h, BX = c, SI = current block, DI = bytes remaining, |
// R8:R9 = counter (low:high), R10 = 16-byte aligned scratch area on the stack, |
// X13 / X14 = c40 / c48 shuffle masks, X12:X15 = h[0..3]. |
// Scratch layout: 0(R10) = iv3 ^ (flag, 0); 16..271(R10) = the sixteen message |
// vectors of the first two rounds, kept for reuse by the last two rounds. |
|||
TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment |
|||
MOVQ h+0(FP), AX |
|||
MOVQ c+8(FP), BX |
|||
MOVQ flag+16(FP), CX |
|||
MOVQ blocks_base+24(FP), SI |
|||
MOVQ blocks_len+32(FP), DI |
|||
|
|||
// R10 = SP rounded up to the next 16-byte boundary (needed by the MOVO accesses below). |
MOVQ SP, R10 |
|||
ADDQ $15, R10 |
|||
ANDQ $~15, R10 |
|||
|
|||
MOVOU ·iv3<>(SB), X0 |
|||
MOVO X0, 0(R10) |
|||
XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0) |
|||
|
|||
// X13 = c40 mask, X14 = c48 mask. |
MOVOU ·c40<>(SB), X13 |
|||
MOVOU ·c48<>(SB), X14 |
|||
|
|||
// X12:X15 = h[0..3], kept live across iterations. |
MOVOU 0(AX), X12 |
|||
MOVOU 16(AX), X15 |
|||
|
|||
MOVQ 0(BX), R8 |
|||
MOVQ 8(BX), R9 |
|||
|
|||
loop: |
|||
// Counter += 128. If the low word wrapped (it is now below 128), carry into R9. |
// NOTE(review): JGE is a signed comparison, so this matches an unsigned |
// "c0 < 128" check only while the low word stays below 2^63. |
ADDQ $128, R8 |
|||
CMPQ R8, $128 |
|||
JGE noinc |
|||
INCQ R9 |
|||
|
|||
noinc: |
|||
// X8 = (R8, R9), the current counter. |
MOVQ R8, X8 |
|||
PINSRQ $1, R9, X8 |
|||
|
|||
// Working state: X0:X1 = h[0..3], X2:X3 = h[4..7] (re-read from memory), |
// X4, X5 = iv0, iv1, X6 = iv2 ^ counter, X7 = iv3 ^ (flag, 0). |
MOVO X12, X0 |
|||
MOVO X15, X1 |
|||
MOVOU 32(AX), X2 |
|||
MOVOU 48(AX), X3 |
|||
MOVOU ·iv0<>(SB), X4 |
|||
MOVOU ·iv1<>(SB), X5 |
|||
MOVOU ·iv2<>(SB), X6 |
|||
|
|||
PXOR X8, X6 |
|||
MOVO 0(R10), X7 |
|||
|
|||
// Round 1. Message vectors are saved at 16..143(R10) for reuse in round 11. |
// In every HALF_ROUND call X11 serves as both m3 and the scratch t0; the |
// SHUFFLE macros use the message registers X8 and X9 as temporaries. |
LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7) |
|||
MOVO X8, 16(R10) |
|||
MOVO X9, 32(R10) |
|||
MOVO X10, 48(R10) |
|||
MOVO X11, 64(R10) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15) |
|||
MOVO X8, 80(R10) |
|||
MOVO X9, 96(R10) |
|||
MOVO X10, 112(R10) |
|||
MOVO X11, 128(R10) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
// Round 2. Message vectors are saved at 144..271(R10) for reuse in round 12. |
LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6) |
|||
MOVO X8, 144(R10) |
|||
MOVO X9, 160(R10) |
|||
MOVO X10, 176(R10) |
|||
MOVO X11, 192(R10) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3) |
|||
MOVO X8, 208(R10) |
|||
MOVO X9, 224(R10) |
|||
MOVO X10, 240(R10) |
|||
MOVO X11, 256(R10) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
// Rounds 3 to 10, each with its own message permutation. |
LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
// Round 11 reuses the round-1 message vectors saved on the stack. |
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
// Round 12 reuses the round-2 message vectors saved on the stack. |
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14) |
|||
SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14) |
|||
SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9) |
|||
|
|||
// Feed-forward: h[i] ^= v[i] ^ v[i+8]. h[4..7] is re-read into X10:X11 and |
// written straight back; h[0..3] stays in X12:X15 until the loop ends. |
MOVOU 32(AX), X10 |
|||
MOVOU 48(AX), X11 |
|||
PXOR X0, X12 |
|||
PXOR X1, X15 |
|||
PXOR X2, X10 |
|||
PXOR X3, X11 |
|||
PXOR X4, X12 |
|||
PXOR X5, X15 |
|||
PXOR X6, X10 |
|||
PXOR X7, X11 |
|||
MOVOU X10, 32(AX) |
|||
MOVOU X11, 48(AX) |
|||
|
|||
// Advance to the next 128-byte block; repeat until no bytes remain. |
LEAQ 128(SI), SI |
|||
SUBQ $128, DI |
|||
JNE loop |
|||
|
|||
// Write h[0..3] and the updated counter back to the caller. |
MOVOU X12, 0(AX) |
|||
MOVOU X15, 16(AX) |
|||
|
|||
MOVQ R8, 0(BX) |
|||
MOVQ R9, 8(BX) |
|||
|
|||
RET |
@ -0,0 +1,182 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"math/bits" |
|||
) |
|||
|
|||
// precomputed is the message-word schedule for BLAKE2b.
|
|||
// It holds 12 rows of 16 byte-sized indices, one row per round.
|
|||
// The entries are calculated from the sigma constants; the last two rows repeat the first two.
|
|||
var precomputed = [12][16]byte{ |
|||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, |
|||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, |
|||
{11, 12, 5, 15, 8, 0, 2, 13, 10, 3, 7, 9, 14, 6, 1, 4}, |
|||
{7, 3, 13, 11, 9, 1, 12, 14, 2, 5, 4, 15, 6, 10, 0, 8}, |
|||
{9, 5, 2, 10, 0, 7, 4, 15, 14, 11, 6, 3, 1, 12, 8, 13}, |
|||
{2, 6, 0, 8, 12, 10, 11, 3, 4, 7, 15, 1, 13, 5, 14, 9}, |
|||
{12, 1, 14, 4, 5, 15, 13, 10, 0, 6, 9, 8, 7, 3, 2, 11}, |
|||
{13, 7, 12, 3, 11, 14, 1, 9, 5, 15, 8, 2, 0, 4, 6, 10}, |
|||
{6, 14, 11, 0, 15, 9, 3, 8, 12, 13, 1, 10, 2, 7, 4, 5}, |
|||
{10, 8, 7, 1, 2, 4, 6, 5, 15, 9, 3, 13, 11, 14, 12, 0}, |
|||
{0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14, 9, 11, 13, 15}, // equal to the first
|
|||
{14, 4, 9, 13, 10, 8, 15, 6, 1, 0, 11, 5, 12, 2, 7, 3}, // equal to the second
|
|||
} |
|||
|
|||
// hashBlocksGeneric is the portable BLAKE2b compression loop.
//
// It consumes blocks 128 bytes at a time (16 little-endian uint64 words per
// iteration), updating the chaining value h in place. c is the two-word byte
// counter; it is advanced once per block and written back on return. flag is
// XORed into v14 for every block processed by this call.
//
// NOTE(review): assumes len(blocks) is a multiple of 128; a short tail would
// make binary.LittleEndian.Uint64 panic — confirm against callers.
func hashBlocksGeneric(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { |
|||
var m [16]uint64 |
|||
c0, c1 := c[0], c[1] |
|||
|
|||
// i is advanced inside the body by the message-loading loop, 8 bytes per word.
for i := 0; i < len(blocks); { |
|||
// Advance the counter by one block; a wrapped c0 carries into c1.
c0 += BlockSize |
|||
if c0 < BlockSize { |
|||
c1++ |
|||
} |
|||
|
|||
// Working state: v0..v7 from the chaining value, v8..v15 from iv, with the
// counter and flag mixed into v12..v14.
v0, v1, v2, v3, v4, v5, v6, v7 := h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] |
|||
v8, v9, v10, v11, v12, v13, v14, v15 := iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7] |
|||
v12 ^= c0 |
|||
v13 ^= c1 |
|||
v14 ^= flag |
|||
|
|||
// Decode the 16 message words of this block.
for j := range m { |
|||
m[j] = binary.LittleEndian.Uint64(blocks[i:]) |
|||
i += 8 |
|||
} |
|||
|
|||
// One pass per row of precomputed; s selects the message words for the round.
for j := range precomputed { |
|||
s := &(precomputed[j]) |
|||
|
|||
// Columns (v0,v4,v8,v12) .. (v3,v7,v11,v15), first half: negative
// RotateLeft64 counts rotate right, here by 32 and 24 bits.
v0 += m[s[0]] |
|||
v0 += v4 |
|||
v12 ^= v0 |
|||
v12 = bits.RotateLeft64(v12, -32) |
|||
v8 += v12 |
|||
v4 ^= v8 |
|||
v4 = bits.RotateLeft64(v4, -24) |
|||
v1 += m[s[1]] |
|||
v1 += v5 |
|||
v13 ^= v1 |
|||
v13 = bits.RotateLeft64(v13, -32) |
|||
v9 += v13 |
|||
v5 ^= v9 |
|||
v5 = bits.RotateLeft64(v5, -24) |
|||
v2 += m[s[2]] |
|||
v2 += v6 |
|||
v14 ^= v2 |
|||
v14 = bits.RotateLeft64(v14, -32) |
|||
v10 += v14 |
|||
v6 ^= v10 |
|||
v6 = bits.RotateLeft64(v6, -24) |
|||
v3 += m[s[3]] |
|||
v3 += v7 |
|||
v15 ^= v3 |
|||
v15 = bits.RotateLeft64(v15, -32) |
|||
v11 += v15 |
|||
v7 ^= v11 |
|||
v7 = bits.RotateLeft64(v7, -24) |
|||
|
|||
// Same columns, second half: rotate right by 16 and 63 bits.
v0 += m[s[4]] |
|||
v0 += v4 |
|||
v12 ^= v0 |
|||
v12 = bits.RotateLeft64(v12, -16) |
|||
v8 += v12 |
|||
v4 ^= v8 |
|||
v4 = bits.RotateLeft64(v4, -63) |
|||
v1 += m[s[5]] |
|||
v1 += v5 |
|||
v13 ^= v1 |
|||
v13 = bits.RotateLeft64(v13, -16) |
|||
v9 += v13 |
|||
v5 ^= v9 |
|||
v5 = bits.RotateLeft64(v5, -63) |
|||
v2 += m[s[6]] |
|||
v2 += v6 |
|||
v14 ^= v2 |
|||
v14 = bits.RotateLeft64(v14, -16) |
|||
v10 += v14 |
|||
v6 ^= v10 |
|||
v6 = bits.RotateLeft64(v6, -63) |
|||
v3 += m[s[7]] |
|||
v3 += v7 |
|||
v15 ^= v3 |
|||
v15 = bits.RotateLeft64(v15, -16) |
|||
v11 += v15 |
|||
v7 ^= v11 |
|||
v7 = bits.RotateLeft64(v7, -63) |
|||
|
|||
// Diagonals (v0,v5,v10,v15), (v1,v6,v11,v12), (v2,v7,v8,v13),
// (v3,v4,v9,v14), first half: rotate right by 32 and 24 bits.
v0 += m[s[8]] |
|||
v0 += v5 |
|||
v15 ^= v0 |
|||
v15 = bits.RotateLeft64(v15, -32) |
|||
v10 += v15 |
|||
v5 ^= v10 |
|||
v5 = bits.RotateLeft64(v5, -24) |
|||
v1 += m[s[9]] |
|||
v1 += v6 |
|||
v12 ^= v1 |
|||
v12 = bits.RotateLeft64(v12, -32) |
|||
v11 += v12 |
|||
v6 ^= v11 |
|||
v6 = bits.RotateLeft64(v6, -24) |
|||
v2 += m[s[10]] |
|||
v2 += v7 |
|||
v13 ^= v2 |
|||
v13 = bits.RotateLeft64(v13, -32) |
|||
v8 += v13 |
|||
v7 ^= v8 |
|||
v7 = bits.RotateLeft64(v7, -24) |
|||
v3 += m[s[11]] |
|||
v3 += v4 |
|||
v14 ^= v3 |
|||
v14 = bits.RotateLeft64(v14, -32) |
|||
v9 += v14 |
|||
v4 ^= v9 |
|||
v4 = bits.RotateLeft64(v4, -24) |
|||
|
|||
// Same diagonals, second half: rotate right by 16 and 63 bits.
v0 += m[s[12]] |
|||
v0 += v5 |
|||
v15 ^= v0 |
|||
v15 = bits.RotateLeft64(v15, -16) |
|||
v10 += v15 |
|||
v5 ^= v10 |
|||
v5 = bits.RotateLeft64(v5, -63) |
|||
v1 += m[s[13]] |
|||
v1 += v6 |
|||
v12 ^= v1 |
|||
v12 = bits.RotateLeft64(v12, -16) |
|||
v11 += v12 |
|||
v6 ^= v11 |
|||
v6 = bits.RotateLeft64(v6, -63) |
|||
v2 += m[s[14]] |
|||
v2 += v7 |
|||
v13 ^= v2 |
|||
v13 = bits.RotateLeft64(v13, -16) |
|||
v8 += v13 |
|||
v7 ^= v8 |
|||
v7 = bits.RotateLeft64(v7, -63) |
|||
v3 += m[s[15]] |
|||
v3 += v4 |
|||
v14 ^= v3 |
|||
v14 = bits.RotateLeft64(v14, -16) |
|||
v9 += v14 |
|||
v4 ^= v9 |
|||
v4 = bits.RotateLeft64(v4, -63) |
|||
|
|||
} |
|||
|
|||
// Fold both halves of the working state back into the chaining value.
h[0] ^= v0 ^ v8 |
|||
h[1] ^= v1 ^ v9 |
|||
h[2] ^= v2 ^ v10 |
|||
h[3] ^= v3 ^ v11 |
|||
h[4] ^= v4 ^ v12 |
|||
h[5] ^= v5 ^ v13 |
|||
h[6] ^= v6 ^ v14 |
|||
h[7] ^= v7 ^ v15 |
|||
} |
|||
// Publish the advanced counter to the caller.
c[0], c[1] = c0, c1 |
|||
} |
@ -0,0 +1,11 @@ |
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !amd64 || purego || !gc
|
|||
|
|||
package blake2b |
|||
|
|||
// hashBlocks is the block-compression entry point for builds selected by this
// file's constraint (!amd64 || purego || !gc); it always delegates to the
// portable hashBlocksGeneric.
func hashBlocks(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte) { |
|||
hashBlocksGeneric(h, c, flag, blocks) |
|||
} |
@ -0,0 +1,177 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"encoding/binary" |
|||
"errors" |
|||
"io" |
|||
) |
|||
|
|||
// XOF defines the interface to hash functions that
|
|||
// support arbitrary-length output.
|
|||
// Values of this type are created with NewXOF.
type XOF interface { |
|||
// Write absorbs more data into the hash's state. It panics if called
|
|||
// after Read.
|
|||
io.Writer |
|||
|
|||
// Read reads more output from the hash. It returns io.EOF if the limit
|
|||
// has been reached.
|
|||
io.Reader |
|||
|
|||
// Clone returns a copy of the XOF in its current state.
|
|||
Clone() XOF |
|||
|
|||
// Reset resets the XOF to its initial state.
|
|||
// After Reset, Write may be called again.
Reset() |
|||
} |
|||
|
|||
// OutputLengthUnknown can be used as the size argument to NewXOF to indicate
|
|||
// the length of the output is not known in advance.
|
|||
// NewXOF replaces it with magicUnknownOutputLength before storing the length.
const OutputLengthUnknown = 0 |
|||
|
|||
// magicUnknownOutputLength is a magic value for the output size that indicates
|
|||
// an unknown number of output bytes.
|
|||
// It equals 2^32-1, which is why NewXOF rejects that value as an explicit size.
const magicUnknownOutputLength = (1 << 32) - 1 |
|||
|
|||
// maxOutputLength is the absolute maximum number of bytes to produce when the
|
|||
// number of output bytes is unknown.
|
|||
// (1 << 32) * 64 bytes is 256 GiB.
const maxOutputLength = (1 << 32) * 64 |
|||
|
|||
// NewXOF creates a new variable-output-length hash. The hash either produce a
|
|||
// known number of bytes (1 <= size < 2**32-1), or an unknown number of bytes
|
|||
// (size == OutputLengthUnknown). In the latter case, an absolute limit of
|
|||
// 256GiB applies.
|
|||
//
|
|||
// A non-nil key turns the hash into a MAC. The key must between
|
|||
// zero and 32 bytes long.
|
|||
func NewXOF(size uint32, key []byte) (XOF, error) { |
|||
if len(key) > Size { |
|||
return nil, errKeySize |
|||
} |
|||
if size == magicUnknownOutputLength { |
|||
// 2^32-1 indicates an unknown number of bytes and thus isn't a
|
|||
// valid length.
|
|||
return nil, errors.New("blake2b: XOF length too large") |
|||
} |
|||
if size == OutputLengthUnknown { |
|||
size = magicUnknownOutputLength |
|||
} |
|||
x := &xof{ |
|||
d: digest{ |
|||
size: Size, |
|||
keyLen: len(key), |
|||
}, |
|||
length: size, |
|||
} |
|||
copy(x.d.key[:], key) |
|||
x.Reset() |
|||
return x, nil |
|||
} |
|||
|
|||
// xof is the XOF implementation returned by NewXOF.
type xof struct { |
|||
// d absorbs written data and is re-initialised for every output block in Read.
d digest |
|||
// length is the output length stored by NewXOF (magicUnknownOutputLength when unknown).
length uint32 |
|||
// remaining is the number of output bytes Read may still return.
remaining uint64 |
|||
// cfg is the parameter block fed to initConfig, root the finalized hash of the
// written data, and block the most recently generated output block.
cfg, root, block [Size]byte |
|||
// offset is the number of bytes of block already handed out by Read.
offset int |
|||
// nodeOffset is written into cfg and incremented for each generated block.
nodeOffset uint32 |
|||
// readMode is set by the first Read; Write panics while it is set.
readMode bool |
|||
} |
|||
|
|||
func (x *xof) Write(p []byte) (n int, err error) { |
|||
if x.readMode { |
|||
panic("blake2b: write to XOF after read") |
|||
} |
|||
return x.d.Write(p) |
|||
} |
|||
|
|||
func (x *xof) Clone() XOF { |
|||
clone := *x |
|||
return &clone |
|||
} |
|||
|
|||
func (x *xof) Reset() { |
|||
x.cfg[0] = byte(Size) |
|||
binary.LittleEndian.PutUint32(x.cfg[4:], uint32(Size)) // leaf length
|
|||
binary.LittleEndian.PutUint32(x.cfg[12:], x.length) // XOF length
|
|||
x.cfg[17] = byte(Size) // inner hash size
|
|||
|
|||
x.d.Reset() |
|||
x.d.h[1] ^= uint64(x.length) << 32 |
|||
|
|||
x.remaining = uint64(x.length) |
|||
if x.remaining == magicUnknownOutputLength { |
|||
x.remaining = maxOutputLength |
|||
} |
|||
x.offset, x.nodeOffset = 0, 0 |
|||
x.readMode = false |
|||
} |
|||
|
|||
func (x *xof) Read(p []byte) (n int, err error) { |
|||
if !x.readMode { |
|||
x.d.finalize(&x.root) |
|||
x.readMode = true |
|||
} |
|||
|
|||
if x.remaining == 0 { |
|||
return 0, io.EOF |
|||
} |
|||
|
|||
n = len(p) |
|||
if uint64(n) > x.remaining { |
|||
n = int(x.remaining) |
|||
p = p[:n] |
|||
} |
|||
|
|||
if x.offset > 0 { |
|||
blockRemaining := Size - x.offset |
|||
if n < blockRemaining { |
|||
x.offset += copy(p, x.block[x.offset:]) |
|||
x.remaining -= uint64(n) |
|||
return |
|||
} |
|||
copy(p, x.block[x.offset:]) |
|||
p = p[blockRemaining:] |
|||
x.offset = 0 |
|||
x.remaining -= uint64(blockRemaining) |
|||
} |
|||
|
|||
for len(p) >= Size { |
|||
binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) |
|||
x.nodeOffset++ |
|||
|
|||
x.d.initConfig(&x.cfg) |
|||
x.d.Write(x.root[:]) |
|||
x.d.finalize(&x.block) |
|||
|
|||
copy(p, x.block[:]) |
|||
p = p[Size:] |
|||
x.remaining -= uint64(Size) |
|||
} |
|||
|
|||
if todo := len(p); todo > 0 { |
|||
if x.remaining < uint64(Size) { |
|||
x.cfg[0] = byte(x.remaining) |
|||
} |
|||
binary.LittleEndian.PutUint32(x.cfg[8:], x.nodeOffset) |
|||
x.nodeOffset++ |
|||
|
|||
x.d.initConfig(&x.cfg) |
|||
x.d.Write(x.root[:]) |
|||
x.d.finalize(&x.block) |
|||
|
|||
x.offset = copy(p, x.block[:todo]) |
|||
x.remaining -= uint64(todo) |
|||
} |
|||
return |
|||
} |
|||
|
|||
func (d *digest) initConfig(cfg *[Size]byte) { |
|||
d.offset, d.c[0], d.c[1] = 0, 0, 0 |
|||
for i := range d.h { |
|||
d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(cfg[i*8:]) |
|||
} |
|||
} |
@ -0,0 +1,30 @@ |
|||
// Copyright 2017 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package blake2b |
|||
|
|||
import ( |
|||
"crypto" |
|||
"hash" |
|||
) |
|||
|
|||
func init() { |
|||
newHash256 := func() hash.Hash { |
|||
h, _ := New256(nil) |
|||
return h |
|||
} |
|||
newHash384 := func() hash.Hash { |
|||
h, _ := New384(nil) |
|||
return h |
|||
} |
|||
|
|||
newHash512 := func() hash.Hash { |
|||
h, _ := New512(nil) |
|||
return h |
|||
} |
|||
|
|||
crypto.RegisterHash(crypto.BLAKE2b_256, newHash256) |
|||
crypto.RegisterHash(crypto.BLAKE2b_384, newHash384) |
|||
crypto.RegisterHash(crypto.BLAKE2b_512, newHash512) |
|||
} |
@ -1,51 +0,0 @@ |
|||
# |
|||
# This Dockerfile builds a recent curl with HTTP/2 client support, using |
|||
# a recent nghttp2 build. |
|||
# |
|||
# See the Makefile for how to tag it. If Docker and that image are found, the |
|||
# Go tests use this curl binary for integration tests. |
|||
# |
|||
|
|||
FROM ubuntu:trusty |
|||
|
|||
RUN apt-get update && \ |
|||
apt-get upgrade -y && \ |
|||
apt-get install -y git-core build-essential wget |
|||
|
|||
RUN apt-get install -y --no-install-recommends \ |
|||
autotools-dev libtool pkg-config zlib1g-dev \ |
|||
libcunit1-dev libssl-dev libxml2-dev libevent-dev \ |
|||
automake autoconf |
|||
|
|||
# The list of packages nghttp2 recommends for h2load: |
|||
RUN apt-get install -y --no-install-recommends make binutils \ |
|||
autoconf automake autotools-dev \ |
|||
libtool pkg-config zlib1g-dev libcunit1-dev libssl-dev libxml2-dev \ |
|||
libev-dev libevent-dev libjansson-dev libjemalloc-dev \ |
|||
cython python3.4-dev python-setuptools |
|||
|
|||
# Note: setting NGHTTP2_VER before the git clone, so an old git clone isn't cached: |
|||
ENV NGHTTP2_VER 895da9a |
|||
RUN cd /root && git clone https://github.com/tatsuhiro-t/nghttp2.git |
|||
|
|||
WORKDIR /root/nghttp2 |
|||
RUN git reset --hard $NGHTTP2_VER |
|||
RUN autoreconf -i |
|||
RUN automake |
|||
RUN autoconf |
|||
RUN ./configure |
|||
RUN make |
|||
RUN make install |
|||
|
|||
WORKDIR /root |
|||
RUN wget https://curl.se/download/curl-7.45.0.tar.gz |
|||
RUN tar -zxvf curl-7.45.0.tar.gz |
|||
WORKDIR /root/curl-7.45.0 |
|||
RUN ./configure --with-ssl --with-nghttp2=/usr/local |
|||
RUN make |
|||
RUN make install |
|||
RUN ldconfig |
|||
|
|||
CMD ["-h"] |
|||
ENTRYPOINT ["/usr/local/bin/curl"] |
|||
|
@ -1,3 +0,0 @@ |
|||
curlimage: |
|||
docker build -t gohttp2/curl . |
|||
|
@ -1,30 +0,0 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build go1.11
|
|||
// +build go1.11
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"net/http/httptrace" |
|||
"net/textproto" |
|||
) |
|||
|
|||
// traceHasWroteHeaderField reports whether trace is non-nil and has a
// WroteHeaderField hook installed.
func traceHasWroteHeaderField(trace *httptrace.ClientTrace) bool {
	if trace == nil {
		return false
	}
	return trace.WroteHeaderField != nil
}
|||
|
|||
// traceWroteHeaderField invokes trace's WroteHeaderField hook with key k and
// the single value v. It does nothing when trace or the hook is nil.
func traceWroteHeaderField(trace *httptrace.ClientTrace, k, v string) {
	if trace == nil || trace.WroteHeaderField == nil {
		return
	}
	trace.WroteHeaderField(k, []string{v})
}
|||
|
|||
// traceGot1xxResponseFunc returns trace's Got1xxResponse hook, or nil when
// trace itself is nil. The returned hook may also be nil if none was set.
func traceGot1xxResponseFunc(trace *httptrace.ClientTrace) func(int, textproto.MIMEHeader) error {
	if trace == nil {
		return nil
	}
	return trace.Got1xxResponse
}
@ -1,27 +0,0 @@ |
|||
// Copyright 2021 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build go1.15
|
|||
// +build go1.15
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"context" |
|||
"crypto/tls" |
|||
) |
|||
|
|||
// dialTLSWithContext uses tls.Dialer, added in Go 1.15, to open a TLS
|
|||
// connection.
|
|||
func (t *Transport) dialTLSWithContext(ctx context.Context, network, addr string, cfg *tls.Config) (*tls.Conn, error) { |
|||
dialer := &tls.Dialer{ |
|||
Config: cfg, |
|||
} |
|||
cn, err := dialer.DialContext(ctx, network, addr) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
tlsCn := cn.(*tls.Conn) // DialContext comment promises this will always succeed
|
|||
return tlsCn, nil |
|||
} |
@ -1,17 +0,0 @@ |
|||
// Copyright 2021 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build go1.18
|
|||
// +build go1.18
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"crypto/tls" |
|||
"net" |
|||
) |
|||
|
|||
// tlsUnderlyingConn returns the connection wrapped by tc, as reported by
// tc.NetConn. This variant is built under the go1.18 constraint.
func tlsUnderlyingConn(tc *tls.Conn) net.Conn { |
|||
return tc.NetConn() |
|||
} |
@ -1,21 +0,0 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !go1.11
|
|||
// +build !go1.11
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"net/http/httptrace" |
|||
"net/textproto" |
|||
) |
|||
|
|||
// traceHasWroteHeaderField always reports false in builds selected by the
// !go1.11 constraint; trace is ignored.
func traceHasWroteHeaderField(trace *httptrace.ClientTrace) bool { return false } |
|||
|
|||
// traceWroteHeaderField is a no-op in builds selected by the !go1.11
// constraint; all arguments are ignored.
func traceWroteHeaderField(trace *httptrace.ClientTrace, k, v string) {} |
|||
|
|||
// traceGot1xxResponseFunc always returns nil in builds selected by the
// !go1.11 constraint; trace is ignored.
func traceGot1xxResponseFunc(trace *httptrace.ClientTrace) func(int, textproto.MIMEHeader) error { |
|||
return nil |
|||
} |
@ -1,31 +0,0 @@ |
|||
// Copyright 2021 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !go1.15
|
|||
// +build !go1.15
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"context" |
|||
"crypto/tls" |
|||
) |
|||
|
|||
// dialTLSWithContext opens a TLS connection.
|
|||
func (t *Transport) dialTLSWithContext(ctx context.Context, network, addr string, cfg *tls.Config) (*tls.Conn, error) { |
|||
cn, err := tls.Dial(network, addr, cfg) |
|||
if err != nil { |
|||
return nil, err |
|||
} |
|||
if err := cn.Handshake(); err != nil { |
|||
return nil, err |
|||
} |
|||
if cfg.InsecureSkipVerify { |
|||
return cn, nil |
|||
} |
|||
if err := cn.VerifyHostname(cfg.ServerName); err != nil { |
|||
return nil, err |
|||
} |
|||
return cn, nil |
|||
} |
@ -1,17 +0,0 @@ |
|||
// Copyright 2021 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !go1.18
|
|||
// +build !go1.18
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"crypto/tls" |
|||
"net" |
|||
) |
|||
|
|||
// tlsUnderlyingConn always returns nil in builds selected by the !go1.18
// constraint; tc is ignored, so callers must be prepared for a nil net.Conn.
func tlsUnderlyingConn(tc *tls.Conn) net.Conn { |
|||
return nil |
|||
} |
@ -0,0 +1,119 @@ |
|||
// Copyright 2023 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package http2 |
|||
|
|||
import ( |
|||
"fmt" |
|||
"math" |
|||
) |
|||
|
|||
// roundRobinWriteScheduler is a write scheduler that serves queued control
// frames first and otherwise rotates through the open streams' queues.
type roundRobinWriteScheduler struct { |
|||
// control contains control frames (SETTINGS, PING, etc.).
|
|||
// Push also places frames for streams that are not open on this queue.
control writeQueue |
|||
|
|||
// streams maps stream ID to a queue.
|
|||
streams map[uint32]*writeQueue |
|||
|
|||
// stream queues are stored in a circular linked list.
|
|||
// head is the next stream to write, or nil if there are no streams open.
|
|||
head *writeQueue |
|||
|
|||
// pool of empty queues for reuse.
|
|||
queuePool writeQueuePool |
|||
} |
|||
|
|||
// newRoundRobinWriteScheduler constructs a new write scheduler.
|
|||
// The round robin scheduler priorizes control frames
|
|||
// like SETTINGS and PING over DATA frames.
|
|||
// When there are no control frames to send, it performs a round-robin
|
|||
// selection from the ready streams.
|
|||
func newRoundRobinWriteScheduler() WriteScheduler { |
|||
ws := &roundRobinWriteScheduler{ |
|||
streams: make(map[uint32]*writeQueue), |
|||
} |
|||
return ws |
|||
} |
|||
|
|||
func (ws *roundRobinWriteScheduler) OpenStream(streamID uint32, options OpenStreamOptions) { |
|||
if ws.streams[streamID] != nil { |
|||
panic(fmt.Errorf("stream %d already opened", streamID)) |
|||
} |
|||
q := ws.queuePool.get() |
|||
ws.streams[streamID] = q |
|||
if ws.head == nil { |
|||
ws.head = q |
|||
q.next = q |
|||
q.prev = q |
|||
} else { |
|||
// Queues are stored in a ring.
|
|||
// Insert the new stream before ws.head, putting it at the end of the list.
|
|||
q.prev = ws.head.prev |
|||
q.next = ws.head |
|||
q.prev.next = q |
|||
q.next.prev = q |
|||
} |
|||
} |
|||
|
|||
func (ws *roundRobinWriteScheduler) CloseStream(streamID uint32) { |
|||
q := ws.streams[streamID] |
|||
if q == nil { |
|||
return |
|||
} |
|||
if q.next == q { |
|||
// This was the only open stream.
|
|||
ws.head = nil |
|||
} else { |
|||
q.prev.next = q.next |
|||
q.next.prev = q.prev |
|||
if ws.head == q { |
|||
ws.head = q.next |
|||
} |
|||
} |
|||
delete(ws.streams, streamID) |
|||
ws.queuePool.put(q) |
|||
} |
|||
|
|||
// AdjustStream is a no-op: this scheduler ignores streamID and priority.
func (ws *roundRobinWriteScheduler) AdjustStream(streamID uint32, priority PriorityParam) {} |
|||
|
|||
func (ws *roundRobinWriteScheduler) Push(wr FrameWriteRequest) { |
|||
if wr.isControl() { |
|||
ws.control.push(wr) |
|||
return |
|||
} |
|||
q := ws.streams[wr.StreamID()] |
|||
if q == nil { |
|||
// This is a closed stream.
|
|||
// wr should not be a HEADERS or DATA frame.
|
|||
// We push the request onto the control queue.
|
|||
if wr.DataSize() > 0 { |
|||
panic("add DATA on non-open stream") |
|||
} |
|||
ws.control.push(wr) |
|||
return |
|||
} |
|||
q.push(wr) |
|||
} |
|||
|
|||
func (ws *roundRobinWriteScheduler) Pop() (FrameWriteRequest, bool) { |
|||
// Control and RST_STREAM frames first.
|
|||
if !ws.control.empty() { |
|||
return ws.control.shift(), true |
|||
} |
|||
if ws.head == nil { |
|||
return FrameWriteRequest{}, false |
|||
} |
|||
q := ws.head |
|||
for { |
|||
if wr, ok := q.consume(math.MaxInt32); ok { |
|||
ws.head = q.next |
|||
return wr, true |
|||
} |
|||
q = q.next |
|||
if q == ws.head { |
|||
break |
|||
} |
|||
} |
|||
return FrameWriteRequest{}, false |
|||
} |
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,30 @@ |
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
|||
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !go1.16
|
|||
|
|||
package idna |
|||
|
|||
// appendMapping appends the mapping for the respective rune. isMapped must be
|
|||
// true. A mapping is a categorization of a rune as defined in UTS #46.
|
|||
func (c info) appendMapping(b []byte, s string) []byte { |
|||
index := int(c >> indexShift) |
|||
if c&xorBit == 0 { |
|||
s := mappings[index:] |
|||
return append(b, s[1:s[0]+1]...) |
|||
} |
|||
b = append(b, s...) |
|||
if c&inlineXOR == inlineXOR { |
|||
// TODO: support and handle two-byte inline masks
|
|||
b[len(b)-1] ^= byte(index) |
|||
} else { |
|||
for p := len(b) - int(xorData[index]); p < len(b); p++ { |
|||
index++ |
|||
b[p] ^= xorData[index] |
|||
} |
|||
} |
|||
return b |
|||
} |
@ -0,0 +1,30 @@ |
|||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
|||
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build go1.16
|
|||
|
|||
package idna |
|||
|
|||
// appendMapping appends the mapping for the respective rune. isMapped must be
|
|||
// true. A mapping is a categorization of a rune as defined in UTS #46.
|
|||
func (c info) appendMapping(b []byte, s string) []byte { |
|||
index := int(c >> indexShift) |
|||
if c&xorBit == 0 { |
|||
p := index |
|||
return append(b, mappings[mappingIndex[p]:mappingIndex[p+1]]...) |
|||
} |
|||
b = append(b, s...) |
|||
if c&inlineXOR == inlineXOR { |
|||
// TODO: support and handle two-byte inline masks
|
|||
b[len(b)-1] ^= byte(index) |
|||
} else { |
|||
for p := len(b) - int(xorData[index]); p < len(b); p++ { |
|||
index++ |
|||
b[p] ^= xorData[index] |
|||
} |
|||
} |
|||
return b |
|||
} |
@ -0,0 +1,17 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build gc |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// |
|||
// System calls for ppc64, AIX are implemented in runtime/syscall_aix.go |
|||
// |
|||
|
|||
// syscall6 is a trampoline: it jumps straight to syscall·syscall6.
// $0-88 declares no local frame and 88 bytes of arguments and results.
TEXT ·syscall6(SB),NOSPLIT,$0-88 |
|||
JMP syscall·syscall6(SB) |
|||
|
|||
// rawSyscall6 is a trampoline: it jumps straight to syscall·rawSyscall6.
// $0-88 declares no local frame and 88 bytes of arguments and results.
TEXT ·rawSyscall6(SB),NOSPLIT,$0-88 |
|||
JMP syscall·rawSyscall6(SB) |
@ -0,0 +1,66 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"runtime" |
|||
) |
|||
|
|||
// byteOrder is a subset of encoding/binary.ByteOrder.
type byteOrder interface {
	Uint32([]byte) uint32
	Uint64([]byte) uint64
}

// littleEndian and bigEndian are the two stateless byteOrder implementations.
type (
	littleEndian struct{}
	bigEndian    struct{}
)

// Uint32 decodes b[0:4] with b[0] as the least significant byte.
// It panics if b holds fewer than four bytes.
func (littleEndian) Uint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	return uint32(b[3])<<24 | uint32(b[2])<<16 | uint32(b[1])<<8 | uint32(b[0])
}

// Uint64 decodes b[0:8] with b[0] as the least significant byte.
// It panics if b holds fewer than eight bytes.
func (littleEndian) Uint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	lo := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24
	hi := uint64(b[4]) | uint64(b[5])<<8 | uint64(b[6])<<16 | uint64(b[7])<<24
	return hi<<32 | lo
}

// Uint32 decodes b[0:4] with b[0] as the most significant byte.
// It panics if b holds fewer than four bytes.
func (bigEndian) Uint32(b []byte) uint32 {
	_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
	return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
}

// Uint64 decodes b[0:8] with b[0] as the most significant byte.
// It panics if b holds fewer than eight bytes.
func (bigEndian) Uint64(b []byte) uint64 {
	_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
	hi := uint64(b[0])<<24 | uint64(b[1])<<16 | uint64(b[2])<<8 | uint64(b[3])
	lo := uint64(b[4])<<24 | uint64(b[5])<<16 | uint64(b[6])<<8 | uint64(b[7])
	return hi<<32 | lo
}
|||
|
|||
// hostByteOrder returns littleEndian on little-endian machines and
|
|||
// bigEndian on big-endian machines.
|
|||
func hostByteOrder() byteOrder { |
|||
switch runtime.GOARCH { |
|||
case "386", "amd64", "amd64p32", |
|||
"alpha", |
|||
"arm", "arm64", |
|||
"loong64", |
|||
"mipsle", "mips64le", "mips64p32le", |
|||
"nios2", |
|||
"ppc64le", |
|||
"riscv", "riscv64", |
|||
"sh": |
|||
return littleEndian{} |
|||
case "armbe", "arm64be", |
|||
"m68k", |
|||
"mips", "mips64", "mips64p32", |
|||
"ppc", "ppc64", |
|||
"s390", "s390x", |
|||
"shbe", |
|||
"sparc", "sparc64": |
|||
return bigEndian{} |
|||
} |
|||
panic("unknown architecture") |
|||
} |
@ -0,0 +1,290 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
// Package cpu implements processor feature detection for
|
|||
// various CPU architectures.
|
|||
package cpu |
|||
|
|||
import ( |
|||
"os" |
|||
"strings" |
|||
) |
|||
|
|||
// Initialized reports whether the CPU features were initialized.
|
|||
//
|
|||
// For some GOOS/GOARCH combinations, initialization of the CPU features depends
|
|||
// on reading an operating-system-specific file, e.g. /proc/self/auxv on linux/arm.
|
|||
// Initialized will report false if reading the file fails.
|
|||
var Initialized bool |
|||
|
|||
// CacheLinePad is used to pad structs to avoid false sharing.
|
|||
// Its only field is a blank byte array of cacheLineSize bytes.
type CacheLinePad struct{ _ [cacheLineSize]byte } |
|||
|
|||
// X86 contains the supported CPU features of the
|
|||
// current X86/AMD64 platform. If the current platform
|
|||
// is not X86/AMD64 then all feature flags are false.
|
|||
//
|
|||
// X86 is padded to avoid false sharing. Further the HasAVX
|
|||
// and HasAVX2 are only set if the OS supports XMM and YMM
|
|||
// registers in addition to the CPUID feature bit being set.
|
|||
var X86 struct { |
|||
_ CacheLinePad |
|||
HasAES bool // AES hardware implementation (AES NI)
|
|||
HasADX bool // Multi-precision add-carry instruction extensions
|
|||
HasAVX bool // Advanced vector extension
|
|||
HasAVX2 bool // Advanced vector extension 2
|
|||
HasAVX512 bool // Advanced vector extension 512
|
|||
HasAVX512F bool // Advanced vector extension 512 Foundation Instructions
|
|||
HasAVX512CD bool // Advanced vector extension 512 Conflict Detection Instructions
|
|||
HasAVX512ER bool // Advanced vector extension 512 Exponential and Reciprocal Instructions
|
|||
HasAVX512PF bool // Advanced vector extension 512 Prefetch Instructions
|
|||
HasAVX512VL bool // Advanced vector extension 512 Vector Length Extensions
|
|||
HasAVX512BW bool // Advanced vector extension 512 Byte and Word Instructions
|
|||
HasAVX512DQ bool // Advanced vector extension 512 Doubleword and Quadword Instructions
|
|||
HasAVX512IFMA bool // Advanced vector extension 512 Integer Fused Multiply Add
|
|||
HasAVX512VBMI bool // Advanced vector extension 512 Vector Byte Manipulation Instructions
|
|||
HasAVX5124VNNIW bool // Advanced vector extension 512 Vector Neural Network Instructions Word variable precision
|
|||
HasAVX5124FMAPS bool // Advanced vector extension 512 Fused Multiply Accumulation Packed Single precision
|
|||
HasAVX512VPOPCNTDQ bool // Advanced vector extension 512 Double and quad word population count instructions
|
|||
HasAVX512VPCLMULQDQ bool // Advanced vector extension 512 Vector carry-less multiply operations
|
|||
HasAVX512VNNI bool // Advanced vector extension 512 Vector Neural Network Instructions
|
|||
HasAVX512GFNI bool // Advanced vector extension 512 Galois field New Instructions
|
|||
HasAVX512VAES bool // Advanced vector extension 512 Vector AES instructions
|
|||
HasAVX512VBMI2 bool // Advanced vector extension 512 Vector Byte Manipulation Instructions 2
|
|||
HasAVX512BITALG bool // Advanced vector extension 512 Bit Algorithms
|
|||
HasAVX512BF16 bool // Advanced vector extension 512 BFloat16 Instructions
|
|||
HasAMXTile bool // Advanced Matrix Extension Tile instructions
|
|||
HasAMXInt8 bool // Advanced Matrix Extension Int8 instructions
|
|||
HasAMXBF16 bool // Advanced Matrix Extension BFloat16 instructions
|
|||
HasBMI1 bool // Bit manipulation instruction set 1
|
|||
HasBMI2 bool // Bit manipulation instruction set 2
|
|||
HasCX16 bool // Compare and exchange 16 Bytes
|
|||
HasERMS bool // Enhanced REP for MOVSB and STOSB
|
|||
HasFMA bool // Fused-multiply-add instructions
|
|||
HasOSXSAVE bool // OS supports XSAVE/XRSTOR for saving/restoring XMM registers.
|
|||
HasPCLMULQDQ bool // PCLMULQDQ instruction - most often used for AES-GCM
|
|||
HasPOPCNT bool // Hamming weight instruction POPCNT.
|
|||
HasRDRAND bool // RDRAND instruction (on-chip random number generator)
|
|||
HasRDSEED bool // RDSEED instruction (on-chip random number generator)
|
|||
HasSSE2 bool // Streaming SIMD extension 2 (always available on amd64)
|
|||
HasSSE3 bool // Streaming SIMD extension 3
|
|||
HasSSSE3 bool // Supplemental streaming SIMD extension 3
|
|||
HasSSE41 bool // Streaming SIMD extension 4 and 4.1
|
|||
HasSSE42 bool // Streaming SIMD extension 4 and 4.2
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// ARM64 contains the supported CPU features of the
|
|||
// current ARMv8(aarch64) platform. If the current platform
|
|||
// is not arm64 then all feature flags are false.
|
|||
// The leading and trailing CacheLinePad fields pad the struct to avoid false sharing.
var ARM64 struct { |
|||
_ CacheLinePad |
|||
HasFP bool // Floating-point instruction set (always available)
|
|||
HasASIMD bool // Advanced SIMD (always available)
|
|||
HasEVTSTRM bool // Event stream support
|
|||
HasAES bool // AES hardware implementation
|
|||
HasPMULL bool // Polynomial multiplication instruction set
|
|||
HasSHA1 bool // SHA1 hardware implementation
|
|||
HasSHA2 bool // SHA2 hardware implementation
|
|||
HasCRC32 bool // CRC32 hardware implementation
|
|||
HasATOMICS bool // Atomic memory operation instruction set
|
|||
HasFPHP bool // Half precision floating-point instruction set
|
|||
HasASIMDHP bool // Advanced SIMD half precision instruction set
|
|||
HasCPUID bool // CPUID identification scheme registers
|
|||
HasASIMDRDM bool // Rounding double multiply add/subtract instruction set
|
|||
HasJSCVT bool // Javascript conversion from floating-point to integer
|
|||
HasFCMA bool // Floating-point multiplication and addition of complex numbers
|
|||
HasLRCPC bool // Release Consistent processor consistent support
|
|||
HasDCPOP bool // Persistent memory support
|
|||
HasSHA3 bool // SHA3 hardware implementation
|
|||
HasSM3 bool // SM3 hardware implementation
|
|||
HasSM4 bool // SM4 hardware implementation
|
|||
HasASIMDDP bool // Advanced SIMD double precision instruction set
|
|||
HasSHA512 bool // SHA512 hardware implementation
|
|||
HasSVE bool // Scalable Vector Extensions
|
|||
HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// ARM contains the supported CPU features of the current ARM (32-bit) platform.
|
|||
// All feature flags are false if:
|
|||
// 1. the current platform is not arm, or
|
|||
// 2. the current operating system is not Linux.
|
|||
// The leading and trailing CacheLinePad fields pad the struct to avoid false sharing.
var ARM struct { |
|||
_ CacheLinePad |
|||
HasSWP bool // SWP instruction support
|
|||
HasHALF bool // Half-word load and store support
|
|||
HasTHUMB bool // ARM Thumb instruction set
|
|||
Has26BIT bool // Address space limited to 26-bits
|
|||
HasFASTMUL bool // 32-bit operand, 64-bit result multiplication support
|
|||
HasFPA bool // Floating point arithmetic support
|
|||
HasVFP bool // Vector floating point support
|
|||
HasEDSP bool // DSP Extensions support
|
|||
HasJAVA bool // Java instruction set
|
|||
HasIWMMXT bool // Intel Wireless MMX technology support
|
|||
HasCRUNCH bool // MaverickCrunch context switching and handling
|
|||
HasTHUMBEE bool // Thumb EE instruction set
|
|||
HasNEON bool // NEON instruction set
|
|||
HasVFPv3 bool // Vector floating point version 3 support
|
|||
HasVFPv3D16 bool // Vector floating point version 3 D8-D15
|
|||
HasTLS bool // Thread local storage support
|
|||
HasVFPv4 bool // Vector floating point version 4 support
|
|||
HasIDIVA bool // Integer divide instruction support in ARM mode
|
|||
HasIDIVT bool // Integer divide instruction support in Thumb mode
|
|||
HasVFPD32 bool // Vector floating point version 3 D15-D31
|
|||
HasLPAE bool // Large Physical Address Extensions
|
|||
HasEVTSTRM bool // Event stream support
|
|||
HasAES bool // AES hardware implementation
|
|||
HasPMULL bool // Polynomial multiplication instruction set
|
|||
HasSHA1 bool // SHA1 hardware implementation
|
|||
HasSHA2 bool // SHA2 hardware implementation
|
|||
HasCRC32 bool // CRC32 hardware implementation
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// MIPS64X contains the supported CPU features of the current mips64/mips64le
|
|||
// platforms. If the current platform is not mips64/mips64le or the current
|
|||
// operating system is not Linux then all feature flags are false.
|
|||
// The leading and trailing CacheLinePad fields pad the struct to avoid false sharing.
var MIPS64X struct { |
|||
_ CacheLinePad |
|||
HasMSA bool // MIPS SIMD architecture
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// PPC64 contains the supported CPU features of the current ppc64/ppc64le platforms.
|
|||
// If the current platform is not ppc64/ppc64le then all feature flags are false.
|
|||
//
|
|||
// For ppc64/ppc64le, it is safe to check only for ISA level starting on ISA v3.00,
|
|||
// since there are no optional categories. There are some exceptions that also
|
|||
// require kernel support to work (DARN, SCV), so there are feature bits for
|
|||
// those as well. The struct is padded to avoid false sharing.
|
|||
var PPC64 struct { |
|||
_ CacheLinePad |
|||
HasDARN bool // Hardware random number generator (requires kernel enablement)
|
|||
HasSCV bool // Syscall vectored (requires kernel enablement)
|
|||
IsPOWER8 bool // ISA v2.07 (POWER8)
|
|||
IsPOWER9 bool // ISA v3.00 (POWER9), implies IsPOWER8
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
// S390X contains the supported CPU features of the current IBM Z
|
|||
// (s390x) platform. If the current platform is not IBM Z then all
|
|||
// feature flags are false.
|
|||
//
|
|||
// S390X is padded to avoid false sharing. Further HasVX is only set
|
|||
// if the OS supports vector registers in addition to the STFLE
|
|||
// feature bit being set.
|
|||
var S390X struct { |
|||
_ CacheLinePad |
|||
HasZARCH bool // z/Architecture mode is active [mandatory]
|
|||
HasSTFLE bool // store facility list extended
|
|||
HasLDISP bool // long (20-bit) displacements
|
|||
HasEIMM bool // 32-bit immediates
|
|||
HasDFP bool // decimal floating point
|
|||
HasETF3EH bool // ETF-3 enhanced
|
|||
HasMSA bool // message security assist (CPACF)
|
|||
HasAES bool // KM-AES{128,192,256} functions
|
|||
HasAESCBC bool // KMC-AES{128,192,256} functions
|
|||
HasAESCTR bool // KMCTR-AES{128,192,256} functions
|
|||
HasAESGCM bool // KMA-GCM-AES{128,192,256} functions
|
|||
HasGHASH bool // KIMD-GHASH function
|
|||
HasSHA1 bool // K{I,L}MD-SHA-1 functions
|
|||
HasSHA256 bool // K{I,L}MD-SHA-256 functions
|
|||
HasSHA512 bool // K{I,L}MD-SHA-512 functions
|
|||
HasSHA3 bool // K{I,L}MD-SHA3-{224,256,384,512} and K{I,L}MD-SHAKE-{128,256} functions
|
|||
HasVX bool // vector facility
|
|||
HasVXE bool // vector-enhancements facility 1
|
|||
_ CacheLinePad |
|||
} |
|||
|
|||
func init() { |
|||
archInit() |
|||
initOptions() |
|||
processOptions() |
|||
} |
|||
|
|||
// options contains the cpu debug options that can be used in GODEBUG.
|
|||
// Options are arch dependent and are added by the arch specific initOptions functions.
|
|||
// Features that are mandatory for the specific GOARCH should have the Required field set
|
|||
// (e.g. SSE2 on amd64).
|
|||
var options []option |
|||
|
|||
// option describes a single CPU feature that GODEBUG may switch on or off.
// Option names should be lower case, e.g. avx instead of AVX.
type option struct {
	Name      string // name compared against the key of a cpu.<key>=<value> entry
	Feature   *bool  // feature flag written when the option is applied
	Specified bool   // whether feature value was specified in GODEBUG
	Enable    bool   // whether feature should be enabled
	Required  bool   // whether feature is mandatory and can not be disabled
}
|||
|
|||
func processOptions() { |
|||
env := os.Getenv("GODEBUG") |
|||
field: |
|||
for env != "" { |
|||
field := "" |
|||
i := strings.IndexByte(env, ',') |
|||
if i < 0 { |
|||
field, env = env, "" |
|||
} else { |
|||
field, env = env[:i], env[i+1:] |
|||
} |
|||
if len(field) < 4 || field[:4] != "cpu." { |
|||
continue |
|||
} |
|||
i = strings.IndexByte(field, '=') |
|||
if i < 0 { |
|||
print("GODEBUG sys/cpu: no value specified for \"", field, "\"\n") |
|||
continue |
|||
} |
|||
key, value := field[4:i], field[i+1:] // e.g. "SSE2", "on"
|
|||
|
|||
var enable bool |
|||
switch value { |
|||
case "on": |
|||
enable = true |
|||
case "off": |
|||
enable = false |
|||
default: |
|||
print("GODEBUG sys/cpu: value \"", value, "\" not supported for cpu option \"", key, "\"\n") |
|||
continue field |
|||
} |
|||
|
|||
if key == "all" { |
|||
for i := range options { |
|||
options[i].Specified = true |
|||
options[i].Enable = enable || options[i].Required |
|||
} |
|||
continue field |
|||
} |
|||
|
|||
for i := range options { |
|||
if options[i].Name == key { |
|||
options[i].Specified = true |
|||
options[i].Enable = enable |
|||
continue field |
|||
} |
|||
} |
|||
|
|||
print("GODEBUG sys/cpu: unknown cpu feature \"", key, "\"\n") |
|||
} |
|||
|
|||
for _, o := range options { |
|||
if !o.Specified { |
|||
continue |
|||
} |
|||
|
|||
if o.Enable && !*o.Feature { |
|||
print("GODEBUG sys/cpu: can not enable \"", o.Name, "\", missing CPU support\n") |
|||
continue |
|||
} |
|||
|
|||
if !o.Enable && o.Required { |
|||
print("GODEBUG sys/cpu: can not disable \"", o.Name, "\", required CPU feature\n") |
|||
continue |
|||
} |
|||
|
|||
*o.Feature = o.Enable |
|||
} |
|||
} |
@ -0,0 +1,33 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build aix
|
|||
|
|||
package cpu |
|||
|
|||
const (
	// getsystemcfg constants

	// _SC_IMPL is the label passed to getsystemcfg by archInit.
	_SC_IMPL = 2

	// Bits tested by archInit in the value returned for _SC_IMPL.
	_IMPL_POWER8 = 1 << 16
	_IMPL_POWER9 = 1 << 17
)
|||
|
|||
func archInit() { |
|||
impl := getsystemcfg(_SC_IMPL) |
|||
if impl&_IMPL_POWER8 != 0 { |
|||
PPC64.IsPOWER8 = true |
|||
} |
|||
if impl&_IMPL_POWER9 != 0 { |
|||
PPC64.IsPOWER8 = true |
|||
PPC64.IsPOWER9 = true |
|||
} |
|||
|
|||
Initialized = true |
|||
} |
|||
|
|||
func getsystemcfg(label int) (n uint64) { |
|||
r0, _ := callgetsystemcfg(label) |
|||
n = uint64(r0) |
|||
return |
|||
} |
@ -0,0 +1,73 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
// cacheLineSize is the cache line size, in bytes, assumed for 32-bit ARM.
const cacheLineSize = 32
|||
|
|||
// HWCAP bits.
// These are specific to Linux.
// Each constant is a single-bit mask; the bit position follows the order of
// declaration, starting at bit 0.
const (
	hwcap_SWP = 1 << iota
	hwcap_HALF
	hwcap_THUMB
	hwcap_26BIT
	hwcap_FAST_MULT
	hwcap_FPA
	hwcap_VFP
	hwcap_EDSP
	hwcap_JAVA
	hwcap_IWMMXT
	hwcap_CRUNCH
	hwcap_THUMBEE
	hwcap_NEON
	hwcap_VFPv3
	hwcap_VFPv3D16
	hwcap_TLS
	hwcap_VFPv4
	hwcap_IDIVA
	hwcap_IDIVT
	hwcap_VFPD32
	hwcap_LPAE
	hwcap_EVTSTRM
)

// HWCAP2 bits.
// These are specific to Linux.
// Numbering restarts at bit 0 because these masks apply to a separate word.
const (
	hwcap2_AES = 1 << iota
	hwcap2_PMULL
	hwcap2_SHA1
	hwcap2_SHA2
	hwcap2_CRC32
)
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "pmull", Feature: &ARM.HasPMULL}, |
|||
{Name: "sha1", Feature: &ARM.HasSHA1}, |
|||
{Name: "sha2", Feature: &ARM.HasSHA2}, |
|||
{Name: "swp", Feature: &ARM.HasSWP}, |
|||
{Name: "thumb", Feature: &ARM.HasTHUMB}, |
|||
{Name: "thumbee", Feature: &ARM.HasTHUMBEE}, |
|||
{Name: "tls", Feature: &ARM.HasTLS}, |
|||
{Name: "vfp", Feature: &ARM.HasVFP}, |
|||
{Name: "vfpd32", Feature: &ARM.HasVFPD32}, |
|||
{Name: "vfpv3", Feature: &ARM.HasVFPv3}, |
|||
{Name: "vfpv3d16", Feature: &ARM.HasVFPv3D16}, |
|||
{Name: "vfpv4", Feature: &ARM.HasVFPv4}, |
|||
{Name: "half", Feature: &ARM.HasHALF}, |
|||
{Name: "26bit", Feature: &ARM.Has26BIT}, |
|||
{Name: "fastmul", Feature: &ARM.HasFASTMUL}, |
|||
{Name: "fpa", Feature: &ARM.HasFPA}, |
|||
{Name: "edsp", Feature: &ARM.HasEDSP}, |
|||
{Name: "java", Feature: &ARM.HasJAVA}, |
|||
{Name: "iwmmxt", Feature: &ARM.HasIWMMXT}, |
|||
{Name: "crunch", Feature: &ARM.HasCRUNCH}, |
|||
{Name: "neon", Feature: &ARM.HasNEON}, |
|||
{Name: "idivt", Feature: &ARM.HasIDIVT}, |
|||
{Name: "idiva", Feature: &ARM.HasIDIVA}, |
|||
{Name: "lpae", Feature: &ARM.HasLPAE}, |
|||
{Name: "evtstrm", Feature: &ARM.HasEVTSTRM}, |
|||
{Name: "aes", Feature: &ARM.HasAES}, |
|||
{Name: "crc32", Feature: &ARM.HasCRC32}, |
|||
} |
|||
|
|||
} |
@ -0,0 +1,172 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import "runtime" |
|||
|
|||
// cacheLineSize is used to prevent false sharing of cache lines.
// 128 is chosen because Apple Silicon, a.k.a. M1, has a 128-byte cache line
// size. The larger value costs little and is much more future-proof.
const cacheLineSize = 128
|||
|
|||
func initOptions() { |
|||
options = []option{ |
|||
{Name: "fp", Feature: &ARM64.HasFP}, |
|||
{Name: "asimd", Feature: &ARM64.HasASIMD}, |
|||
{Name: "evstrm", Feature: &ARM64.HasEVTSTRM}, |
|||
{Name: "aes", Feature: &ARM64.HasAES}, |
|||
{Name: "fphp", Feature: &ARM64.HasFPHP}, |
|||
{Name: "jscvt", Feature: &ARM64.HasJSCVT}, |
|||
{Name: "lrcpc", Feature: &ARM64.HasLRCPC}, |
|||
{Name: "pmull", Feature: &ARM64.HasPMULL}, |
|||
{Name: "sha1", Feature: &ARM64.HasSHA1}, |
|||
{Name: "sha2", Feature: &ARM64.HasSHA2}, |
|||
{Name: "sha3", Feature: &ARM64.HasSHA3}, |
|||
{Name: "sha512", Feature: &ARM64.HasSHA512}, |
|||
{Name: "sm3", Feature: &ARM64.HasSM3}, |
|||
{Name: "sm4", Feature: &ARM64.HasSM4}, |
|||
{Name: "sve", Feature: &ARM64.HasSVE}, |
|||
{Name: "crc32", Feature: &ARM64.HasCRC32}, |
|||
{Name: "atomics", Feature: &ARM64.HasATOMICS}, |
|||
{Name: "asimdhp", Feature: &ARM64.HasASIMDHP}, |
|||
{Name: "cpuid", Feature: &ARM64.HasCPUID}, |
|||
{Name: "asimrdm", Feature: &ARM64.HasASIMDRDM}, |
|||
{Name: "fcma", Feature: &ARM64.HasFCMA}, |
|||
{Name: "dcpop", Feature: &ARM64.HasDCPOP}, |
|||
{Name: "asimddp", Feature: &ARM64.HasASIMDDP}, |
|||
{Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM}, |
|||
} |
|||
} |
|||
|
|||
func archInit() { |
|||
switch runtime.GOOS { |
|||
case "freebsd": |
|||
readARM64Registers() |
|||
case "linux", "netbsd", "openbsd": |
|||
doinit() |
|||
default: |
|||
// Many platforms don't seem to allow reading these registers.
|
|||
setMinimalFeatures() |
|||
} |
|||
} |
|||
|
|||
// setMinimalFeatures fakes the minimal ARM64 features expected by
|
|||
// TestARM64minimalFeatures.
|
|||
func setMinimalFeatures() { |
|||
ARM64.HasASIMD = true |
|||
ARM64.HasFP = true |
|||
} |
|||
|
|||
func readARM64Registers() { |
|||
Initialized = true |
|||
|
|||
parseARM64SystemRegisters(getisar0(), getisar1(), getpfr0()) |
|||
} |
|||
|
|||
func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) { |
|||
// ID_AA64ISAR0_EL1
|
|||
switch extractBits(isar0, 4, 7) { |
|||
case 1: |
|||
ARM64.HasAES = true |
|||
case 2: |
|||
ARM64.HasAES = true |
|||
ARM64.HasPMULL = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 8, 11) { |
|||
case 1: |
|||
ARM64.HasSHA1 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 12, 15) { |
|||
case 1: |
|||
ARM64.HasSHA2 = true |
|||
case 2: |
|||
ARM64.HasSHA2 = true |
|||
ARM64.HasSHA512 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 16, 19) { |
|||
case 1: |
|||
ARM64.HasCRC32 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 20, 23) { |
|||
case 2: |
|||
ARM64.HasATOMICS = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 28, 31) { |
|||
case 1: |
|||
ARM64.HasASIMDRDM = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 32, 35) { |
|||
case 1: |
|||
ARM64.HasSHA3 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 36, 39) { |
|||
case 1: |
|||
ARM64.HasSM3 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 40, 43) { |
|||
case 1: |
|||
ARM64.HasSM4 = true |
|||
} |
|||
|
|||
switch extractBits(isar0, 44, 47) { |
|||
case 1: |
|||
ARM64.HasASIMDDP = true |
|||
} |
|||
|
|||
// ID_AA64ISAR1_EL1
|
|||
switch extractBits(isar1, 0, 3) { |
|||
case 1: |
|||
ARM64.HasDCPOP = true |
|||
} |
|||
|
|||
switch extractBits(isar1, 12, 15) { |
|||
case 1: |
|||
ARM64.HasJSCVT = true |
|||
} |
|||
|
|||
switch extractBits(isar1, 16, 19) { |
|||
case 1: |
|||
ARM64.HasFCMA = true |
|||
} |
|||
|
|||
switch extractBits(isar1, 20, 23) { |
|||
case 1: |
|||
ARM64.HasLRCPC = true |
|||
} |
|||
|
|||
// ID_AA64PFR0_EL1
|
|||
switch extractBits(pfr0, 16, 19) { |
|||
case 0: |
|||
ARM64.HasFP = true |
|||
case 1: |
|||
ARM64.HasFP = true |
|||
ARM64.HasFPHP = true |
|||
} |
|||
|
|||
switch extractBits(pfr0, 20, 23) { |
|||
case 0: |
|||
ARM64.HasASIMD = true |
|||
case 1: |
|||
ARM64.HasASIMD = true |
|||
ARM64.HasASIMDHP = true |
|||
} |
|||
|
|||
switch extractBits(pfr0, 32, 35) { |
|||
case 1: |
|||
ARM64.HasSVE = true |
|||
} |
|||
} |
|||
|
|||
// extractBits returns bits start through end (both inclusive, bit 0 being
// the least significant) of data, shifted down so that bit start becomes
// bit 0 of the result.
func extractBits(data uint64, start, end uint) uint {
	width := end - start + 1
	mask := uint(1)<<width - 1
	return uint(data>>start) & mask
}
@ -0,0 +1,31 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved. |
|||
// Use of this source code is governed by a BSD-style |
|||
// license that can be found in the LICENSE file. |
|||
|
|||
//go:build gc |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// func getisar0() uint64
//
// Returns the value of ID_AA64ISAR0_EL1. The instruction is emitted as a raw
// WORD; its intended mnemonic is given in the comment next to it.
TEXT ·getisar0(SB),NOSPLIT,$0-8
	// get Instruction Set Attributes 0 into x0
	// mrs x0, ID_AA64ISAR0_EL1 = d5380600
	WORD	$0xd5380600
	// Store R0 as the function result.
	MOVD	R0, ret+0(FP)
	RET
|||
|
|||
// func getisar1() uint64
//
// Returns the value of ID_AA64ISAR1_EL1. The instruction is emitted as a raw
// WORD; its intended mnemonic is given in the comment next to it.
TEXT ·getisar1(SB),NOSPLIT,$0-8
	// get Instruction Set Attributes 1 into x0
	// mrs x0, ID_AA64ISAR1_EL1 = d5380620
	WORD	$0xd5380620
	// Store R0 as the function result.
	MOVD	R0, ret+0(FP)
	RET
|||
|
|||
// func getpfr0() uint64
//
// Returns the value of ID_AA64PFR0_EL1. The instruction is emitted as a raw
// WORD; its intended mnemonic is given in the comment next to it.
TEXT ·getpfr0(SB),NOSPLIT,$0-8
	// get Processor Feature Register 0 into x0
	// mrs x0, ID_AA64PFR0_EL1 = d5380400
	WORD	$0xd5380400
	// Store R0 as the function result.
	MOVD	R0, ret+0(FP)
	RET
@ -0,0 +1,11 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gc
|
|||
|
|||
package cpu |
|||
|
|||
// getisar0, getisar1 and getpfr0 are declared without Go bodies in gc
// builds; this file only provides their signatures. Each returns a raw
// 64-bit register value that parseARM64SystemRegisters decodes.
func getisar0() uint64
func getisar1() uint64
func getpfr0() uint64
@ -0,0 +1,21 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gc
|
|||
|
|||
package cpu |
|||
|
|||
// haveAsmFunctions reports whether the other functions in this file can
// be safely called. In gc builds they can, so it always returns true.
func haveAsmFunctions() bool {
	return true
}
|||
|
|||
// The following feature detection functions are defined in cpu_s390x.s.
// They are likely to be expensive to call so the results should be cached.
// Only the signatures are declared here; there are no Go bodies.
func stfle() facilityList
func kmQuery() queryResult
func kmcQuery() queryResult
func kmctrQuery() queryResult
func kmaQuery() queryResult
func kimdQuery() queryResult
func klmdQuery() queryResult
@ -0,0 +1,15 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build (386 || amd64 || amd64p32) && gc
|
|||
|
|||
package cpu |
|||
|
|||
// cpuid is implemented in cpu_x86.s for gc compiler
// and in cpu_gccgo.c for gccgo.
// It takes the EAX and ECX input values and returns the resulting EAX, EBX,
// ECX and EDX values. Only the signature is declared here.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
|||
|
|||
// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
// and in cpu_gccgo.c for gccgo.
// It returns the EAX and EDX halves of the result. Only the signature is
// declared here.
func xgetbv() (eax, edx uint32)
@ -0,0 +1,11 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gccgo
|
|||
|
|||
package cpu |
|||
|
|||
// getisar0 is the gccgo stand-in for the register read; it always reports 0.
func getisar0() uint64 {
	return 0
}

// getisar1 is the gccgo stand-in for the register read; it always reports 0.
func getisar1() uint64 {
	return 0
}

// getpfr0 is the gccgo stand-in for the register read; it always reports 0.
func getpfr0() uint64 {
	return 0
}
@ -0,0 +1,22 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build gccgo
|
|||
|
|||
package cpu |
|||
|
|||
// haveAsmFunctions reports whether the other functions in this file can
// be safely called. Under gccgo they are panicking stubs, so it always
// returns false.
func haveAsmFunctions() bool {
	return false
}
|||
|
|||
// TODO(mundaym): the following feature detection functions are currently
|
|||
// stubs. See https://golang.org/cl/162887 for how to fix this.
|
|||
// They are likely to be expensive to call so the results should be cached.
|
|||
func stfle() facilityList { panic("not implemented for gccgo") } |
|||
func kmQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kmcQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kmctrQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kmaQuery() queryResult { panic("not implemented for gccgo") } |
|||
func kimdQuery() queryResult { panic("not implemented for gccgo") } |
|||
func klmdQuery() queryResult { panic("not implemented for gccgo") } |
@ -0,0 +1,37 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build (386 || amd64 || amd64p32) && gccgo
|
|||
|
|||
#include <cpuid.h> |
|||
#include <stdint.h> |
|||
#include <x86intrin.h> |
|||
|
|||
// Need to wrap __get_cpuid_count because it's declared as static.
// The arguments are forwarded unchanged and the callee's return value is
// passed straight back to the caller.
int
gccgoGetCpuidCount(uint32_t leaf, uint32_t subleaf,
                   uint32_t *eax, uint32_t *ebx,
                   uint32_t *ecx, uint32_t *edx)
{
	int status = __get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);

	return status;
}
|||
|
|||
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#pragma GCC push_options
#pragma GCC target("xsave")
#pragma clang attribute push (__attribute__((target("xsave"))), apply_to=function)

// xgetbv reads the contents of an XCR (Extended Control Register)
// specified in the ECX register into registers EDX:EAX.
// Currently, the only supported value for XCR is 0.
// The low 32 bits of the result are stored through eax and the high 32 bits
// through edx.
void
gccgoXgetbv(uint32_t *eax, uint32_t *edx)
{
	uint64_t xcr0 = _xgetbv(0);

	*eax = xcr0 & 0xffffffff;
	*edx = xcr0 >> 32;
}

#pragma clang attribute pop
#pragma GCC pop_options
@ -0,0 +1,31 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build (386 || amd64 || amd64p32) && gccgo
|
|||
|
|||
package cpu |
|||
|
|||
// gccgoGetCpuidCount is bound to the external symbol named in the extern
// directive below. It stores its four results through the pointer arguments.
//
//extern gccgoGetCpuidCount
func gccgoGetCpuidCount(eaxArg, ecxArg uint32, eax, ebx, ecx, edx *uint32)

// cpuid forwards eaxArg and ecxArg to gccgoGetCpuidCount and returns the four
// register values it writes back.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) {
	gccgoGetCpuidCount(eaxArg, ecxArg, &eax, &ebx, &ecx, &edx)
	return eax, ebx, ecx, edx
}
|||
|
|||
// gccgoXgetbv is bound to the external symbol named in the extern directive
// below. It stores its two results through the pointer arguments.
//
//extern gccgoXgetbv
func gccgoXgetbv(eax, edx *uint32)

// xgetbv calls gccgoXgetbv and returns the two values it writes back.
func xgetbv() (eax, edx uint32) {
	gccgoXgetbv(&eax, &edx)
	return eax, edx
}
|||
|
|||
// darwinSupportsAVX512 always reports false in gccgo builds, because
// gccgo doesn't build on Darwin, per:
// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
func darwinSupportsAVX512() bool {
	const supported = false
	return supported
}
@ -0,0 +1,15 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build !386 && !amd64 && !amd64p32 && !arm64
|
|||
|
|||
package cpu |
|||
|
|||
func archInit() { |
|||
if err := readHWCAP(); err != nil { |
|||
return |
|||
} |
|||
doinit() |
|||
Initialized = true |
|||
} |
@ -0,0 +1,39 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
func doinit() { |
|||
ARM.HasSWP = isSet(hwCap, hwcap_SWP) |
|||
ARM.HasHALF = isSet(hwCap, hwcap_HALF) |
|||
ARM.HasTHUMB = isSet(hwCap, hwcap_THUMB) |
|||
ARM.Has26BIT = isSet(hwCap, hwcap_26BIT) |
|||
ARM.HasFASTMUL = isSet(hwCap, hwcap_FAST_MULT) |
|||
ARM.HasFPA = isSet(hwCap, hwcap_FPA) |
|||
ARM.HasVFP = isSet(hwCap, hwcap_VFP) |
|||
ARM.HasEDSP = isSet(hwCap, hwcap_EDSP) |
|||
ARM.HasJAVA = isSet(hwCap, hwcap_JAVA) |
|||
ARM.HasIWMMXT = isSet(hwCap, hwcap_IWMMXT) |
|||
ARM.HasCRUNCH = isSet(hwCap, hwcap_CRUNCH) |
|||
ARM.HasTHUMBEE = isSet(hwCap, hwcap_THUMBEE) |
|||
ARM.HasNEON = isSet(hwCap, hwcap_NEON) |
|||
ARM.HasVFPv3 = isSet(hwCap, hwcap_VFPv3) |
|||
ARM.HasVFPv3D16 = isSet(hwCap, hwcap_VFPv3D16) |
|||
ARM.HasTLS = isSet(hwCap, hwcap_TLS) |
|||
ARM.HasVFPv4 = isSet(hwCap, hwcap_VFPv4) |
|||
ARM.HasIDIVA = isSet(hwCap, hwcap_IDIVA) |
|||
ARM.HasIDIVT = isSet(hwCap, hwcap_IDIVT) |
|||
ARM.HasVFPD32 = isSet(hwCap, hwcap_VFPD32) |
|||
ARM.HasLPAE = isSet(hwCap, hwcap_LPAE) |
|||
ARM.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM) |
|||
ARM.HasAES = isSet(hwCap2, hwcap2_AES) |
|||
ARM.HasPMULL = isSet(hwCap2, hwcap2_PMULL) |
|||
ARM.HasSHA1 = isSet(hwCap2, hwcap2_SHA1) |
|||
ARM.HasSHA2 = isSet(hwCap2, hwcap2_SHA2) |
|||
ARM.HasCRC32 = isSet(hwCap2, hwcap2_CRC32) |
|||
} |
|||
|
|||
// isSet reports whether hwc and value have at least one bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common != 0
}
@ -0,0 +1,111 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
import ( |
|||
"strings" |
|||
"syscall" |
|||
) |
|||
|
|||
// HWCAP/HWCAP2 bits. These are exposed by Linux.
// Each constant is a single-bit mask; the bit position follows the order of
// declaration, starting at bit 0.
const (
	hwcap_FP = 1 << iota
	hwcap_ASIMD
	hwcap_EVTSTRM
	hwcap_AES
	hwcap_PMULL
	hwcap_SHA1
	hwcap_SHA2
	hwcap_CRC32
	hwcap_ATOMICS
	hwcap_FPHP
	hwcap_ASIMDHP
	hwcap_CPUID
	hwcap_ASIMDRDM
	hwcap_JSCVT
	hwcap_FCMA
	hwcap_LRCPC
	hwcap_DCPOP
	hwcap_SHA3
	hwcap_SM3
	hwcap_SM4
	hwcap_ASIMDDP
	hwcap_SHA512
	hwcap_SVE
	hwcap_ASIMDFHM
)
|||
|
|||
// linuxKernelCanEmulateCPUID reports whether we're running
|
|||
// on Linux 4.11+. Ideally we'd like to ask the question about
|
|||
// whether the current kernel contains
|
|||
// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=77c97b4ee21290f5f083173d957843b615abbff2
|
|||
// but the version number will have to do.
|
|||
func linuxKernelCanEmulateCPUID() bool { |
|||
var un syscall.Utsname |
|||
syscall.Uname(&un) |
|||
var sb strings.Builder |
|||
for _, b := range un.Release[:] { |
|||
if b == 0 { |
|||
break |
|||
} |
|||
sb.WriteByte(byte(b)) |
|||
} |
|||
major, minor, _, ok := parseRelease(sb.String()) |
|||
return ok && (major > 4 || major == 4 && minor >= 11) |
|||
} |
|||
|
|||
func doinit() { |
|||
if err := readHWCAP(); err != nil { |
|||
// We failed to read /proc/self/auxv. This can happen if the binary has
|
|||
// been given extra capabilities(7) with /bin/setcap.
|
|||
//
|
|||
// When this happens, we have two options. If the Linux kernel is new
|
|||
// enough (4.11+), we can read the arm64 registers directly which'll
|
|||
// trap into the kernel and then return back to userspace.
|
|||
//
|
|||
// But on older kernels, such as Linux 4.4.180 as used on many Synology
|
|||
// devices, calling readARM64Registers (specifically getisar0) will
|
|||
// cause a SIGILL and we'll die. So for older kernels, parse /proc/cpuinfo
|
|||
// instead.
|
|||
//
|
|||
// See golang/go#57336.
|
|||
if linuxKernelCanEmulateCPUID() { |
|||
readARM64Registers() |
|||
} else { |
|||
readLinuxProcCPUInfo() |
|||
} |
|||
return |
|||
} |
|||
|
|||
// HWCAP feature bits
|
|||
ARM64.HasFP = isSet(hwCap, hwcap_FP) |
|||
ARM64.HasASIMD = isSet(hwCap, hwcap_ASIMD) |
|||
ARM64.HasEVTSTRM = isSet(hwCap, hwcap_EVTSTRM) |
|||
ARM64.HasAES = isSet(hwCap, hwcap_AES) |
|||
ARM64.HasPMULL = isSet(hwCap, hwcap_PMULL) |
|||
ARM64.HasSHA1 = isSet(hwCap, hwcap_SHA1) |
|||
ARM64.HasSHA2 = isSet(hwCap, hwcap_SHA2) |
|||
ARM64.HasCRC32 = isSet(hwCap, hwcap_CRC32) |
|||
ARM64.HasATOMICS = isSet(hwCap, hwcap_ATOMICS) |
|||
ARM64.HasFPHP = isSet(hwCap, hwcap_FPHP) |
|||
ARM64.HasASIMDHP = isSet(hwCap, hwcap_ASIMDHP) |
|||
ARM64.HasCPUID = isSet(hwCap, hwcap_CPUID) |
|||
ARM64.HasASIMDRDM = isSet(hwCap, hwcap_ASIMDRDM) |
|||
ARM64.HasJSCVT = isSet(hwCap, hwcap_JSCVT) |
|||
ARM64.HasFCMA = isSet(hwCap, hwcap_FCMA) |
|||
ARM64.HasLRCPC = isSet(hwCap, hwcap_LRCPC) |
|||
ARM64.HasDCPOP = isSet(hwCap, hwcap_DCPOP) |
|||
ARM64.HasSHA3 = isSet(hwCap, hwcap_SHA3) |
|||
ARM64.HasSM3 = isSet(hwCap, hwcap_SM3) |
|||
ARM64.HasSM4 = isSet(hwCap, hwcap_SM4) |
|||
ARM64.HasASIMDDP = isSet(hwCap, hwcap_ASIMDDP) |
|||
ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512) |
|||
ARM64.HasSVE = isSet(hwCap, hwcap_SVE) |
|||
ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM) |
|||
} |
|||
|
|||
// isSet reports whether hwc and value have at least one bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common != 0
}
@ -0,0 +1,22 @@ |
|||
// Copyright 2020 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux && (mips64 || mips64le)
|
|||
|
|||
package cpu |
|||
|
|||
// HWCAP bits. These are exposed by the Linux kernel 5.4.
const (
	// CPU features
	hwcap_MIPS_MSA = 0x2 // bit 1
)
|||
|
|||
func doinit() { |
|||
// HWCAP feature bits
|
|||
MIPS64X.HasMSA = isSet(hwCap, hwcap_MIPS_MSA) |
|||
} |
|||
|
|||
// isSet reports whether hwc and value have at least one bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common != 0
}
@ -0,0 +1,9 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
|
|||
|
|||
package cpu |
|||
|
|||
// doinit is a no-op on Linux architectures that have no feature flags to
// derive from the hardware capability words.
func doinit() {
}
@ -0,0 +1,30 @@ |
|||
// Copyright 2018 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build linux && (ppc64 || ppc64le)
|
|||
|
|||
package cpu |
|||
|
|||
// HWCAP/HWCAP2 bits. These are exposed by the kernel.
// All masks below are tested against the hwCap2 word by doinit.
const (
	// ISA Level
	_PPC_FEATURE2_ARCH_2_07 = 1 << 31 // 0x80000000
	_PPC_FEATURE2_ARCH_3_00 = 1 << 23 // 0x00800000

	// CPU features
	_PPC_FEATURE2_DARN = 1 << 21 // 0x00200000
	_PPC_FEATURE2_SCV  = 1 << 20 // 0x00100000
)
|||
|
|||
func doinit() { |
|||
// HWCAP2 feature bits
|
|||
PPC64.IsPOWER8 = isSet(hwCap2, _PPC_FEATURE2_ARCH_2_07) |
|||
PPC64.IsPOWER9 = isSet(hwCap2, _PPC_FEATURE2_ARCH_3_00) |
|||
PPC64.HasDARN = isSet(hwCap2, _PPC_FEATURE2_DARN) |
|||
PPC64.HasSCV = isSet(hwCap2, _PPC_FEATURE2_SCV) |
|||
} |
|||
|
|||
// isSet reports whether hwc and value have at least one bit in common.
func isSet(hwc uint, value uint) bool {
	common := hwc & value
	return common != 0
}
@ -0,0 +1,40 @@ |
|||
// Copyright 2019 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
package cpu |
|||
|
|||
const (
	// bit mask values from /usr/include/bits/hwcap.h
	hwcap_ZARCH  = 1 << 1  // 2
	hwcap_STFLE  = 1 << 2  // 4
	hwcap_MSA    = 1 << 3  // 8
	hwcap_LDISP  = 1 << 4  // 16
	hwcap_EIMM   = 1 << 5  // 32
	hwcap_DFP    = 1 << 6  // 64
	hwcap_ETF3EH = 1 << 8  // 256
	hwcap_VX     = 1 << 11 // 2048
	hwcap_VXE    = 1 << 13 // 8192
)
|||
|
|||
func initS390Xbase() { |
|||
// test HWCAP bit vector
|
|||
has := func(featureMask uint) bool { |
|||
return hwCap&featureMask == featureMask |
|||
} |
|||
|
|||
// mandatory
|
|||
S390X.HasZARCH = has(hwcap_ZARCH) |
|||
|
|||
// optional
|
|||
S390X.HasSTFLE = has(hwcap_STFLE) |
|||
S390X.HasLDISP = has(hwcap_LDISP) |
|||
S390X.HasEIMM = has(hwcap_EIMM) |
|||
S390X.HasETF3EH = has(hwcap_ETF3EH) |
|||
S390X.HasDFP = has(hwcap_DFP) |
|||
S390X.HasMSA = has(hwcap_MSA) |
|||
S390X.HasVX = has(hwcap_VX) |
|||
if S390X.HasVX { |
|||
S390X.HasVXE = has(hwcap_VXE) |
|||
} |
|||
} |
@ -0,0 +1,12 @@ |
|||
// Copyright 2022 The Go Authors. All rights reserved.
|
|||
// Use of this source code is governed by a BSD-style
|
|||
// license that can be found in the LICENSE file.
|
|||
|
|||
//go:build loong64
|
|||
|
|||
package cpu |
|||
|
|||
// cacheLineSize is the cache line size, in bytes, assumed for loong64.
const cacheLineSize = 64
|||
|
|||
// initOptions registers no GODEBUG options on loong64; it is intentionally
// empty.
func initOptions() {}
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue