Your IP : 172.28.240.42


Current Path : /usr/local/go/src/crypto/internal/bigmod/_asm/
Upload File :
Current File : //usr/local/go/src/crypto/internal/bigmod/_asm/nat_amd64_asm.go

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"strconv"

	. "github.com/mmcloughlin/avo/build"
	. "github.com/mmcloughlin/avo/operand"
	. "github.com/mmcloughlin/avo/reg"
)

//go:generate go run . -out ../nat_amd64.s -pkg bigmod

// main sets the target package and build constraint for the generated
// assembly, emits one addMulVVW routine per supported operand size, and then
// hands control to avo to produce the output.
func main() {
	Package("crypto/internal/bigmod")
	ConstraintExpr("!purego")

	// Each size gets its own fully unrolled routine, emitted in ascending
	// order of operand width.
	for _, size := range []int{1024, 1536, 2048} {
		addMulVVW(size)
	}

	Generate()
}

// addMulVVW emits the body of the assembly function named "addMulVVW<bits>".
// The emitted code walks bits/64 consecutive 64-bit words of x and z,
// multiplies each word of x by y, adds the product into the matching word of
// z in place, and returns the final carry word.
//
// Two fully unrolled code paths are emitted. At run time the function tests
// the supportADX byte and jumps to the "adx" label when it equals 1;
// otherwise it falls through to the MULQ/ADDQ/ADCQ path.
//
// bits must be a multiple of 64, and bits/64 must be even because the ADX
// path is unrolled two words per step. addMulVVW panics otherwise.
func addMulVVW(bits int) {
	if bits%64 != 0 {
		panic("bit size unsupported")
	}
	words := bits / 64
	if words%2 != 0 {
		// The ADX loop below emits two iterations per pass. With an odd word
		// count it would emit one iteration too many, touching a word past
		// the end of x and z. Refuse to generate such code.
		panic("bit size unsupported: odd number of 64-bit words")
	}

	Implement("addMulVVW" + strconv.Itoa(bits))

	// Runtime dispatch: take the ADX path when supportADX == 1.
	CMPB(Mem{Symbol: Symbol{Name: "·supportADX"}, Base: StaticBase}, Imm(1))
	JEQ(LabelRef("adx"))

	// Generic path.
	z := Mem{Base: Load(Param("z"), GP64())}
	x := Mem{Base: Load(Param("x"), GP64())}
	y := Load(Param("y"), GP64())

	carry := GP64()
	XORQ(carry, carry) // zero out carry

	for i := 0; i < words; i++ {
		Comment("Iteration " + strconv.Itoa(i))
		hi, lo := RDX, RAX // implicit MULQ inputs and outputs
		MOVQ(x.Offset(i*8), lo)
		MULQ(y) // hi:lo = x[i] * y
		// Add z[i] and the previous carry into lo, folding each carry-out
		// into hi.
		ADDQ(z.Offset(i*8), lo)
		ADCQ(Imm(0), hi)
		ADDQ(carry, lo)
		ADCQ(Imm(0), hi)
		MOVQ(hi, carry) // hi becomes the carry into the next word
		MOVQ(lo, z.Offset(i*8))
	}

	Store(carry, ReturnIndex(0))
	RET()

	Label("adx")

	// The ADX strategy implements the following function, where c1 and c2 are
	// the overflow and the carry flag respectively.
	//
	//    func addMulVVW(z, x []uint, y uint) (carry uint) {
	//        var c1, c2 uint
	//        for i := range z {
	//            hi, lo := bits.Mul(x[i], y)
	//            lo, c1 = bits.Add(lo, z[i], c1)
	//            z[i], c2 = bits.Add(lo, carry, c2)
	//            carry = hi
	//        }
	//        return carry + c1 + c2
	//    }
	//
	// The loop is fully unrolled and the hi / carry registers are alternated
	// instead of introducing a MOV. Because of that alternation the loop body
	// below emits two iterations per pass, which is why the word count must
	// be even.

	z = Mem{Base: Load(Param("z"), GP64())}
	x = Mem{Base: Load(Param("x"), GP64())}
	Load(Param("y"), RDX) // implicit source of MULXQ

	carry = GP64()
	XORQ(carry, carry) // zero out carry
	z0 := GP64()
	XORQ(z0, z0) // unset flags and zero out z0

	for i := 0; i < words; i++ {
		hi, lo := GP64(), GP64()

		// Even word: the high half of the product lands in hi, and the
		// incoming carry word is read from carry.
		Comment("Iteration " + strconv.Itoa(i))
		MULXQ(x.Offset(i*8), lo, hi)
		ADCXQ(carry, lo)
		ADOXQ(z.Offset(i*8), lo)
		MOVQ(lo, z.Offset(i*8))

		i++

		// Odd word: the roles are swapped, so the high half lands in carry
		// and the incoming carry word is read from hi.
		Comment("Iteration " + strconv.Itoa(i))
		MULXQ(x.Offset(i*8), lo, carry)
		ADCXQ(hi, lo)
		ADOXQ(z.Offset(i*8), lo)
		MOVQ(lo, z.Offset(i*8))
	}

	// z0 is zero, so these two adds only fold the pending carry and overflow
	// flags into the returned carry word.
	Comment("Add back carry flags and return")
	ADCXQ(z0, carry)
	ADOXQ(z0, carry)

	Store(carry, ReturnIndex(0))
	RET()
}