123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169 |
- // Copyright 2012 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // This code was translated into a form compatible with 6a from the public
- // domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
- // +build amd64,!gccgo,!appengine
- #include "const_amd64.h"
- // func mul(dest, a, b *[5]uint64)
- TEXT ·mul(SB),0,$16-24
- MOVQ dest+0(FP), DI
- MOVQ a+8(FP), SI
- MOVQ b+16(FP), DX
- MOVQ DX,CX
- MOVQ 24(SI),DX
- IMUL3Q $19,DX,AX
- MOVQ AX,0(SP)
- MULQ 16(CX)
- MOVQ AX,R8
- MOVQ DX,R9
- MOVQ 32(SI),DX
- IMUL3Q $19,DX,AX
- MOVQ AX,8(SP)
- MULQ 8(CX)
- ADDQ AX,R8
- ADCQ DX,R9
- MOVQ 0(SI),AX
- MULQ 0(CX)
- ADDQ AX,R8
- ADCQ DX,R9
- MOVQ 0(SI),AX
- MULQ 8(CX)
- MOVQ AX,R10
- MOVQ DX,R11
- MOVQ 0(SI),AX
- MULQ 16(CX)
- MOVQ AX,R12
- MOVQ DX,R13
- MOVQ 0(SI),AX
- MULQ 24(CX)
- MOVQ AX,R14
- MOVQ DX,R15
- MOVQ 0(SI),AX
- MULQ 32(CX)
- MOVQ AX,BX
- MOVQ DX,BP
- MOVQ 8(SI),AX
- MULQ 0(CX)
- ADDQ AX,R10
- ADCQ DX,R11
- MOVQ 8(SI),AX
- MULQ 8(CX)
- ADDQ AX,R12
- ADCQ DX,R13
- MOVQ 8(SI),AX
- MULQ 16(CX)
- ADDQ AX,R14
- ADCQ DX,R15
- MOVQ 8(SI),AX
- MULQ 24(CX)
- ADDQ AX,BX
- ADCQ DX,BP
- MOVQ 8(SI),DX
- IMUL3Q $19,DX,AX
- MULQ 32(CX)
- ADDQ AX,R8
- ADCQ DX,R9
- MOVQ 16(SI),AX
- MULQ 0(CX)
- ADDQ AX,R12
- ADCQ DX,R13
- MOVQ 16(SI),AX
- MULQ 8(CX)
- ADDQ AX,R14
- ADCQ DX,R15
- MOVQ 16(SI),AX
- MULQ 16(CX)
- ADDQ AX,BX
- ADCQ DX,BP
- MOVQ 16(SI),DX
- IMUL3Q $19,DX,AX
- MULQ 24(CX)
- ADDQ AX,R8
- ADCQ DX,R9
- MOVQ 16(SI),DX
- IMUL3Q $19,DX,AX
- MULQ 32(CX)
- ADDQ AX,R10
- ADCQ DX,R11
- MOVQ 24(SI),AX
- MULQ 0(CX)
- ADDQ AX,R14
- ADCQ DX,R15
- MOVQ 24(SI),AX
- MULQ 8(CX)
- ADDQ AX,BX
- ADCQ DX,BP
- MOVQ 0(SP),AX
- MULQ 24(CX)
- ADDQ AX,R10
- ADCQ DX,R11
- MOVQ 0(SP),AX
- MULQ 32(CX)
- ADDQ AX,R12
- ADCQ DX,R13
- MOVQ 32(SI),AX
- MULQ 0(CX)
- ADDQ AX,BX
- ADCQ DX,BP
- MOVQ 8(SP),AX
- MULQ 16(CX)
- ADDQ AX,R10
- ADCQ DX,R11
- MOVQ 8(SP),AX
- MULQ 24(CX)
- ADDQ AX,R12
- ADCQ DX,R13
- MOVQ 8(SP),AX
- MULQ 32(CX)
- ADDQ AX,R14
- ADCQ DX,R15
- MOVQ $REDMASK51,SI
- SHLQ $13,R9:R8
- ANDQ SI,R8
- SHLQ $13,R11:R10
- ANDQ SI,R10
- ADDQ R9,R10
- SHLQ $13,R13:R12
- ANDQ SI,R12
- ADDQ R11,R12
- SHLQ $13,R15:R14
- ANDQ SI,R14
- ADDQ R13,R14
- SHLQ $13,BP:BX
- ANDQ SI,BX
- ADDQ R15,BX
- IMUL3Q $19,BP,DX
- ADDQ DX,R8
- MOVQ R8,DX
- SHRQ $51,DX
- ADDQ R10,DX
- MOVQ DX,CX
- SHRQ $51,DX
- ANDQ SI,R8
- ADDQ R12,DX
- MOVQ DX,R9
- SHRQ $51,DX
- ANDQ SI,CX
- ADDQ R14,DX
- MOVQ DX,AX
- SHRQ $51,DX
- ANDQ SI,R9
- ADDQ BX,DX
- MOVQ DX,R10
- SHRQ $51,DX
- ANDQ SI,AX
- IMUL3Q $19,DX,DX
- ADDQ DX,R8
- ANDQ SI,R10
- MOVQ R8,0(DI)
- MOVQ CX,8(DI)
- MOVQ R9,16(DI)
- MOVQ AX,24(DI)
- MOVQ R10,32(DI)
- RET
|