コグノスケ

未来から過去へ表示(*) link

過去から未来へ表示

もっと前

2020年7月19日 >>> 2020年7月19日

もっと後

2020年7月19日

permalink

編集する

GCCを調べる - その17 - ベクトル四則演算、論理演算の定義

目次: GCC

ベクトルのロード、ストアだけでは自動ベクトル化できるコードが少なすぎるので、他の演算も定義したいと思います。

ベクトル演算の加算、減算、乗算、除算、論理演算（and, or, xor）の定義


;; gcc/config/riscv/riscv.md

(define_attr "vecmode" "unknown,V32SI,V64SI"
  (const_string "unknown"))

...

;; Iterator for hardware supported vector modes.
(define_mode_iterator ANYV [(V32SI "TARGET_VECTOR")
			    (V64SI "TARGET_VECTOR")])

...


;;★★加算

(define_insn "add<mode>3"
  [(set (match_operand:ANYV	       0 "register_operand" "=v")
	(plus:ANYV (match_operand:ANYV 1 "register_operand" " v")
		   (match_operand:ANYV 2 "arith_operand"    " v")))]
  "TARGET_VECTOR"
  "vadd.vvt%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])


;;★★減算

(define_insn "sub<mode>3"
  [(set (match_operand:ANYV		0 "register_operand" "=v")
	(minus:ANYV (match_operand:ANYV 1 "register_operand" " v")
		    (match_operand:ANYV 2 "arith_operand"    " v")))]
  "TARGET_VECTOR"
  "vsub.vvt%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])


;;★★乗算

(define_insn "mul<mode>3"
  [(set (match_operand:ANYV	       0 "register_operand" "=v")
	(mult:ANYV (match_operand:ANYV 1 "register_operand" " v")
		   (match_operand:ANYV 2 "arith_operand"    " v")))]
  "TARGET_VECTOR"
  "vmul.vvt%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])


;;★★除算

;; This code iterator allows unsigned and signed division to be generated
;; from the same template.
(define_code_iterator any_div [div udiv mod umod])

(define_insn "<optab><mode>3"
  [(set (match_operand:ANYV		  0 "register_operand" "=v")
	(any_div:ANYV (match_operand:ANYV 1 "register_operand" " v")
		      (match_operand:ANYV 2 "arith_operand"    " v")))]
  "TARGET_VECTOR"
  "v<insn>.vvt%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])


;;★★論理演算

;; This code iterator allows the three bitwise instructions to be generated
;; from the same template.
(define_code_iterator any_bitwise [and ior xor])

...

(define_insn "<optab><mode>3"
  [(set (match_operand:ANYV		      0 "register_operand" "=v")
	(any_bitwise:ANYV (match_operand:ANYV 1 "register_operand" "%v")
			  (match_operand:ANYV 2 "arith_operand"    " v")))]
  "TAREGET_VECTOR"
  "v<insn>.vvt%0,%1,%2"
  [(set_attr "type" "logical")
   (set_attr "vecmode" "<MODE>")])

四則演算、論理演算を使う下記のプログラムを書きます。自動ベクトル化で四則演算のループをベクトル化しても良いですが、ベクトル拡張記法（Vector Extensions (Using the GNU Compiler Collection (GCC))）を使ったほうが狙った演算が出しやすく、テストするときに楽です。

ベクトル四則演算、論理演算のサンプルプログラム


typedef int __v64si __attribute__((__vector_size__(256)));

void test()
{
	__v64si v10, v11, v12, v13;0;

	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v10) : "A"(b[10]));
	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v11) : "A"(b[20]));
	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v12) : "A"(b[30]));
	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v13) : "A"(b[40]));

	v10 = v11 + v12;
	v11 &= v12 - v13;
	v12 |= v13 * v10;
	v13 ^= v10 / v11;

	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[40]) : "v"(v10));
	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[50]) : "v"(v11));
	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[60]) : "v"(v12));
	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[70]) : "v"(v13));
}

ビルド方法は何でも良いですが、最適化レベルをOgにするとアセンブラが見やすいと思います。

ベクトル四則演算、論理演算のサンプルプログラム（逆アセンブル）

$ riscv32-unknown-elf-gcc b.c -nostdlib -g -Og -march=rv32gcv -mabi=ilp32f

$ riscv32-unknown-elf-objdump -dS a.out

...

        __asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v13) : "A"(b[40]));
   10092:       0a028793                addi    a5,t0,160
   10096:       1207e207                vlw.v   v4,(a5)

        v10 = v11 + v12;
   1009a:       022081d7                vadd.vv v3,v2,v1
        v11 &= v12 - v13;
   1009e:       0a120057                vsub.vv v0,v1,v4
   100a2:       26010057                vand.vv v0,v0,v2
        v12 |= v13 * v10;
   100a6:       9641a157                vmul.vv v2,v4,v3
   100aa:       2a208157                vor.vv  v2,v2,v1
        v13 ^= v10 / v11;
   100ae:       863020d7                vdiv.vv v1,v3,v0
   100b2:       2e1200d7                vxor.vv v1,v1,v4

        __asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[40]) : "v"(v10));
   100b6:       0207e1a7                vsw.v   v3,(a5)

...

うまくいっているようです。良かった良かった。

編集者:すずき(2023/09/24 11:52)