目次: GCC
ベクトルのロード、ストアだけでは自動ベクトル化できるコードが少なすぎるので、他の演算も定義したいと思います。
;; gcc/config/riscv/riscv.md
;; Vector mode that an instruction operates on.  The value is one of
;; "unknown", "V32SI" or "V64SI"; instructions that do not set it get
;; "unknown".
(define_attr "vecmode" "unknown,V32SI,V64SI"
(const_string "unknown"))
...
;; Iterator over the hardware-supported vector modes (V32SI and V64SI).
;; Each mode is enabled only when TARGET_VECTOR holds.
(define_mode_iterator ANYV [(V32SI "TARGET_VECTOR")
(V64SI "TARGET_VECTOR")])
...
;; ★★ Addition
;; Vector add for every ANYV mode: operand 0 = operand 1 + operand 2, with
;; all three operands constrained to "v" registers.
;; The mnemonic and the operand list are separated by a tab ("\t"); without
;; it the assembler would see the mnemonic fused with the first operand.
;; NOTE(review): operand 2 uses the "arith_operand" predicate although its
;; only constraint is "v"; "register_operand" may be the better fit --
;; confirm against predicates.md.
(define_insn "add<mode>3"
  [(set (match_operand:ANYV 0 "register_operand" "=v")
	(plus:ANYV (match_operand:ANYV 1 "register_operand" " v")
		   (match_operand:ANYV 2 "arith_operand" " v")))]
  "TARGET_VECTOR"
  "vadd.vv\t%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])
;; ★★ Subtraction
;; Vector subtract for every ANYV mode: operand 0 = operand 1 - operand 2,
;; with all three operands constrained to "v" registers.
;; The mnemonic and the operand list are separated by a tab ("\t"); without
;; it the assembler would see the mnemonic fused with the first operand.
(define_insn "sub<mode>3"
  [(set (match_operand:ANYV 0 "register_operand" "=v")
	(minus:ANYV (match_operand:ANYV 1 "register_operand" " v")
		    (match_operand:ANYV 2 "arith_operand" " v")))]
  "TARGET_VECTOR"
  "vsub.vv\t%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])
;; ★★ Multiplication
;; Vector multiply for every ANYV mode: operand 0 = operand 1 * operand 2,
;; with all three operands constrained to "v" registers.
;; The mnemonic and the operand list are separated by a tab ("\t"); without
;; it the assembler would see the mnemonic fused with the first operand.
(define_insn "mul<mode>3"
  [(set (match_operand:ANYV 0 "register_operand" "=v")
	(mult:ANYV (match_operand:ANYV 1 "register_operand" " v")
		   (match_operand:ANYV 2 "arith_operand" " v")))]
  "TARGET_VECTOR"
  "vmul.vv\t%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])
;; ★★ Division
;; This code iterator allows signed and unsigned division and remainder
;; (div, udiv, mod, umod) to be generated from the same template.
(define_code_iterator any_div [div udiv mod umod])
;; Vector division/remainder for every ANYV mode and every code in any_div:
;; operand 0 = operand 1 <op> operand 2, all constrained to "v" registers.
;; <optab> and <insn> are code attributes that are not defined in this
;; excerpt; they supply the pattern name and the mnemonic stem.
;; The mnemonic and the operand list are separated by a tab ("\t"); without
;; it the assembler would see the mnemonic fused with the first operand.
(define_insn "<optab><mode>3"
  [(set (match_operand:ANYV 0 "register_operand" "=v")
	(any_div:ANYV (match_operand:ANYV 1 "register_operand" " v")
		      (match_operand:ANYV 2 "arith_operand" " v")))]
  "TARGET_VECTOR"
  "v<insn>.vv\t%0,%1,%2"
  [(set_attr "type" "arith")
   (set_attr "vecmode" "<MODE>")])
;; ★★ Logical operations
;; This code iterator allows the three bitwise instructions (and, ior, xor)
;; to be generated from the same template.
(define_code_iterator any_bitwise [and ior xor])
...
;; Vector bitwise and/or/xor for every ANYV mode: operand 0 = operand 1 <op>
;; operand 2, all constrained to "v" registers.  The "%" on operand 1 marks
;; operands 1 and 2 as commutative.
;; <optab> and <insn> are code attributes that are not defined in this
;; excerpt; they supply the pattern name and the mnemonic stem.
;; The insn condition must be spelled TARGET_VECTOR, as in the arithmetic
;; patterns; a misspelled condition is an undefined identifier.
;; The mnemonic and the operand list are separated by a tab ("\t"); without
;; it the assembler would see the mnemonic fused with the first operand.
(define_insn "<optab><mode>3"
  [(set (match_operand:ANYV 0 "register_operand" "=v")
	(any_bitwise:ANYV (match_operand:ANYV 1 "register_operand" "%v")
			  (match_operand:ANYV 2 "arith_operand" " v")))]
  "TARGET_VECTOR"
  "v<insn>.vv\t%0,%1,%2"
  [(set_attr "type" "logical")
   (set_attr "vecmode" "<MODE>")])
四則演算、論理演算を使う下記のプログラムを書きます。自動ベクトル化で四則演算のループをベクトル化しても良いですが、ベクトル拡張記法(Vector Extensions (Using the GNU Compiler Collection (GCC)))を使ったほうが狙った演算が出しやすく、テストするときに楽です。
/* GNU vector type of int elements, 256 bytes wide (vector_size is given in
 * bytes); that is 64 elements assuming a 4-byte int. */
typedef int __v64si __attribute__((__vector_size__(256)));
/*
 * Exercise the vector add/sub/mul/div and and/or/xor patterns.
 *
 * Four vectors are loaded from b[] with inline asm, combined using GNU
 * vector-extension operators, and the four results are stored back to b[].
 * b is declared outside this function.
 */
void test(void)
{
	__v64si v10, v11, v12, v13;

	/* Load the inputs; "=&v" is an early-clobber output in a "v" register. */
	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v10) : "A"(b[10]));
	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v11) : "A"(b[20]));
	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v12) : "A"(b[30]));
	__asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v13) : "A"(b[40]));

	/* One statement per operation under test. */
	v10 = v11 + v12;
	v11 &= v12 - v13;
	v12 |= v13 * v10;
	v13 ^= v10 / v11;

	/* Store the results; note that b[40] is both an input and an output. */
	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[40]) : "v"(v10));
	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[50]) : "v"(v11));
	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[60]) : "v"(v12));
	__asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[70]) : "v"(v13));
}
ビルド方法は何でも良いですが、最適化レベルをOgにするとアセンブラが見やすいと思います。
$ riscv32-unknown-elf-gcc b.c -nostdlib -g -Og -march=rv32gcv -mabi=ilp32f
$ riscv32-unknown-elf-objdump -dS a.out
...
  __asm__ volatile ("vlw.v %0, %1\n" : "=&v"(v13) : "A"(b[40]));
   10092: 0a028793  addi a5,t0,160
   10096: 1207e207  vlw.v v4,(a5)
  v10 = v11 + v12;
   1009a: 022081d7  vadd.vv v3,v2,v1
  v11 &= v12 - v13;
   1009e: 0a120057  vsub.vv v0,v1,v4
   100a2: 26010057  vand.vv v0,v0,v2
  v12 |= v13 * v10;
   100a6: 9641a157  vmul.vv v2,v4,v3
   100aa: 2a208157  vor.vv v2,v2,v1
  v13 ^= v10 / v11;
   100ae: 863020d7  vdiv.vv v1,v3,v0
   100b2: 2e1200d7  vxor.vv v1,v1,v4
  __asm__ volatile ("vsw.v %1, %0\n" : "=A"(b[40]) : "v"(v10));
   100b6: 0207e1a7  vsw.v v3,(a5)
...
うまくいっているようです。良かった良かった。
< | 2020 | > | ||||
<< | < | 07 | > | >> | ||
日 | 月 | 火 | 水 | 木 | 金 | 土 |
- | - | - | 1 | 2 | 3 | 4 |
5 | 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | 14 | 15 | 16 | 17 | 18 |
19 | 20 | 21 | 22 | 23 | 24 | 25 |
26 | 27 | 28 | 29 | 30 | 31 | - |
合計:
本日:
管理者: Katsuhiro Suzuki(katsuhiro( a t )katsuster.net)
This is Simple Diary 1.0
Copyright(C) Katsuhiro Suzuki 2006-2023.
Powered by PHP 8.2.15.
using GD bundled (2.1.0 compatible)(png support.)