没啥好说的,有图有真相。先上一张逐像素光照的效果图。
编译器的架构基本上已经确定了,但还有很多功能亟待实现。
寻合作者。
下面是编译器生成的、未经优化的 LLVM IR 代码:
; Unoptimized, compiler-generated IR for the shader entry point
; @"Mvs_main@@QSVSIn@@" (presumably a vertex shader "vs_main" taking a VSIn;
; the mangling scheme is not shown here -- confirm against the compiler).
;
; Arguments (all pointers, nothing is returned by value):
;   %.arg.stri - struct whose fields 0 and 1 are pointers to <4 x float>
;   %.arg.bufi - struct: field 0 is a [4 x <4 x float>] matrix,
;                fields 1..3 are <4 x float>
;   %.arg.stro - never referenced in this function body
;   %.arg.bufo - struct of five <4 x float> fields; receives the results
;
; Results written to %.arg.bufo:
;   field 0 = Mmul(in.0, bufi.0)
;   field 1 = in.1
;   field 2 = bufi.1 - in.0
;   field 3 = bufi.2 - in.0
;   field 4 = bufi.3 - in.0
;
; Note: the allocas request align 16 while every load/store is marked align 4.
define void @"Mvs_main@@QSVSIn@@"(%.s.stri* %.arg.stri, %.s.bufi* %.arg.bufi, %.s.stro* %.arg.stro, %.s.bufo* %.arg.bufo) {
; --- .init.vargs: gather the by-pointer inputs into a local %VSIn value ---
.init.vargs:
%in = alloca %VSIn, align 16
; in.0 = *(stri.0)
%0 = load %.s.stri* %.arg.stri, align 4
%1 = extractvalue %.s.stri %0, 0
%2 = load <4 x float>* %1, align 4
%3 = getelementptr %VSIn* %in, i32 0, i32 0
store <4 x float> %2, <4 x float>* %3, align 4
; in.1 = *(stri.1)
%4 = load %.s.stri* %.arg.stri, align 4
%5 = extractvalue %.s.stri %4, 1
%6 = load <4 x float>* %5, align 4
%7 = getelementptr %VSIn* %in, i32 0, i32 1
store <4 x float> %6, <4 x float>* %7, align 4
br label %.entry
; --- .entry: empty block that only forwards to .body ---
.entry: ; preds = %.init.vargs
br label %.body
; --- .body: compute into a local %out, then copy it to %.arg.bufo ---
.body: ; preds = %.entry
; This alloca is outside the first basic block of the function.
%out = alloca %.s.bufo, align 16
; out.1 = in.1 (passed through unchanged)
%in1 = load %VSIn* %in, align 4
%8 = extractvalue %VSIn %in1, 1
%9 = getelementptr %.s.bufo* %out, i32 0, i32 1
store <4 x float> %8, <4 x float>* %9, align 4
; out.0 = Mmul(in.0, bufi.0): vector times 4x4 matrix, by the callee's name
%in2 = load %VSIn* %in, align 4
%10 = extractvalue %VSIn %in2, 0
%11 = load %.s.bufi* %.arg.bufi, align 4
%12 = extractvalue %.s.bufi %11, 0
%13 = call <4 x float> @"Mmul@@QV4F@@QM44F@@"(<4 x float> %10, [4 x <4 x float>] %12)
%14 = getelementptr %.s.bufo* %out, i32 0, i32 0
store <4 x float> %13, <4 x float>* %14, align 4
; out.2 = bufi.1 - in.0
%15 = load %.s.bufi* %.arg.bufi, align 4
%16 = extractvalue %.s.bufi %15, 1
%in3 = load %VSIn* %in, align 4
%17 = extractvalue %VSIn %in3, 0
%18 = fsub <4 x float> %16, %17
%19 = getelementptr %.s.bufo* %out, i32 0, i32 2
store <4 x float> %18, <4 x float>* %19, align 4
; out.3 = bufi.2 - in.0
%20 = load %.s.bufi* %.arg.bufi, align 4
%21 = extractvalue %.s.bufi %20, 2
%in4 = load %VSIn* %in, align 4
%22 = extractvalue %VSIn %in4, 0
%23 = fsub <4 x float> %21, %22
%24 = getelementptr %.s.bufo* %out, i32 0, i32 3
store <4 x float> %23, <4 x float>* %24, align 4
; out.4 = bufi.3 - in.0
%25 = load %.s.bufi* %.arg.bufi, align 4
%26 = extractvalue %.s.bufi %25, 3
%in5 = load %VSIn* %in, align 4
%27 = extractvalue %VSIn %in5, 0
%28 = fsub <4 x float> %26, %27
%29 = getelementptr %.s.bufo* %out, i32 0, i32 4
store <4 x float> %28, <4 x float>* %29, align 4
; Copy %out to the caller's buffer one field at a time; the whole struct is
; reloaded before each extractvalue.
%out6 = load %.s.bufo* %out, align 4
%30 = extractvalue %.s.bufo %out6, 0
%31 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 0
store <4 x float> %30, <4 x float>* %31, align 4
%out7 = load %.s.bufo* %out, align 4
%32 = extractvalue %.s.bufo %out7, 1
%33 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 1
store <4 x float> %32, <4 x float>* %33, align 4
%out8 = load %.s.bufo* %out, align 4
%34 = extractvalue %.s.bufo %out8, 2
%35 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 2
store <4 x float> %34, <4 x float>* %35, align 4
%out9 = load %.s.bufo* %out, align 4
%36 = extractvalue %.s.bufo %out9, 3
%37 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 3
store <4 x float> %36, <4 x float>* %37, align 4
%out10 = load %.s.bufo* %out, align 4
%38 = extractvalue %.s.bufo %out10, 4
%39 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 4
store <4 x float> %38, <4 x float>* %39, align 4
ret void
; The instruction below starts a new, unnamed basic block that nothing
; branches to, i.e. an unreachable second return. NOTE(review): its label
; comment appears to have been lost in this paste -- confirm against the
; original dump.
; No predecessors!
ret void
}
JIT 生成的机器码(x64):
; JIT-compiled x64 listing (columns: address, instruction; Intel operand order)
; for the IR function @"Mvs_main@@QSVSIn@@".
;
; Register use is consistent with the Microsoft x64 calling convention
; (pointer args in rcx/rdx/r8/r9, rsi/rdi/rbx preserved by the callee) --
; presumably a Win64 target; confirm.
;   rcx = %.arg.stri (read at [rcx] and [rcx+8])
;   rdx = %.arg.bufi (kept in rdi)
;   r8  = third argument; overwritten below without ever being read
;   r9  = %.arg.bufo (kept in rsi)
;
; Stack slots used:
;   [rbp-40h]           in.0
;   [rbp-30h]           in.1
;   [rbp-50h..rbp-90h]  16-byte temporaries passed by address to the call
;   [rbx-50h..rbx-10h]  %out fields 0..4 (dynamically allocated, 50h bytes)
;
; --- prologue: set up the frame, save rsi/rdi/rbx, reserve 78h bytes ---
0000000000250010 push rbp
0000000000250011 mov rbp,rsp
0000000000250014 push rsi
0000000000250015 push rdi
0000000000250016 push rbx
0000000000250017 sub rsp,78h
; --- .init.vargs: in.0 = *stri.0, in.1 = *stri.1 ---
000000000025001B mov rax,qword ptr [rcx]
000000000025001E movups xmm0,xmmword ptr [rax]
0000000000250021 movaps xmmword ptr [rbp-40h],xmm0
0000000000250025 mov rax,qword ptr [rcx+8]
0000000000250029 movups xmm0,xmmword ptr [rax]
; Move the bufo/bufi pointers into rsi/rdi, which are not reloaded after the
; call below.
000000000025002C mov rsi,r9
000000000025002F mov rdi,rdx
0000000000250032 movaps xmmword ptr [rbp-30h],xmm0
; --- .body: carve 50h bytes (5 x 16) off the stack for %out; rbx marks its
;     top, so field k lives at [rbx-50h+10h*k] ---
0000000000250036 mov rbx,rsp
0000000000250039 lea rax,[rbx-50h]
000000000025003D mov rsp,rax
; out.1 = in.1
0000000000250040 movaps xmm0,xmmword ptr [rbp-30h]
0000000000250044 movups xmmword ptr [rbx-40h],xmm0
; Set up the call: the vector and the four matrix rows are each passed as a
; pointer to a 16-byte temporary (rcx, rdx, r8, r9, plus one stack slot).
0000000000250048 lea rcx,[rbp-50h]
000000000025004C lea rdx,[rbp-60h]
0000000000250050 lea r8,[rbp-70h]
0000000000250054 lea r9,[rbp-80h]
; rax = absolute call target; presumably the address of @"Mmul@@QV4F@@QM44F@@",
; the only call in the IR.
0000000000250058 mov rax,130010h
0000000000250062 lea r10,[rbp-90h]
; Load the four 16-byte rows at bufi+00h..30h (bufi.0) and in.0.
0000000000250069 movups xmm0,xmmword ptr [rdi]
000000000025006C movups xmm1,xmmword ptr [rdi+10h]
0000000000250070 movups xmm2,xmmword ptr [rdi+20h]
0000000000250074 movups xmm3,xmmword ptr [rdi+30h]
0000000000250078 movaps xmm4,xmmword ptr [rbp-40h]
; Reserve 30h bytes of outgoing-argument space (presumably 20h of shadow
; space plus the fifth-argument slot, padded to keep rsp 16-byte aligned).
000000000025007C sub rsp,30h
; Spill the argument values into the temporaries addressed above.
0000000000250080 movaps xmmword ptr [rbp-50h],xmm4
0000000000250084 movaps xmmword ptr [rbp-60h],xmm0
0000000000250088 movaps xmmword ptr [rbp-70h],xmm1
000000000025008C movaps xmmword ptr [rbp-80h],xmm2
0000000000250090 movaps xmmword ptr [rbp-90h],xmm3
; Fifth pointer argument goes on the stack at [rsp+20h].
0000000000250097 mov qword ptr [rsp+20h],r10
000000000025009C call rax
000000000025009E add rsp,30h
; out.0 = call result (returned in xmm0)
00000000002500A2 movups xmmword ptr [rbx-50h],xmm0
; out.2 = bufi.1 - in.0
00000000002500A6 movups xmm0,xmmword ptr [rdi+40h]
00000000002500AA subps xmm0,xmmword ptr [rbp-40h]
00000000002500AE movups xmmword ptr [rbx-30h],xmm0
; out.3 = bufi.2 - in.0
00000000002500B2 movups xmm0,xmmword ptr [rdi+50h]
00000000002500B6 subps xmm0,xmmword ptr [rbp-40h]
00000000002500BA movups xmmword ptr [rbx-20h],xmm0
; out.4 = bufi.3 - in.0
00000000002500BE movups xmm0,xmmword ptr [rdi+60h]
00000000002500C2 subps xmm0,xmmword ptr [rbp-40h]
00000000002500C6 movups xmmword ptr [rbx-10h],xmm0
; Copy %out (five 16-byte fields) to the caller's buffer at rsi.
00000000002500CA movups xmm0,xmmword ptr [rbx-50h]
00000000002500CE movups xmmword ptr [rsi],xmm0
00000000002500D1 movups xmm0,xmmword ptr [rbx-40h]
00000000002500D5 movups xmmword ptr [rsi+10h],xmm0
00000000002500D9 movups xmm0,xmmword ptr [rbx-30h]
00000000002500DD movups xmmword ptr [rsi+20h],xmm0
00000000002500E1 movups xmm0,xmmword ptr [rbx-20h]
00000000002500E5 movups xmmword ptr [rsi+30h],xmm0
00000000002500E9 movups xmm0,xmmword ptr [rbx-10h]
00000000002500ED movups xmmword ptr [rsi+40h],xmm0
; --- epilogue: rbp-18h is just below the three saved registers, so this
;     discards both the fixed frame and the dynamic %out allocation ---
00000000002500F1 lea rsp,[rbp-18h]
00000000002500F5 pop rbx
00000000002500F6 pop rdi
00000000002500F7 pop rsi
00000000002500F8 pop rbp
00000000002500F9 ret