岁月流转,往昔空明

C++博客 首页 新随笔 联系 聚合 管理
  118 Posts :: 3 Stories :: 413 Comments :: 0 Trackbacks

没啥好说的,有图有真相。上个逐像素光照的图。

 

基本上编译器的架构已经确定了,好多功能亟待实现。

寻合作者。

 

[图:逐像素光照渲染结果]

 

编译器生成的LLVM未优化代码。

; Vertex-shader entry point as emitted by the compiler front end (unoptimized).
;
; Arguments (all pointers to aggregate types declared outside this listing):
;   %.arg.stri - read: fields 0 and 1 are each a <4 x float>* that is dereferenced.
;   %.arg.bufi - read: field 0 is a [4 x <4 x float>], fields 1..3 are <4 x float>.
;   %.arg.stro - never referenced in this body.
;   %.arg.bufo - written: fields 0..4, each a <4 x float>.
;
; Overall effect, with in = the local %VSIn copy:
;   bufo[0] = mul(in[0], bufi[0])
;   bufo[1] = in[1]
;   bufo[2] = bufi[1] - in[0]
;   bufo[3] = bufi[2] - in[0]
;   bufo[4] = bufi[3] - in[0]
define void @"Mvs_main@@QSVSIn@@"(%.s.stri* %.arg.stri, %.s.bufi* %.arg.bufi, %.s.stro* %.arg.stro, %.s.bufo* %.arg.bufo) {
.init.vargs:
  ; Gather the two vectors referenced by %.arg.stri into the local %VSIn value %in.
  %in = alloca %VSIn, align 16
  ; in[0] = *stri[0]
  %0 = load %.s.stri* %.arg.stri, align 4
  %1 = extractvalue %.s.stri %0, 0
  %2 = load <4 x float>* %1, align 4
  %3 = getelementptr %VSIn* %in, i32 0, i32 0
  store <4 x float> %2, <4 x float>* %3, align 4
  ; in[1] = *stri[1]
  %4 = load %.s.stri* %.arg.stri, align 4
  %5 = extractvalue %.s.stri %4, 1
  %6 = load <4 x float>* %5, align 4
  %7 = getelementptr %VSIn* %in, i32 0, i32 1
  store <4 x float> %6, <4 x float>* %7, align 4
  br label %.entry

.entry:                                           ; preds = %.init.vargs
  ; Empty pass-through block; it only branches on to the body.
  br label %.body

.body:                                            ; preds = %.entry
  ; %out is a local staging copy of the output aggregate. Note that this alloca
  ; is not in the function's first basic block.
  %out = alloca %.s.bufo, align 16
  ; out[1] = in[1]
  %in1 = load %VSIn* %in, align 4
  %8 = extractvalue %VSIn %in1, 1
  %9 = getelementptr %.s.bufo* %out, i32 0, i32 1
  store <4 x float> %8, <4 x float>* %9, align 4
  ; out[0] = mul(in[0], bufi[0]) - vector times the 4 x <4 x float> array.
  %in2 = load %VSIn* %in, align 4
  %10 = extractvalue %VSIn %in2, 0
  %11 = load %.s.bufi* %.arg.bufi, align 4
  %12 = extractvalue %.s.bufi %11, 0
  %13 = call <4 x float> @"Mmul@@QV4F@@QM44F@@"(<4 x float> %10, [4 x <4 x float>] %12)
  %14 = getelementptr %.s.bufo* %out, i32 0, i32 0
  store <4 x float> %13, <4 x float>* %14, align 4
  ; out[2] = bufi[1] - in[0]
  %15 = load %.s.bufi* %.arg.bufi, align 4
  %16 = extractvalue %.s.bufi %15, 1
  %in3 = load %VSIn* %in, align 4
  %17 = extractvalue %VSIn %in3, 0
  %18 = fsub <4 x float> %16, %17
  %19 = getelementptr %.s.bufo* %out, i32 0, i32 2
  store <4 x float> %18, <4 x float>* %19, align 4
  ; out[3] = bufi[2] - in[0]
  %20 = load %.s.bufi* %.arg.bufi, align 4
  %21 = extractvalue %.s.bufi %20, 2
  %in4 = load %VSIn* %in, align 4
  %22 = extractvalue %VSIn %in4, 0
  %23 = fsub <4 x float> %21, %22
  %24 = getelementptr %.s.bufo* %out, i32 0, i32 3
  store <4 x float> %23, <4 x float>* %24, align 4
  ; out[4] = bufi[3] - in[0]
  %25 = load %.s.bufi* %.arg.bufi, align 4
  %26 = extractvalue %.s.bufi %25, 3
  %in5 = load %VSIn* %in, align 4
  %27 = extractvalue %VSIn %in5, 0
  %28 = fsub <4 x float> %26, %27
  %29 = getelementptr %.s.bufo* %out, i32 0, i32 4
  store <4 x float> %28, <4 x float>* %29, align 4
  ; Copy the staged result to the caller's buffer field by field:
  ; bufo[i] = out[i] for i = 0..4. Each step reloads the whole %out aggregate.
  %out6 = load %.s.bufo* %out, align 4
  %30 = extractvalue %.s.bufo %out6, 0
  %31 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 0
  store <4 x float> %30, <4 x float>* %31, align 4
  %out7 = load %.s.bufo* %out, align 4
  %32 = extractvalue %.s.bufo %out7, 1
  %33 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 1
  store <4 x float> %32, <4 x float>* %33, align 4
  %out8 = load %.s.bufo* %out, align 4
  %34 = extractvalue %.s.bufo %out8, 2
  %35 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 2
  store <4 x float> %34, <4 x float>* %35, align 4
  %out9 = load %.s.bufo* %out, align 4
  %36 = extractvalue %.s.bufo %out9, 3
  %37 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 3
  store <4 x float> %36, <4 x float>* %37, align 4
  %out10 = load %.s.bufo* %out, align 4
  %38 = extractvalue %.s.bufo %out10, 4
  %39 = getelementptr %.s.bufo* %.arg.bufo, i32 0, i32 4
  store <4 x float> %38, <4 x float>* %39, align 4
  ret void
  ; The block below follows a terminator and has no predecessors, so it can
  ; never execute; it is a redundant second return emitted by the front end.
                                                  ; No predecessors!
  ret void
}
JITed Code(x64):
; Debugger disassembly (Intel syntax) of the JIT-compiled code at 250010h.
; Register roles visible in this listing:
;   rcx - incoming pointer; pointers at [rcx] and [rcx+8] are dereferenced
;   rdx - incoming pointer, kept in rdi; vectors are read from [rdi]..[rdi+60h]
;   r9  - incoming pointer, kept in rsi; vectors are written to [rsi]..[rsi+40h]
;   r8  - incoming value is overwritten at 250050h without being read
;   rbx - stack pointer before the dynamic allocation; [rbx-50h]..[rbx-1] holds
;         five 16-byte temporaries
; NOTE(review): the argument registers and the fifth argument at [rsp+20h] are
; consistent with the Microsoft x64 calling convention - confirm the JIT target.
;
; Prologue: set up the frame pointer, save rsi/rdi/rbx, reserve 78h bytes.
0000000000250010  push        rbp  
0000000000250011  mov         rbp,rsp  
0000000000250014  push        rsi  
0000000000250015  push        rdi  
0000000000250016  push        rbx  
0000000000250017  sub         rsp,78h  
; Load two 16-byte vectors through the pointers at [rcx] and [rcx+8] and keep
; them in the frame at [rbp-40h] and [rbp-30h]. rdx and r9 are stashed in
; rdi/rsi in between.
000000000025001B  mov         rax,qword ptr [rcx]  
000000000025001E  movups      xmm0,xmmword ptr [rax]  
0000000000250021  movaps      xmmword ptr [rbp-40h],xmm0  
0000000000250025  mov         rax,qword ptr [rcx+8]  
0000000000250029  movups      xmm0,xmmword ptr [rax]  
000000000025002C  mov         rsi,r9  
000000000025002F  mov         rdi,rdx  
0000000000250032  movaps      xmmword ptr [rbp-30h],xmm0  
; Dynamic stack allocation: remember the current rsp in rbx, then move rsp
; down by 50h. The new 50h-byte region is addressed as [rbx-50h]..[rbx-10h].
0000000000250036  mov         rbx,rsp  
0000000000250039  lea         rax,[rbx-50h]  
000000000025003D  mov         rsp,rax  
; Second temporary = the vector saved at [rbp-30h].
0000000000250040  movaps      xmm0,xmmword ptr [rbp-30h]  
0000000000250044  movups      xmmword ptr [rbx-40h],xmm0  
; Call setup: rcx, rdx, r8, r9 and r10 receive the addresses of five 16-byte
; frame slots. The slots are then filled with the vector from [rbp-40h] and
; the four vectors at [rdi]..[rdi+30h]; r10 goes to the stack at [rsp+20h].
0000000000250048  lea         rcx,[rbp-50h]  
000000000025004C  lea         rdx,[rbp-60h]  
0000000000250050  lea         r8,[rbp-70h]  
0000000000250054  lea         r9,[rbp-80h]  
; Callee address is an absolute immediate (presumably the JITed mul routine
; called from the IR - verify).
0000000000250058  mov         rax,130010h  
0000000000250062  lea         r10,[rbp-90h]  
0000000000250069  movups      xmm0,xmmword ptr [rdi]  
000000000025006C  movups      xmm1,xmmword ptr [rdi+10h]  
0000000000250070  movups      xmm2,xmmword ptr [rdi+20h]  
0000000000250074  movups      xmm3,xmmword ptr [rdi+30h]  
0000000000250078  movaps      xmm4,xmmword ptr [rbp-40h]  
; Reserve 30h bytes of outgoing-call area for the duration of the call.
000000000025007C  sub         rsp,30h  
0000000000250080  movaps      xmmword ptr [rbp-50h],xmm4  
0000000000250084  movaps      xmmword ptr [rbp-60h],xmm0  
0000000000250088  movaps      xmmword ptr [rbp-70h],xmm1  
000000000025008C  movaps      xmmword ptr [rbp-80h],xmm2  
0000000000250090  movaps      xmmword ptr [rbp-90h],xmm3  
0000000000250097  mov         qword ptr [rsp+20h],r10  
000000000025009C  call        rax  
000000000025009E  add         rsp,30h  
; First temporary = value left in xmm0 by the call.
00000000002500A2  movups      xmmword ptr [rbx-50h],xmm0  
; Temporaries 3..5 = [rdi+40h], [rdi+50h], [rdi+60h], each minus the vector
; saved at [rbp-40h] (packed single-precision subtract).
00000000002500A6  movups      xmm0,xmmword ptr [rdi+40h]  
00000000002500AA  subps       xmm0,xmmword ptr [rbp-40h]  
00000000002500AE  movups      xmmword ptr [rbx-30h],xmm0  
00000000002500B2  movups      xmm0,xmmword ptr [rdi+50h]  
00000000002500B6  subps       xmm0,xmmword ptr [rbp-40h]  
00000000002500BA  movups      xmmword ptr [rbx-20h],xmm0  
00000000002500BE  movups      xmm0,xmmword ptr [rdi+60h]  
00000000002500C2  subps       xmm0,xmmword ptr [rbp-40h]  
00000000002500C6  movups      xmmword ptr [rbx-10h],xmm0  
; Copy the five temporaries, in order, to [rsi]..[rsi+40h].
00000000002500CA  movups      xmm0,xmmword ptr [rbx-50h]  
00000000002500CE  movups      xmmword ptr [rsi],xmm0  
00000000002500D1  movups      xmm0,xmmword ptr [rbx-40h]  
00000000002500D5  movups      xmmword ptr [rsi+10h],xmm0  
00000000002500D9  movups      xmm0,xmmword ptr [rbx-30h]  
00000000002500DD  movups      xmmword ptr [rsi+20h],xmm0  
00000000002500E1  movups      xmm0,xmmword ptr [rbx-20h]  
00000000002500E5  movups      xmmword ptr [rsi+30h],xmm0  
00000000002500E9  movups      xmm0,xmmword ptr [rbx-10h]  
00000000002500ED  movups      xmmword ptr [rsi+40h],xmm0  
; Epilogue: rbp-18h is where rsp stood after the three register pushes, so
; this single lea discards both the fixed 78h area and the dynamic 50h area
; before the saved registers are restored.
00000000002500F1  lea         rsp,[rbp-18h]  
00000000002500F5  pop         rbx  
00000000002500F6  pop         rdi  
00000000002500F7  pop         rsi  
00000000002500F8  pop         rbp  
00000000002500F9  ret  
posted on 2011-06-25 16:10 空明流转 阅读(2221) 评论(5)  编辑 收藏 引用

评论

# re: SALVIA的Vertex Shader已经可以work了 2011-06-25 19:00 ooseven
楼主都用上llvm了?太赞!
能不能专门发一些介绍llvm主题的文章?
早就听过llvm的大名却无缘识得真面目  回复  更多评论
  

# re: SALVIA的Vertex Shader已经可以work了 2011-06-25 19:05 空明流转
@ooseven
LLVM的手册要好好看,一个是Tutorial,一个是Reference。
要理解LLVM IR和C++ Interface之间的关系就OK了。

LLVM的源代码质量很高,调试起来什么的都很方便,小bug甚至自己都可以修的。  回复  更多评论
  

# re: SALVIA的Vertex Shader已经可以work了 2011-06-25 19:19 千暮(zblc)
mark!  回复  更多评论
  

# re: SALVIA的Vertex Shader已经可以work了 2011-06-27 01:19 陈梓瀚(vczh)
坐等可以work变成可以leverage  回复  更多评论
  

# re: SALVIA的Vertex Shader已经可以work了 2011-12-21 19:35 Chansey
用llvm模拟的shader compiler?  回复  更多评论
  


只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理