AOベンチ高速化計画(その1)

AOベンチを高速化してみましょうという企画を始めます。まずは、プロファイルを取ってみました。いつの間にかエンバグしていて、プロファイラが動かなくなっていました。とりあえず直したので、試したい人(果たしているのだろうか?)は最新版にしておいてください。
結果を以下に示します。各行の左端の数字がその行で費やしたクロック数*1、その右の数字がソースの行番号です。

ざっと見ると、オブジェクトを作るところ(20〜23行目の Vec#initialize や、227行目の Vec.new)と、アクセサ(29行目の Vec#x など)で時間を食っているみたいです。
これをどう料理するのか、大まかな方針はあるのですが、具体的にどうするのかはまだ全然考えていません。

続く(のか?)

                  1:  # AO render benchmark 
                  2:  # Original program (C) Syoyo Fujita in Javascript (and other languages)
                  3:  #      http://lucille.atso-net.jp/blog/?p=642
                  4:  #      http://lucille.atso-net.jp/blog/?p=711
                  5:  # Ruby(yarv2llvm) version by Hideki Miura
                  6:  #
                  7:  
                  8:  IMAGE_WIDTH = 128
                  9:  IMAGE_HEIGHT = 128
                 10:  NSUBSAMPLES = 2
                 11:  NAO_SAMPLES = 4
                 12:  
                 13:  =begin
                 14:  def rand
                 15:    0.5
                 16:  end
                 17:  =end
                 18:  
                 19:  class Vec
   3482907111    20:    def initialize(x, y, z)
   7745809730    21:      @x = x
   8049657700    22:      @y = y
  89874967912    23:      @z = z
                 24:    end
                 25:  
    214036759    26:    def x=(v); @x = v; end
    164287218    27:    def y=(v); @y = v; end
    179976792    28:    def z=(v); @z = v; end
  92319516647    29:    def x; @x; end
   9066123677    30:    def y; @y; end
   6470660511    31:    def z; @z; end
                 32:    
                 33:    def vadd(b)
                 34:      Vec.new(@x + b.x, @y + b.y, @z + b.z)
                 35:    end
                 36:  
    202794523    37:    def vsub(b)
    799888247    38:      Vec.new(@x - b.x, @y - b.y, @z - b.z)
                 39:    end
                 40:    
      7363025    41:    def vcross(b)
     28365353    42:      Vec.new(@y * b.z - @z * b.y,
                 43:              @z * b.x - @x * b.z,
                 44:              @x * b.y - @y * b.x)
                 45:    end
                 46:  
    611327399    47:    def vdot(b)
   2277799286    48:      @x * b.x + @y * b.y + @z * b.z
                 49:    end
                 50:  
     19571071    51:    def vlength
     71391784    52:      Math.sqrt(@x * @x + @y * @y + @z * @z)
                 53:    end
                 54:  
     20394222    55:    def vnormalize
    154529678    56:      len = vlength
   2454418673    57:      v = Vec.new(@x, @y, @z)
     19759771    58:      if len > 1.0e-17 then
    120984490    59:        v.x = v.x / len
    105079075    60:        v.y = v.y / len
     93239809    61:        v.z = v.z / len
                 62:      end
    132115526    63:      v
                 64:    end
                 65:  end
                 66:  
                 67:  
                 68:  class Sphere
          294    69:    def initialize(center, radius)
        12757    70:      @center = center
        21694    71:      @radius = radius
                 72:    end
                 73:    
                 74:    def center; @center; end
                 75:    def radius; @radius; end
                 76:  
    197566836    77:    def intersect(ray, isect)
    549177301    78:      rs = ray.org.vsub(@center)
   1025943933    79:      b = rs.vdot(ray.dir)
   1051817735    80:      c = rs.vdot(rs) - (@radius * @radius)
    203202707    81:      d = b * b - c
    451149528    82:      if d > 0.0 then
     14470075    83:        t = - b - Math.sqrt(d)
                 84:  
     63325012    85:        if t > 0.0 and t < isect.t then
     51692972    86:          isect.t = t
     49138790    87:          isect.hit = true
     14762316    88:          isect.pl = Vec.new(ray.org.x + ray.dir.x * t, 
                 89:                            ray.org.y + ray.dir.y * t, 
                 90:                            ray.org.z + ray.dir.z * t)
     40683471    91:          n = isect.pl.vsub(@center)
     38178484    92:          isect.n = n.vnormalize
                 93:        else
     29409513    94:          0.0
                 95:        end
                 96:      end
                 97:      nil
                 98:    end
                 99:  end
                100:  
                101:  class Plane
          105   102:    def initialize(p, n)
        11067   103:      @p = p
    848920391   104:      @n = n
                105:    end
                106:  
     66415232   107:    def intersect(ray, isect)
    284177670   108:      d = -@p.vdot(@n)
    218832810   109:      v = ray.dir.vdot(@n)
     66846341   110:      v0 = v
     72313262   111:      if v < 0.0 then
      7918877   112:        v0 = -v
                113:      end
     65515935   114:      if v0 < 1.0e-17 then
        32313   115:        return
                116:      end
                117:  
    237211844   118:      t = -(ray.org.vdot(@n) + d) / v
                119:  
    213796475   120:      if t > 0.0 and t < isect.t then
     21664931   121:        isect.hit = true
     24819490   122:        isect.t = t
     28158267   123:        isect.n = @n
     21091378   124:        isect.pl = Vec.new(ray.org.x + t * ray.dir.x,
                125:                          ray.org.y + t * ray.dir.y,
                126:                          ray.org.z + t * ray.dir.z)
                127:      end
                128:      nil
                129:    end
                130:  end
                131:  
                132:  class Ray
     68183865   133:    def initialize(org, dir)
    464286359   134:      @org = org
   5796958193   135:      @dir = dir
                136:    end
                137:  
   2653505502   138:    def org; @org; end
                139:    def org=(v); @org = v; end
   2599960532   140:    def dir; @dir; end
                141:    def dir=(v); @dir = v; end
                142:  end
                143:  
                144:  class Isect
     65991211   145:    def initialize
    399979619   146:      @t = 10000000.0
    141381218   147:      @hit = false
   1506247178   148:      @pl = Vec.new(0.0, 0.0, 0.0)
   1992140835   149:      @n = Vec.new(0.0, 0.0, 0.0)
                150:    end
                151:  
     62780868   152:    def t; @t; end
     69180946   153:    def t=(v); @t = v; end
    310561402   154:    def hit; @hit; end
     49700965   155:    def hit=(v); @hit = v; end
    111828741   156:    def pl; @pl; end 
     70391819   157:    def pl=(v); @pl = v; end
    341768739   158:    def n; @n; end 
     53883525   159:    def n=(v); @n = v; end
                160:  end
                161:  
      4964604   162:  def clamp(f)
      5013257   163:    i = f * 255.5
      5809394   164:    if i > 255.0 then
       345576   165:      i = 255.0
                166:    end
      5108489   167:    if i < 0.0 then
                168:      i = 0.0
                169:    end
    635615432   170:    i.to_i
                171:  end
                172:  
      3553251   173:  def otherBasis(basis, n)
     13719777   174:    basis[2] = Vec.new(n.x, n.y, n.z)
    276236476   175:    basis[1] = Vec.new(0.0, 0.0, 0.0)
                176:    
     20436274   177:    if n.x < 0.6 and n.x > -0.6 then
     27269672   178:      basis[1].x = 1.0
                179:    elsif n.y < 0.6 and n.y > -0.6 then
       756300   180:      basis[1].y = 1.0
                181:    elsif n.z < 0.6 and n.z > -0.6 then
        67901   182:      basis[1].z = 1.0
                183:    else
                184:      basis[1].x = 1.0
                185:    end
                186:  
    126451711   187:    basis[0] = basis[1].vcross(basis[2])
     40735089   188:    basis[0] = basis[0].vnormalize
                189:  
     29057644   190:    basis[1] = basis[2].vcross(basis[0])
     39266610   191:    basis[1] = basis[1].vnormalize
                192:  end
                193:  
                194:  class Scene
           95   195:    def initialize
        34619   196:      @spheres = Array.new
    162606024   197:      @spheres[0] = Sphere.new(Vec.new(-2.0, 0.0, -3.5), 0.5)
        12463   198:      @spheres[1] = Sphere.new(Vec.new(-0.5, 0.0, -3.0), 0.5)
         2163   199:      @spheres[2] = Sphere.new(Vec.new(1.0, 0.0, -2.2), 0.5)
         2027   200:      @plane = Plane.new(Vec.new(0.0, -0.5, 0.0), Vec.new(0.0, 1.0, 0.0))
                201:    end
                202:  
      3991300   203:    def ambient_occlusion(isect)
     86129974   204:      basis = Array.new
     42215920   205:      otherBasis(basis, isect.n)
                206:      
      3628173   207:      ntheta    = NAO_SAMPLES
      3554923   208:      nphi      = NAO_SAMPLES
      3907605   209:      eps       = 0.0001
      3585756   210:      occlusion = 0.0
                211:  
     14516153   212:      p0 = Vec.new(isect.pl.x + eps * isect.n.x, 
                213:                  isect.pl.y + eps * isect.n.y, 
                214:                  isect.pl.z + eps * isect.n.z)
     39819602   215:      nphi.times do |j|
    397433157   216:        ntheta.times do |i|
    205857814   217:          r = rand
    983589779   218:          phi = 2.0 * 3.14159265 * rand
     62048471   219:          x = Math.cos(phi) * Math.sqrt(1.0 - r)
     60359481   220:          y = Math.sin(phi) * Math.sqrt(1.0 - r)
     58835518   221:          z = Math.sqrt(r)
                222:  
    262180475   223:          rx = x * basis[0].x + y * basis[1].x + z * basis[2].x
    173465064   224:          ry = x * basis[0].y + y * basis[1].y + z * basis[2].y
    198884270   225:          rz = x * basis[0].z + y * basis[1].z + z * basis[2].z
                226:          
  54537043340   227:          raydir = Vec.new(rx, ry, rz)
   3202836233   228:          ray = Ray.new(p0, raydir)
                229:          
   1931629501   230:          occisect = Isect.new
    574108733   231:          @spheres[0].intersect(ray, occisect)
    395288862   232:          @spheres[1].intersect(ray, occisect)
    359305624   233:          @spheres[2].intersect(ray, occisect)
    310291804   234:          @plane.intersect(ray, occisect)
    220775329   235:          if occisect.hit then
     69999794   236:            occlusion = occlusion + 1.0
                237:          else
    359931630   238:            0.0
                239:          end
                240:        end
                241:      end
                242:      
     11206986   243:      occlusion = (ntheta.to_f * nphi.to_f - occlusion) / (ntheta.to_f * nphi.to_f)
                244:  
   2645612854   245:      Vec.new(occlusion, occlusion, occlusion)
                246:    end
                247:  
        20086   248:    def render(w, h, nsubsamples)
        15508   249:      cnt = 0
        14836   250:      nsf = nsubsamples.to_f
     43492145   251:      h.times do |y|
    154000401   252:        w.times do |x|
   2297040484   253:          rad = Vec.new(0.0, 0.0, 0.0)
                254:          
                255:          # Subsmpling
     40956785   256:          nsubsamples.times do |v|
    361985815   257:            nsubsamples.times do |u|
                258:  
      6759127   259:              cnt = cnt + 1
      6605290   260:              wf = w.to_f
      6998377   261:              hf = h.to_f
      6876010   262:              xf = x.to_f
      6817729   263:              yf = y.to_f
      6573161   264:              uf = u.to_f
      6575960   265:              vf = v.to_f
                266:  
     34139409   267:              px = (xf + (uf / nsf) - (wf / 2.0)) / (wf / 2.0)
     34857811   268:              py = -(yf + (vf / nsf) - (hf / 2.0)) / (hf / 2.0)
                269:  
   4640601492   270:              eye = Vec.new(px, py, -1.0).vnormalize
                271:              
   1900806167   272:              ray = Ray.new(Vec.new(0.0, 0.0, 0.0), eye)
                273:              
    456369628   274:              isect = Isect.new
    300801323   275:              @spheres[0].intersect(ray, isect)
     42257842   276:              @spheres[1].intersect(ray, isect)
     42565969   277:              @spheres[2].intersect(ray, isect)
    266949967   278:              @plane.intersect(ray, isect)
     51428494   279:              if isect.hit then
    202302210   280:                col = ambient_occlusion(isect)
     21356720   281:                rad.x = rad.x + col.x
     14780657   282:                rad.y = rad.y + col.y
     11872435   283:                rad.z = rad.z + col.z
                284:              end
                285:            end
                286:          end
                287:          
      5921625   288:          r = rad.x / (nsf * nsf)
      7886703   289:          g = rad.y / (nsf * nsf)
      5888393   290:          b = rad.z / (nsf * nsf)
     98414481   291:          printf("%c", clamp(r))
      9520453   292:          printf("%c", clamp(g))
      9690354   293:          printf("%c", clamp(b))
                294:        end
        46513   295:        nil
                296:      end
                297:      
                298:      nil
                299:    end
                300:  end
                301:  
                302:  # File.open("ao.ppm", "w") do |fp|
                303:    printf("P6\n")
                304:    printf("%d %d\n", IMAGE_WIDTH, IMAGE_HEIGHT)
                305:    printf("255\n", IMAGE_WIDTH, IMAGE_HEIGHT)
                306:    Scene.new.render(IMAGE_WIDTH, IMAGE_HEIGHT, NSUBSAMPLES)
                307:  # end

*1:クロック数の計測にはRDTSC命令を使っています。あまり正確ではないという話もあるみたいです。