Prefix‑Sum v1

10 min read Original article ↗
              
                <!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Minimal Prefix-Sum CPU Renderer (Fill + Stroke, Parallel)</title>
<style>
  :root { --bg:#0f1220; --panel:#12162b; --ink:#e7e9ef; --line:#232849; }
  html,body { height:100%; margin:0; background:var(--bg); color:var(--ink);
    font:14px/1.4 system-ui,-apple-system,Segoe UI,Roboto,Ubuntu,Cantarell,sans-serif; }
  header { padding:12px 16px; background:#151934; position:sticky; top:0; z-index:5;
    box-shadow:0 2px 8px rgba(0,0,0,.25); }
  header h1 { margin:0; font-size:16px; font-weight:600; }
  .row { display:grid; grid-template-columns:320px 1fr; gap:16px; padding:16px; }
  .panel { background:var(--panel); border:1px solid var(--line); border-radius:12px; padding:12px; }
  label { display:flex; align-items:center; gap:8px; margin:6px 0; }
  input[type="range"] { width:100%; }
  select, button { background:#2a3160; color:#fff; border:1px solid #3b4282;
    border-radius:10px; padding:6px 8px; }
  button { cursor:pointer; }
  button:hover { filter:brightness(1.06); }
  .mono { font-family:ui-monospace,SFMono-Regular,Menlo,Consolas,monospace; }
  .small { font-size:12px; opacity:.9; }
  canvas { background:#0b0e1d; border:1px solid var(--line); border-radius:12px; width:100%; height:auto; image-rendering:pixelated; }
  .row2 { display:grid; grid-template-columns:repeat(3,1fr); gap:8px; margin-top:8px; }
  .stat { background:#0e1330; border:1px dashed #2e3569; border-radius:10px; padding:8px; text-align:center; }
</style>
</head>
<body>
  <header><h1>Prefix‑Sum CPU Vector Renderer → Fill & Stroke (Parallel via Web Workers)</h1></header>
  <div class="row">
    <div class="panel">
      <div><strong>Scene controls</strong></div>
      <label>Canvas Size
        <select id="size">
          <option value="800x600">800 × 600</option>
          <option value="1024x768">1024 × 768</option>
          <option value="1280x720">1280 × 720</option>
        </select>
      </label>
      <label>Tile Size (px)
        <input id="tile" type="range" min="16" max="128" value="64" step="16" />
        <span id="tileVal" class="mono small">64</span>
      </label>
      <label>Workers
        <input id="workers" type="range" min="1" max="16" value="4" />
        <span id="workersVal" class="mono small">4</span>
      </label>
      <label>Stroke Width
        <input id="stroke" type="range" min="1" max="40" value="10" />
        <span id="strokeVal" class="mono small">10</span>
      </label>
      <div class="row2">
        <button id="rerender">Render</button>
        <button id="randomize">Randomize Lines</button>
        <button id="toggleRule">Fill Rule: <span id="ruleTxt">evenodd</span></button>
      </div>
      <p class="small">Pipeline: <span class="mono">count → exclusive-scan → scatter</span> for tiles, then per‑tile scanline fill using a <em>difference array + prefix sum across X</em>. Strokes are widened into quads and rendered with the same filler.</p>
      <div class="row2">
        <div class="stat"><div class="small">Build</div><div class="mono" id="tBuild">–</div></div>
        <div class="stat"><div class="small">Raster</div><div class="mono" id="tRaster">–</div></div>
        <div class="stat"><div class="small">Tiles</div><div class="mono" id="tTiles">–</div></div>
      </div>
    </div>
    <div class="panel"><canvas id="cv" width="800" height="600"></canvas></div>
  </div>

<script>
(function(){
  // ---------- DOM & Controls ----------
  const cv = document.getElementById('cv');
  const ctx = cv.getContext('2d');
  const sizeSel = document.getElementById('size');
  const tileInp = document.getElementById('tile'); const tileVal = document.getElementById('tileVal');
  const workersInp = document.getElementById('workers'); const workersVal = document.getElementById('workersVal');
  const strokeInp = document.getElementById('stroke'); const strokeVal = document.getElementById('strokeVal');
  const btnRender = document.getElementById('rerender');
  const btnRandom = document.getElementById('randomize');
  const btnRule = document.getElementById('toggleRule'); const ruleTxt = document.getElementById('ruleTxt');
  const tBuild = document.getElementById('tBuild'); const tRaster = document.getElementById('tRaster'); const tTiles = document.getElementById('tTiles');

  // Hardware hint for workers
  const HW = Math.max(1, Math.min(16, (navigator.hardwareConcurrency||4)));
  workersInp.max = String(HW); workersInp.value = String(Math.min(4, HW));
  workersVal.textContent = workersInp.value;

  tileInp.addEventListener('input', () => tileVal.textContent = tileInp.value);
  workersInp.addEventListener('input', () => workersVal.textContent = workersInp.value);
  strokeInp.addEventListener('input', () => strokeVal.textContent = strokeInp.value);

  let fillRule = 'evenodd';
  btnRule.addEventListener('click', () => {
    fillRule = (fillRule === 'evenodd') ? 'nonzero' : 'evenodd';
    ruleTxt.textContent = fillRule;
  });

  sizeSel.addEventListener('change', () => {
    const [w,h] = sizeSel.value.split('x').map(Number);
    cv.width = w; cv.height = h; draw();
  });
  btnRender.addEventListener('click', draw);
  btnRandom.addEventListener('click', () => { randomizeLines(); draw(); });

  // ---------- Geometry Helpers ----------
  function star(cx, cy, rOuter, rInner, n = 5) {
    const pts = [];
    const step = Math.PI / n;
    for (let i=0;i<2*n;i++) {
      const r = (i % 2 === 0) ? rOuter : rInner;
      const a = i * step - Math.PI/2;
      pts.push([cx + Math.cos(a)*r, cy + Math.sin(a)*r]);
    }
    return pts;
  }

  // Expand polyline to stroke quads (butt caps)
  function strokeToQuads(polyline, width) {
    const quads = [];
    if (polyline.length < 2) return quads;
    const hw = width * 0.5;
    for (let i=0;i<polyline.length-1;i++) {
      const [x0,y0] = polyline[i]; const [x1,y1] = polyline[i+1];
      const dx = x1-x0, dy=y1-y0; const len = Math.hypot(dx,dy); if (len === 0) continue;
      const nx = -dy/len, ny = dx/len; // unit normal
      const hx = nx*hw, hy = ny*hw;
      quads.push([
        [x0 - hx, y0 - hy],
        [x0 + hx, y0 + hy],
        [x1 + hx, y1 + hy],
        [x1 - hx, y1 - hy],
      ]);
    }
    return quads;
  }

  // Exclusive prefix sum (Uint32Array). Returns offsets and total.
  function exclusiveScan(u32) {
    const out = new Uint32Array(u32.length);
    let acc = 0>>>0;
    for (let i=0;i<u32.length;i++){ out[i] = acc; acc = (acc + u32[i])>>>0; }
    return { offsets: out, total: acc>>>0 };
  }

  // ---------- Scene ----------
  let randomLines = [];
  function randomizeLines() {
    const [W,H] = [cv.width, cv.height];
    const pts = [];
    const N = 200;
    let x = W*0.15, y = H*0.25;
    const step = Math.min(W,H) * 0.03;
    for (let i=0;i<N;i++) {
      x = Math.max(20, Math.min(W-20, x + (Math.random()-0.5)*step*2));
      y = Math.max(20, Math.min(H-20, y + (Math.random()-0.5)*step*2));
      pts.push([x,y]);
    }
    randomLines = pts;
  }

  function buildShapes() {
    const [W,H] = [cv.width, cv.height];
    const shapes = [];

    // Filled star
    const s = star(W*0.5, H*0.45, Math.min(W,H)*0.28, Math.min(W,H)*0.12, 7);
    shapes.push({
      verts: Float32Array.from(s.flat()),
      color: Uint8ClampedArray.from([88,156,255,220]),
      rule: fillRule
    });

    // Blobby squircle-like polygon
    const poly = [];
    const cx=W*0.25, cy=H*0.65, rx=Math.min(W,H)*0.12, ry=Math.min(W,H)*0.08;
    for (let i=0;i<32;i++){
      const a = (i/32)*Math.PI*2;
      const k = 0.72+0.28*Math.sin(3*a);
      poly.push([cx + Math.cos(a)*rx*k, cy + Math.sin(a)*ry*k]);
    }
    shapes.push({
      verts: Float32Array.from(poly.flat()),
      color: Uint8ClampedArray.from([255,120,88,190]),
      rule: fillRule
    });

    // Stroked random polyline → quads
    if (randomLines.length === 0) randomizeLines();
    const width = Number(strokeInp.value);
    const quads = strokeToQuads(randomLines, width);
    for (const q of quads) {
      shapes.push({
        verts: Float32Array.from(q.flat()),
        color: Uint8ClampedArray.from([250,245,140,255]),
        rule: 'evenodd' // convex quads
      });
    }
    return shapes;
  }

  // ---------- Tiling with count → scan → scatter ----------
  function binShapesIntoTiles(shapes, tileSize) {
    const W = cv.width, H = cv.height;
    const tilesX = Math.ceil(W / tileSize), tilesY = Math.ceil(H / tileSize);
    const tileCount = tilesX * tilesY;
    const counts = new Uint32Array(tileCount);
    const ranges = new Array(shapes.length);

    // Count coverage per tile using shape AABBs (cheap, conservative)
    for (let sIdx=0; sIdx<shapes.length; sIdx++) {
      const v = shapes[sIdx].verts;
      let minX=+Infinity, minY=+Infinity, maxX=-Infinity, maxY=-Infinity;
      for (let i=0;i<v.length;i+=2){ const x=v[i], y=v[i+1];
        if (x<minX) minX=x; if (y<minY) minY=y; if (x>maxX) maxX=x; if (y>maxY) maxY=y; }
      const minTx = Math.max(0, Math.floor(minX / tileSize));
      const maxTx = Math.min(tilesX-1, Math.floor(maxX / tileSize));
      const minTy = Math.max(0, Math.floor(minY / tileSize));
      const maxTy = Math.min(tilesY-1, Math.floor(maxY / tileSize));
      ranges[sIdx] = {minTx, maxTx, minTy, maxTy};
      for (let ty=minTy; ty<=maxTy; ty++) for (let tx=minTx; tx<=maxTx; tx++) counts[ty*tilesX+tx]++;
    }

    // Exclusive scan → offsets; then scatter shape indices into a compact list
    const {offsets, total} = exclusiveScan(counts);
    const tileShapeIndex = new Uint32Array(total);
    const cursors = offsets.slice();
    for (let sIdx=0; sIdx<shapes.length; sIdx++) {
      const r = ranges[sIdx];
      for (let ty=r.minTy; ty<=r.maxTy; ty++)
        for (let tx=r.minTx; tx<=r.maxTx; tx++) {
          const t = ty*tilesX + tx;
          tileShapeIndex[cursors[t]++] = sIdx;
        }
    }
    return { tilesX, tilesY, tileCount, counts, offsets, tileShapeIndex };
  }

  // ---------- Worker (tile raster) ----------
  function makeWorkerURL() {
    const workerFn = () => {
      // Convert RGBA8 to normalized (0..1) premultiplied
      function toPremul(rgba) {
        const a = rgba[3] / 255;
        return [rgba[0]/255*a, rgba[1]/255*a, rgba[2]/255*a, a];
      }

      // Blend src (premul) over dst (premul) in-place
      function over(dst, i, sr, sg, sb, sa) {
        const ida = 1 - sa;
        dst[i  ] = sr + dst[i  ] * ida;
        dst[i+1] = sg + dst[i+1] * ida;
        dst[i+2] = sb + dst[i+2] * ida;
        dst[i+3] = sa + dst[i+3] * ida;
      }

      // Scanline fill: build row difference array, prefix sum across X → paint pixels inside
      function fillPolygonTile(tileX, tileY, tileW, tileH, verts, rgba, rule, outF32) {
        const n = verts.length >>> 1; if (n < 3) return;
        const vx = new Float32Array(n), vy = new Float32Array(n);
        for (let i=0,j=0;i<n;i++,j+=2){ vx[i]=verts[j]; vy[i]=verts[j+1]; }

        const pitch = tileW * 4;
        const [sr,sg,sb,sa] = toPremul(rgba);
        const rowDelta = new Int32Array(tileW+1);

        for (let ry=0; ry<tileH; ry++) {
          const gy = tileY + ry + 0.5; // pixel-center scanline

          if (rule === 'evenodd') {
            // Collect intersection Xs
            const xs = [];
            for (let i=0, j=n-1;i<n;j=i++) {
              const x0=vx[j], y0=vy[j], x1=vx[i], y1=vy[i];
              const yMin = Math.min(y0,y1), yMax = Math.max(y0,y1);
              if (gy <= yMin || gy > yMax) continue; // half-open to avoid double counting
              const t = (gy - y0) / (y1 - y0);
              xs.push(x0 + t*(x1 - x0));
            }
            if (xs.length < 2) continue;
            xs.sort((a,b)=>a-b);
            // Build difference array from spans
            for (let k=0;k+1<xs.length;k+=2) {
              let x0 = Math.floor(xs[k]   - tileX);
              let x1 = Math.floor(xs[k+1] - tileX);
              if (x1 <= 0 || x0 >= tileW) continue;
              if (x0 < 0) x0 = 0; if (x1 > tileW) x1 = tileW;
              rowDelta[x0] += 1; rowDelta[x1] -= 1;
            }
            let acc = 0|0, base = ry*pitch;
            for (let x=0;x<tileW;x++) {
              acc += rowDelta[x];
              if ((acc & 1) !== 0) {
                const i = base + x*4;
                over(outF32, i, sr,sg,sb,sa);
              }
            }
            // clear rowDelta
            for (let x=0;x<=tileW;x++) rowDelta[x]=0;
          } else {
            // Non-zero winding: accumulate signed crossings into difference array at pixel buckets
            for (let i=0, j=n-1;i<n;j=i++) {
              const x0=vx[j], y0=vy[j], x1=vx[i], y1=vy[i];
              const yMin = Math.min(y0,y1), yMax = Math.max(y0,y1);
              if (gy <= yMin || gy > yMax) continue;
              const t = (gy - y0) / (y1 - y0);
              const x = x0 + t*(x1 - x0);
              let xi = Math.floor(x - tileX);
              if (xi < 0) xi = 0; if (xi > tileW) xi = tileW;
              rowDelta[xi] += (y1 > y0) ? +1 : -1;
            }
            let acc = 0|0, base = ry*pitch;
            for (let x=0;x<tileW;x++) {
              acc += rowDelta[x];
              if (acc !== 0) {
                const i = base + x*4;
                over(outF32, i, sr,sg,sb,sa);
              }
            }
            for (let x=0;x<=tileW;x++) rowDelta[x]=0;
          }
        }
      }

      // Convert premul-f32 tile buffer to Uint8Clamped (straight alpha) for ImageData
      function packToRGBA(outF32, tileW, tileH) {
        const outU8 = new Uint8ClampedArray(tileW*tileH*4);
        for (let p=0; p<tileW*tileH; p++) {
          const i = p*4;
          const a = Math.min(1, Math.max(0, outF32[i+3]));
          if (a <= 1e-6) { outU8[i]=outU8[i+1]=outU8[i+2]=0; outU8[i+3]=0; continue; }
          const r = Math.min(255, Math.max(0, Math.round((outF32[i  ]/a) * 255)));
          const g = Math.min(255, Math.max(0, Math.round((outF32[i+1]/a) * 255)));
          const b = Math.min(255, Math.max(0, Math.round((outF32[i+2]/a) * 255)));
          const A = Math.min(255, Math.max(0, Math.round(a*255)));
          outU8[i]=r; outU8[i+1]=g; outU8[i+2]=b; outU8[i+3]=A;
        }
        return outU8;
      }

      self.onmessage = (e) => {
        const msg = e.data;
        if (msg.cmd !== 'renderTiles') return;
        const { tileSize, canvasW, canvasH, tilesX, tileIndices, offsets, counts, tileShapeIndex, shapes } = msg;

        const results = [];
        for (const t of tileIndices) {
          const tx = t % tilesX, ty = (t / tilesX)|0;
          const tileX = tx * tileSize, tileY = ty * tileSize;
          const tileW = Math.min(tileSize, canvasW - tileX);
          const tileH = Math.min(tileSize, canvasH - tileY);

          // Premultiplied float buffer
          const fbuf = new Float32Array(tileW*tileH*4); // zeroed
          const start = offsets[t]>>>0, cnt = counts[t]>>>0;

          for (let i=0;i<cnt;i++) {
            const sIdx = tileShapeIndex[start+i]>>>0;
            const sh = shapes[sIdx];
            // Quick reject by tile AABB (optional improvement):
            // For minimal demo we skip per-shape/tile precise intersection tests.
            fillPolygonTile(tileX, tileY, tileW, tileH, sh.verts, sh.color, sh.rule, fbuf);
          }
          const pixels = packToRGBA(fbuf, tileW, tileH);
          results.push({ t, tx, ty, tileX, tileY, tileW, tileH, pixels });
        }
        self.postMessage({ kind:'tilesDone', results }, results.map(r => r.pixels.buffer));
      };
    };
    const src = `(${workerFn.toString()})()`;
    const blob = new Blob([src], {type:'text/javascript'});
    return URL.createObjectURL(blob);
  }

  // ---------- Orchestrate ----------
  async function draw() {
    const tileSize = Number(tileInp.value);
    const nWorkers = Number(workersInp.value);
    const [W,H] = [cv.width, cv.height];

    const t0 = performance.now();
    const shapes = buildShapes();
    const { tilesX, tilesY, tileCount, counts, offsets, tileShapeIndex } = binShapesIntoTiles(shapes, tileSize);
    const t1 = performance.now();

    // Distribute tiles round-robin to workers
    const tileIds = new Uint32Array(tileCount); for (let i=0;i<tileCount;i++) tileIds[i]=i;
    const buckets = Array.from({length:nWorkers}, ()=>[]);
    for (let i=0;i<tileCount;i++) buckets[i % nWorkers].push(tileIds[i]);

    const workerURL = makeWorkerURL();
    const workers = Array.from({length:nWorkers}, () => new Worker(workerURL));
    const jobs = buckets.map((tileIndices, wi) => new Promise(resolve => {
      const w = workers[wi];
      w.onmessage = (e) => { if (e.data && e.data.kind === 'tilesDone') resolve(e.data.results); };
      w.postMessage({
        cmd:'renderTiles',
        tileSize, canvasW:W, canvasH:H,
        tilesX, tileIndices,
        offsets, counts, tileShapeIndex,
        shapes
      });
    }));

    const resultSets = await Promise.all(jobs);
    for (const w of workers) w.terminate();
    URL.revokeObjectURL(workerURL);

    // Present tiles
    const t2 = performance.now();
    ctx.clearRect(0,0,W,H);
    for (const results of resultSets) {
      for (const r of results) {
        const img = new ImageData(r.pixels, r.tileW, r.tileH);
        ctx.putImageData(img, r.tileX, r.tileY);
      }
    }
    const t3 = performance.now();

    tBuild.textContent = (t1 - t0).toFixed(2) + ' ms';
    tRaster.textContent = (t3 - t1).toFixed(2) + ' ms';
    tTiles.textContent = `${tilesX}×${Math.ceil(H/tileSize)} = ${tileCount}`;
  }

  // First render
  draw();
})();
</script>
</body>
</html>

              
            

!