BizGenEval Leaderboard
BizGenEval is a benchmark for commercial visual content generation quality. This leaderboard reports score breakdowns by:
- Capability dimensions: Layout, Attribute, Text, Knowledge
- Content domains: Slides, Webpage, Poster, Chart, Scientific Figure
All leaderboard scores are displayed as `hard(easy)` on a 0-100 scale: the hard-track score first, with the easy-track score in parentheses.
| Model | Average ⬇️ | Layout | Attribute | Text | Knowledge | Slides | Webpage | Poster | Chart | Scientific Figure |
|---|---|---|---|---|---|---|---|---|---|---|
| Nano-Banana-Pro | 76.7(93.7) | 72.2(91.2) | 65.6(92.2) | 86.4(95.0) | 82.6(96.2) | 82.2(94.8) | 77.5(96.5) | 76.5(94.8) | 73.0(92.2) | 74.2(90.0) |
| Nano-Banana-2.0 | 68.5(92.5) | 68.4(91.0) | 57.4(91.6) | 83.4(94.6) | 64.6(93.0) | 73.8(95.8) | 71.2(94.5) | 67.5(91.2) | 60.2(89.2) | 69.5(92.0) |
| Seedream-5.0 | 48.8(79.2) | 67.6(89.0) | 42.4(77.2) | 43.4(75.6) | 41.8(75.2) | 54.5(80.8) | 47.0(80.8) | 50.7(77.0) | 46.0(76.2) | 45.8(81.5) |
| GPT-Image-1.5 | 35.9(81.6) | 51.6(84.8) | 25.8(75.2) | 40.4(82.8) | 26.0(83.6) | 40.8(89.2) | 41.0(86.0) | 42.0(83.5) | 28.2(76.5) | 27.8(72.8) |
| Seedream-4.5 | 30.1(66.2) | 35.4(71.6) | 22.4(62.8) | 41.4(72.4) | 21.4(58.2) | 33.8(71.0) | 36.2(75.5) | 35.5(72.2) | 18.0(47.8) | 27.3(64.8) |
| Wan2.6-T2I | 21.9(58.7) | 46.4(80.6) | 16.6(60.6) | 12.6(52.6) | 12.2(41.0) | 27.1(56.5) | 25.5(67.0) | 27.5(62.5) | 17.2(48.8) | 12.5(58.8) |
| Seedream-4.0 | 14.3(60.1) | 27.6(73.4) | 11.4(59.2) | 11.4(52.8) | 6.8(54.8) | 18.5(67.8) | 19.2(71.8) | 18.8(65.5) | 7.8(46.0) | 7.2(49.2) |
| Emu3.5 | 13.2(40.2) | 30.4(63.4) | 14.2(52.6) | 7.0(33.6) | 1.2(11.0) | 14.5(44.8) | 20.0(48.8) | 20.3(53.0) | 4.8(20.3) | 6.5(34.0) |
| HunyuanImage-3.0 | 13.0(40.1) | 27.8(65.0) | 13.8(53.6) | 10.2(39.6) | 0.0(2.0) | 19.3(47.0) | 21.0(52.0) | 19.8(53.5) | 2.0(18.8) | 2.8(29.0) |
| GPT-Image-1.0 | 11.2(52.4) | 21.4(60.2) | 6.8(48.6) | 8.6(41.0) | 7.8(60.0) | 18.2(64.8) | 12.0(63.0) | 17.2(64.8) | 3.2(31.2) | 5.0(38.5) |
| HunyuanImage-2.1 | 8.6(27.7) | 29.0(68.4) | 5.2(39.8) | 0.0(1.4) | 0.0(1.0) | 11.0(36.2) | 16.5(40.0) | 11.8(39.0) | 1.2(8.0) | 2.2(15.0) |
| Z-Image | 8.2(43.8) | 26.8(69.2) | 2.6(47.6) | 2.8(45.0) | 0.6(13.2) | 12.2(43.8) | 6.2(48.5) | 12.0(50.0) | 8.8(30.5) | 1.8(46.0) |
| Qwen-Image-2512 | 6.3(41.0) | 22.2(70.6) | 1.2(47.8) | 1.8(39.2) | 0.0(6.4) | 10.2(45.0) | 7.2(48.0) | 11.5(47.0) | 2.2(28.0) | 0.2(37.0) |
| FLUX.2-dev | 4.9(42.0) | 17.2(67.8) | 1.2(49.2) | 1.0(43.0) | 0.0(8.2) | 5.5(43.2) | 5.5(48.8) | 7.2(48.2) | 5.5(33.5) | 0.5(36.5) |
| Z-Image-Turbo | 3.4(32.4) | 11.0(60.6) | 1.2(35.0) | 1.2(29.8) | 0.2(4.0) | 7.0(36.5) | 4.5(45.8) | 4.8(40.8) | 0.8(15.5) | 0.0(23.2) |
| Qwen-Image | 2.8(23.8) | 10.4(51.2) | 0.2(22.2) | 0.6(17.6) | 0.0(4.4) | 3.5(28.5) | 2.5(27.5) | 5.8(32.2) | 2.2(15.8) | 0.0(15.2) |
| FLUX.2-Pro | 1.6(21.1) | 6.1(36.2) | 0.0(22.9) | 0.0(13.7) | 0.2(11.7) | 1.8(23.0) | 2.0(27.0) | 2.8(26.2) | 1.3(14.7) | 0.0(14.3) |
| GLM-Image | 1.4(15.3) | 5.4(43.2) | 0.0(13.4) | 0.2(4.4) | 0.0(0.4) | 1.5(22.8) | 1.8(27.3) | 3.8(24.8) | 0.0(0.5) | 0.0(1.5) |
| Imagen-4 | 1.1(10.7) | 4.2(26.8) | 0.0(8.7) | 0.2(4.0) | 0.0(3.4) | 1.5(15.0) | 0.5(14.8) | 1.8(12.8) | 1.5(6.8) | 0.2(4.3) |
| LongCat-Image | 0.7(13.0) | 2.4(35.8) | 0.2(11.6) | 0.0(4.4) | 0.0(0.0) | 0.8(15.0) | 1.8(22.3) | 0.8(21.3) | 0.0(2.5) | 0.0(3.8) |
| X-Omni-EN | 0.5(9.4) | 2.0(22.8) | 0.0(5.6) | 0.0(8.0) | 0.0(1.4) | 0.8(9.0) | 0.2(14.2) | 1.5(15.8) | 0.0(6.5) | 0.0(1.8) |
| SD3.5-Large | 0.5(2.1) | 2.2(6.6) | 0.0(0.4) | 0.0(0.0) | 0.0(1.2) | 0.0(0.5) | 0.2(2.2) | 0.0(2.8) | 1.2(3.2) | 1.2(1.5) |
| Bagel | 0.3(3.7) | 0.6(12.8) | 0.0(1.6) | 0.0(0.0) | 0.0(0.2) | 0.3(4.8) | 0.0(5.0) | 0.5(8.5) | 0.0(0.0) | 0.0(0.0) |
| FLUX.1-Krea-dev | 0.1(5.1) | 0.2(17.8) | 0.0(2.8) | 0.0(0.0) | 0.0(0.0) | 0.0(4.8) | 0.0(8.5) | 0.2(11.8) | 0.0(0.8) | 0.0(0.0) |
| FLUX.1-dev | 0.1(5.0) | 0.4(15.8) | 0.0(2.8) | 0.0(0.0) | 0.0(1.4) | 0.0(6.0) | 0.0(11.0) | 0.5(7.8) | 0.0(0.2) | 0.0(0.0) |
| FLUX.1-schnell | 0.0(5.1) | 0.0(16.8) | 0.0(2.6) | 0.0(0.0) | 0.0(1.2) | 0.0(8.5) | 0.0(8.2) | 0.0(8.2) | 0.0(0.8) | 0.0(0.0) |
How it works
- Run BizGenEval evaluation locally.
- Run BizGenEval summarization to obtain `summary.json`.
- Submit model metadata via this leaderboard to enter queue tracking.
- Update `commit_results.jsonl` to refresh the public leaderboard; a sketch of this step follows below.
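As an illustration of the last step, here is a minimal Python sketch for appending a run to `commit_results.jsonl`. The actual schema is not documented in this section, so the field names (`model`, `results`) and the `summary.json` path are assumptions, not the leaderboard's official format:

```python
import json

# Load the summary produced by evaluation.summarize (see Reproducibility below).
with open("outputs/summary/summary.json") as f:
    summary = json.load(f)

# Hypothetical entry layout: "model" and "results" are assumed field names,
# not a documented schema -- adapt to the real commit_results.jsonl format.
entry = {"model": "My-Model", "results": summary}

# JSONL convention: one JSON object per line, appended to the existing file.
with open("commit_results.jsonl", "a", encoding="utf-8") as f:
    f.write(json.dumps(entry, ensure_ascii=False) + "\n")
```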
Score Protocol
- Scores are shown as `hard(easy)` on a 0-100 scale.
- Ranking is sorted by hard-track average first, then by easy-track average; a minimal sorting sketch follows this list.
- Displayed columns include the 4 capability dimensions and 5 content domains.
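To make the ranking rule concrete, here is a small sketch (not part of the official tooling) that parses `hard(easy)` cells and orders rows the way the leaderboard does; the sample rows are copied from the table above:

```python
import re

def parse(cell: str) -> tuple[float, float]:
    """Split a 'hard(easy)' cell, e.g. '76.7(93.7)' -> (76.7, 93.7)."""
    hard, easy = re.fullmatch(r"([\d.]+)\(([\d.]+)\)", cell).groups()
    return float(hard), float(easy)

# (model, hard(easy) average) pairs taken from the leaderboard above.
rows = [
    ("Seedream-5.0", "48.8(79.2)"),
    ("Nano-Banana-Pro", "76.7(93.7)"),
    ("Nano-Banana-2.0", "68.5(92.5)"),
]

# Sort by hard-track average, descending; ties fall back to the easy track.
rows.sort(key=lambda r: parse(r[1]), reverse=True)
print([m for m, _ in rows])  # ['Nano-Banana-Pro', 'Nano-Banana-2.0', 'Seedream-5.0']
```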
Reproducibility
To reproduce the benchmark outputs:
```bash
python -m evaluation.image_evaluation --input_dir outputs/generated --save_dir outputs/eval_results
python -m evaluation.summarize --data_path assets/bizgeneval.jsonl --result_dir outputs/eval_results --save_dir outputs/summary
```
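Judging from the flag names, the first command scores each generated image under `outputs/generated` and writes per-sample results to `outputs/eval_results`, and the second aggregates those results against the benchmark data in `assets/bizgeneval.jsonl` into the `summary.json` used in the submission flow above.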