|
| 1 | +// |
| 2 | +// Created by Hercier on 2023/12/25. |
| 3 | +// |
| 4 | + |
| 5 | +#include <luisa-compute.h> |
| 6 | +#include <util/sampling.h> |
| 7 | +#include <base/pipeline.h> |
| 8 | +#include <base/integrator.h> |
| 9 | +#include <util/progress_bar.h> |
| 10 | + |
| 11 | +using namespace luisa; |
| 12 | +using namespace luisa::compute; |
| 13 | + |
| 14 | +namespace luisa::render { |
| 15 | + |
| 16 | +using namespace compute; |
| 17 | + |
| 18 | +class CoroutinePathTracing final : public ProgressiveIntegrator { |
| 19 | + |
| 20 | +public: |
| 21 | + enum struct Scheduler { |
| 22 | + Simple, |
| 23 | + Wavefront, |
| 24 | + Persistent, |
| 25 | + }; |
| 26 | + |
| 27 | +#define PT_CORO_SIGNATURE float, float, uint |
| 28 | + using SchedulerBase = coroutine::CoroScheduler<PT_CORO_SIGNATURE>; |
| 29 | + using SimpleScheduler = coroutine::StateMachineCoroScheduler<PT_CORO_SIGNATURE>; |
| 30 | + using WavefrontScheduler = coroutine::WavefrontCoroScheduler<PT_CORO_SIGNATURE>; |
| 31 | + using PersistentScheduler = coroutine::PersistentThreadsCoroScheduler<PT_CORO_SIGNATURE>; |
| 32 | +#undef PT_CORO_SIGNATURE |
| 33 | + |
| 34 | +private: |
| 35 | + uint _max_depth; |
| 36 | + uint _rr_depth; |
| 37 | + float _rr_threshold; |
| 38 | + luisa::string _scheduler_type; |
| 39 | + uint _samples_per_pass; |
| 40 | + Scheduler _scheduler; |
| 41 | + WavefrontScheduler::Config _wavefront_config; |
| 42 | + PersistentScheduler::Config _persistent_config; |
| 43 | + |
| 44 | +public: |
| 45 | + CoroutinePathTracing(Scene *scene, const SceneNodeDesc *desc) noexcept |
| 46 | + : ProgressiveIntegrator{scene, desc}, |
| 47 | + _max_depth{std::max(desc->property_uint_or_default("depth", 10u), 1u)}, |
| 48 | + _rr_depth{std::max(desc->property_uint_or_default("rr_depth", 0u), 0u)}, |
| 49 | + _rr_threshold{std::max(desc->property_float_or_default("rr_threshold", 0.95f), 0.05f)}, |
| 50 | + _scheduler_type{desc->property_string_or_default("scheduler_type", "wavefront")}, |
| 51 | + _samples_per_pass{std::max(desc->property_uint_or_default("samples_per_pass", 16u), 1u)}, |
| 52 | + _scheduler{[&] { |
| 53 | + auto s = desc->property_string_or_default( |
| 54 | + "scheduler", luisa::lazy_construct([&] { |
| 55 | + return desc->property_string_or_default("scheduler_type", "wavefront"); |
| 56 | + })); |
| 57 | + for (auto &c : s) { c = static_cast<char>(std::tolower(c)); } |
| 58 | + if (s == "wavefront") { return Scheduler::Wavefront; } |
| 59 | + if (s == "persistent") { return Scheduler::Persistent; } |
| 60 | + if (s == "simple") { return Scheduler::Simple; } |
| 61 | + LUISA_ERROR_WITH_LOCATION( |
| 62 | + "Unknown scheduler type '{}'. " |
| 63 | + "Supported types are: wavefront, persistent, simple.", |
| 64 | + s); |
| 65 | + }()} { |
| 66 | + switch (_scheduler) { |
| 67 | + case Scheduler::Simple: break; |
| 68 | + case Scheduler::Wavefront: { |
| 69 | + if (desc->has_property("soa")) { _wavefront_config.global_memory_soa = desc->property_bool("soa"); } |
| 70 | + if (desc->has_property("sort")) { _wavefront_config.gather_by_sorting = desc->property_bool("sort"); } |
| 71 | + if (desc->has_property("compact")) { _wavefront_config.frame_buffer_compaction = desc->property_bool("compact"); } |
| 72 | + if (desc->has_property("instances")) { _wavefront_config.thread_count = std::max<uint>(desc->property_uint("instances"), 1_k); } |
| 73 | + if (desc->has_property("max_instance_count")) { _wavefront_config.thread_count = std::max<uint>(desc->property_uint("max_instance_count"), 1_k); } |
| 74 | + if (desc->has_property("sort_hints")) { _wavefront_config.hint_fields = desc->property_string_list_or_default("sort_hints"); } |
| 75 | + break; |
| 76 | + } |
| 77 | + case Scheduler::Persistent: { |
| 78 | + _persistent_config.shared_memory_soa = true; |
| 79 | + if (desc->has_property("max_thread_count")) { _persistent_config.thread_count = std::max<uint>(desc->property_uint("max_thread_count"), 5_k); } |
| 80 | + if (desc->has_property("threads")) { _persistent_config.thread_count = std::max<uint>(desc->property_uint("threads"), 5_k); } |
| 81 | + if (desc->has_property("block_size")) { _persistent_config.block_size = std::max<uint>(desc->property_uint("block_size"), 32u); } |
| 82 | + if (desc->has_property("fetch_size")) { _persistent_config.fetch_size = std::max<uint>(desc->property_uint("fetch_size"), 1u); } |
| 83 | + if (desc->has_property("global")) { _persistent_config.global_memory_ext = desc->property_bool("global"); } |
| 84 | + break; |
| 85 | + } |
| 86 | + } |
| 87 | + } |
| 88 | + [[nodiscard]] auto max_depth() const noexcept { return _max_depth; } |
| 89 | + [[nodiscard]] auto rr_depth() const noexcept { return _rr_depth; } |
| 90 | + [[nodiscard]] auto rr_threshold() const noexcept { return _rr_threshold; } |
| 91 | + [[nodiscard]] auto scheduler_type() const noexcept { return _scheduler_type; } |
| 92 | + [[nodiscard]] auto samples_per_pass() const noexcept { return _samples_per_pass; } |
| 93 | + [[nodiscard]] luisa::string_view impl_type() const noexcept override { return LUISA_RENDER_PLUGIN_NAME; } |
| 94 | + [[nodiscard]] luisa::unique_ptr<Integrator::Instance> build( |
| 95 | + Pipeline &pipeline, CommandBuffer &command_buffer) const noexcept override; |
| 96 | + |
| 97 | + // scheduler config |
| 98 | + [[nodiscard]] auto scheduler() const noexcept { return _scheduler; } |
| 99 | + [[nodiscard]] auto &wavefront_config() const noexcept { return _wavefront_config; } |
| 100 | + [[nodiscard]] auto &persistent_config() const noexcept { return _persistent_config; } |
| 101 | +}; |
| 102 | + |
| 103 | +class CoroutinePathTracingInstance final : public ProgressiveIntegrator::Instance { |
| 104 | + |
| 105 | +public: |
| 106 | + using ProgressiveIntegrator::Instance::Instance; |
| 107 | + |
| 108 | +protected: |
| 109 | + void _render_one_camera(CommandBuffer &command_buffer, Camera::Instance *camera) noexcept override { |
| 110 | + if (!pipeline().has_lighting()) [[unlikely]] { |
| 111 | + LUISA_WARNING_WITH_LOCATION( |
| 112 | + "No lights in scene. Rendering aborted."); |
| 113 | + return; |
| 114 | + } |
| 115 | + auto spp = camera->node()->spp(); |
| 116 | + auto resolution = camera->film()->node()->resolution(); |
| 117 | + auto image_file = camera->node()->file(); |
| 118 | + |
| 119 | + auto pixel_count = resolution.x * resolution.y; |
| 120 | + sampler()->reset(command_buffer, resolution, pixel_count, spp); |
| 121 | + command_buffer << synchronize(); |
| 122 | + |
| 123 | + LUISA_INFO( |
| 124 | + "Rendering to '{}' of resolution {}x{} at {}spp.", |
| 125 | + image_file.string(), |
| 126 | + resolution.x, resolution.y, spp); |
| 127 | + |
| 128 | + using namespace luisa::compute; |
| 129 | + |
| 130 | + coroutine::Coroutine render = [&](Float time, Float shutter_weight, UInt spp_offset) { |
| 131 | + auto frame_index = spp_offset + dispatch_z(); |
| 132 | + auto pixel_id = dispatch_id().xy(); |
| 133 | + auto L = Li(camera, frame_index, pixel_id, time); |
| 134 | + camera->film()->accumulate(pixel_id, shutter_weight * L); |
| 135 | + }; |
| 136 | + Clock clock_compile; |
| 137 | + auto coro_pt = node<CoroutinePathTracing>(); |
| 138 | + auto scheduler = [&]() noexcept -> luisa::unique_ptr<CoroutinePathTracing::SchedulerBase> { |
| 139 | + auto &device = pipeline().device(); |
| 140 | + auto &stream = *command_buffer.stream(); |
| 141 | + switch (coro_pt->scheduler()) { |
| 142 | + case CoroutinePathTracing::Scheduler::Simple: { |
| 143 | + return luisa::make_unique<CoroutinePathTracing::SimpleScheduler>(device, render); |
| 144 | + } |
| 145 | + case CoroutinePathTracing::Scheduler::Wavefront: { |
| 146 | + auto config = coro_pt->wavefront_config(); |
| 147 | + // BUG: metal has trouble with the sorting kernel... |
| 148 | + if (device.backend_name() == "metal") { |
| 149 | + config.gather_by_sorting = false; |
| 150 | + config.hint_fields = {}; |
| 151 | + } |
| 152 | + if (!config.hint_fields.empty()) { |
| 153 | + config.hint_range = pipeline().surfaces().size(); |
| 154 | + } |
| 155 | + LUISA_INFO("config: soa:{}, sort:{}, compact:{}, max_instance_count:{}, hint_range:{}, hint_fields[0]:{}", |
| 156 | + config.global_memory_soa, |
| 157 | + config.gather_by_sorting, |
| 158 | + config.frame_buffer_compaction, |
| 159 | + config.thread_count, |
| 160 | + config.hint_range, |
| 161 | + !config.hint_fields.empty() ? config.hint_fields[0] : "NULL"); |
| 162 | + return luisa::make_unique<CoroutinePathTracing::WavefrontScheduler>(device, render, config); |
| 163 | + } |
| 164 | + case CoroutinePathTracing::Scheduler::Persistent: { |
| 165 | + auto config = coro_pt->persistent_config(); |
| 166 | + LUISA_INFO("config: max_thread_count:{}, block_size:{}, fetch_size:{}, global:{}", |
| 167 | + config.thread_count, |
| 168 | + config.block_size, |
| 169 | + config.fetch_size, |
| 170 | + config.global_memory_ext); |
| 171 | + return luisa::make_unique<CoroutinePathTracing::PersistentScheduler>(device, render, config); |
| 172 | + } |
| 173 | + default: |
| 174 | + break; |
| 175 | + } |
| 176 | + LUISA_ERROR_WITH_LOCATION( |
| 177 | + "Unknown scheduler type '{}'. " |
| 178 | + "Supported types are: wavefront, persistent, simple.", |
| 179 | + luisa::to_string(coro_pt->scheduler())); |
| 180 | + }(); |
| 181 | + auto integrator_shader_compilation_time = clock_compile.toc(); |
| 182 | + LUISA_INFO("Integrator shader compile in {} ms with {} coroutine scheduler.", |
| 183 | + integrator_shader_compilation_time, |
| 184 | + luisa::to_string(coro_pt->scheduler())); |
| 185 | + auto shutter_samples = camera->node()->shutter_samples(); |
| 186 | + command_buffer << synchronize(); |
| 187 | + |
| 188 | + LUISA_INFO("Rendering started."); |
| 189 | + Clock clock; |
| 190 | + ProgressBar progress; |
| 191 | + progress.update(0.); |
| 192 | + auto sample_id = 0u; |
| 193 | + for (auto s : shutter_samples) { |
| 194 | + pipeline().update(command_buffer, s.point.time); |
| 195 | + auto aligned_spp = luisa::align(s.spp, coro_pt->samples_per_pass()); |
| 196 | + for (auto i = 0u; i < aligned_spp; i += coro_pt->samples_per_pass()) { |
| 197 | + auto ns = std::min<uint>(coro_pt->samples_per_pass(), s.spp - i); |
| 198 | + command_buffer << (*scheduler)(s.point.time, s.point.weight, sample_id) |
| 199 | + .dispatch(resolution.x, resolution.y, ns); |
| 200 | + sample_id += spp; |
| 201 | + camera->film()->show(command_buffer); |
| 202 | + auto p = sample_id / static_cast<double>(spp); |
| 203 | + command_buffer << [&progress, p] { progress.update(p); }; |
| 204 | + } |
| 205 | + } |
| 206 | + command_buffer << synchronize(); |
| 207 | + progress.done(); |
| 208 | + |
| 209 | + auto render_time = clock.toc(); |
| 210 | + LUISA_INFO("Rendering finished in {} ms.", render_time); |
| 211 | + } |
| 212 | + |
| 213 | + [[nodiscard]] Float3 Li(const Camera::Instance *camera, Expr<uint> frame_index, |
| 214 | + Expr<uint2> pixel_id, Expr<float> time) const noexcept override { |
| 215 | + sampler()->start(pixel_id, frame_index); |
| 216 | + auto u_filter = sampler()->generate_pixel_2d(); |
| 217 | + auto u_lens = camera->node()->requires_lens_sampling() ? sampler()->generate_2d() : make_float2(.5f); |
| 218 | + auto [camera_ray, _, camera_weight] = camera->generate_ray(pixel_id, time, u_filter, u_lens); |
| 219 | + auto spectrum = pipeline().spectrum(); |
| 220 | + auto u_swl = spectrum->node()->is_fixed() ? 0.f : sampler()->generate_1d(); |
| 221 | + auto sample_wl = [&] { return spectrum->sample(spectrum->node()->is_fixed() ? 0.f : u_swl); }; |
| 222 | + auto swl = sample_wl(); |
| 223 | + SampledSpectrum beta{swl.dimension(), camera_weight}; |
| 224 | + SampledSpectrum Li{swl.dimension()}; |
| 225 | + |
| 226 | + auto ray = camera_ray; |
| 227 | + auto pdf_bsdf = def(1e16f); |
| 228 | + $for (depth, node<CoroutinePathTracing>()->max_depth()) { |
| 229 | + $suspend("intersect"); |
| 230 | + // trace |
| 231 | + auto hit = pipeline().geometry()->trace_closest(ray); |
| 232 | + |
| 233 | + // miss |
| 234 | + $if (hit->miss()) { |
| 235 | + if (pipeline().environment()) { |
| 236 | + $suspend("miss"); |
| 237 | + auto swl = sample_wl(); |
| 238 | + auto eval = light_sampler()->evaluate_miss(ray->direction(), swl, time); |
| 239 | + Li += beta * eval.L * balance_heuristic(pdf_bsdf, eval.pdf); |
| 240 | + } |
| 241 | + $break; |
| 242 | + }; |
| 243 | + auto shape = pipeline().geometry()->instance(hit.inst); |
| 244 | + // hit light |
| 245 | + if (!pipeline().lights().empty()) { |
| 246 | + $if (shape.has_light()) { |
| 247 | + auto it = pipeline().geometry()->interaction(ray, hit); |
| 248 | + auto swl = sample_wl(); |
| 249 | + auto eval = light_sampler()->evaluate_hit(*it, ray->origin(), swl, time); |
| 250 | + Li += beta * eval.L * balance_heuristic(pdf_bsdf, eval.pdf); |
| 251 | + }; |
| 252 | + } |
| 253 | + |
| 254 | + $if (!shape.has_surface()) { $break; }; |
| 255 | + |
| 256 | + $suspend("sample_ray"); |
| 257 | + // generate uniform samples |
| 258 | + //$if((pixel_id.x==124) & (pixel_id.y==700)){ |
| 259 | + // device_log("frame_id:{}, ray:o: {}, d:{}, hit:{}, Li:{}, beta:{}", frame_index, ray->origin(), ray->direction(), hit.bary,spectrum->srgb(swl, Li),spectrum->srgb(swl, beta)); |
| 260 | + //}; |
| 261 | + auto it = pipeline().geometry()->interaction(ray, hit); |
| 262 | + auto u_light_selection = sampler()->generate_1d(); |
| 263 | + auto u_light_surface = sampler()->generate_2d(); |
| 264 | + |
| 265 | + // sample one light |
| 266 | + auto swl = sample_wl(); |
| 267 | + auto light_sample = light_sampler()->sample( |
| 268 | + *it, u_light_selection, u_light_surface, swl, time); |
| 269 | + |
| 270 | + // trace shadow ray |
| 271 | + auto occluded = pipeline().geometry()->intersect_any(light_sample.shadow_ray); |
| 272 | + |
| 273 | + // evaluate material |
| 274 | + auto surface_tag = it->shape().surface_tag(); |
| 275 | + if (node<CoroutinePathTracing>()->scheduler_type() == "wavefront") { |
| 276 | + $suspend("evaluate_surface", std::pair(surface_tag, "coro_hint")); |
| 277 | + } else { |
| 278 | + $suspend("evaluate_surface"); |
| 279 | + } |
| 280 | + swl = sample_wl(); |
| 281 | + auto wo = -ray->direction(); |
| 282 | + auto u_lobe = sampler()->generate_1d(); |
| 283 | + auto u_bsdf = sampler()->generate_2d(); |
| 284 | + auto u_rr = def(0.f); |
| 285 | + auto rr_depth = node<CoroutinePathTracing>()->rr_depth(); |
| 286 | + $if (depth + 1u >= rr_depth) { u_rr = sampler()->generate_1d(); }; |
| 287 | + it = pipeline().geometry()->interaction(ray, hit); |
| 288 | + surface_tag = it->shape().surface_tag(); |
| 289 | + auto eta_scale = def(1.f); |
| 290 | + |
| 291 | + PolymorphicCall<Surface::Closure> call; |
| 292 | + pipeline().surfaces().dispatch(surface_tag, [&](auto surface) noexcept { |
| 293 | + surface->closure(call, *it, swl, wo, 1.f, time); |
| 294 | + }); |
| 295 | + call.execute([&](const Surface::Closure *closure) noexcept { |
| 296 | + if (auto dispersive = closure->is_dispersive()) { |
| 297 | + $if (*dispersive) { swl.terminate_secondary(); }; |
| 298 | + } |
| 299 | + // direct lighting |
| 300 | + $if (light_sample.eval.pdf > 0.0f & !occluded) { |
| 301 | + auto wi = light_sample.shadow_ray->direction(); |
| 302 | + auto eval = closure->evaluate(wo, wi); |
| 303 | + auto w = balance_heuristic(light_sample.eval.pdf, eval.pdf) / |
| 304 | + light_sample.eval.pdf; |
| 305 | + Li += w * beta * eval.f * light_sample.eval.L; |
| 306 | + }; |
| 307 | + // sample material |
| 308 | + auto surface_sample = closure->sample(wo, u_lobe, u_bsdf); |
| 309 | + ray = it->spawn_ray(surface_sample.wi); |
| 310 | + pdf_bsdf = surface_sample.eval.pdf; |
| 311 | + auto w = ite(surface_sample.eval.pdf > 0.f, 1.f / surface_sample.eval.pdf, 0.f); |
| 312 | + beta *= w * surface_sample.eval.f; |
| 313 | + // apply eta scale |
| 314 | + auto eta = closure->eta().value_or(1.f); |
| 315 | + $switch (surface_sample.event) { |
| 316 | + $case (Surface::event_enter) { eta_scale = sqr(eta); }; |
| 317 | + $case (Surface::event_exit) { eta_scale = sqr(1.f / eta); }; |
| 318 | + }; |
| 319 | + }); |
| 320 | + beta = zero_if_any_nan(beta); |
| 321 | + $if (beta.all([](auto b) noexcept { return b <= 0.f; })) { $break; }; |
| 322 | + auto rr_threshold = node<CoroutinePathTracing>()->rr_threshold(); |
| 323 | + auto q = max(beta.max() * eta_scale, .05f); |
| 324 | + $if (depth + 1u >= rr_depth) { |
| 325 | + $if (q < rr_threshold & u_rr >= q) { $break; }; |
| 326 | + beta *= ite(q < rr_threshold, 1.0f / q, 1.f); |
| 327 | + }; |
| 328 | + }; |
| 329 | + return spectrum->srgb(swl, Li); |
| 330 | + } |
| 331 | +}; |
| 332 | + |
| 333 | +luisa::unique_ptr<Integrator::Instance> CoroutinePathTracing::build( |
| 334 | + Pipeline &pipeline, CommandBuffer &command_buffer) const noexcept { |
| 335 | + return luisa::make_unique<CoroutinePathTracingInstance>( |
| 336 | + pipeline, command_buffer, this); |
| 337 | +} |
| 338 | + |
| 339 | +}// namespace luisa::render |
| 340 | + |
| 341 | +LUISA_RENDER_MAKE_SCENE_NODE_PLUGIN(luisa::render::CoroutinePathTracing) |
0 commit comments