# Register gpu_multi_device.cpp under its own group, correctness_multi_gpu,
# rather than under the plain correctness group used for the lists below.
# NOTE(review): tests() is a project helper defined outside this file; how it
# turns GROUPS/SOURCES into targets, test names and labels should be confirmed
# in its definition.
tests(GROUPS correctness_multi_gpu
      SOURCES
      gpu_multi_device.cpp
      )

# Correctness tests registered with the single group "correctness" (the
# tests that also carry the "multithreaded" group are listed separately).
#
# Keep SOURCES in ASCII order ('.' sorts before '_', which sorts before
# lowercase letters), so that new entries have one obvious place to go and
# duplicates or stale names are easy to spot in review.
#
# The per-test settings further down in this file refer to
# correctness_<name> targets/tests whose <name> matches entries here
# (e.g. image_io.cpp, gpu_allocation_cache.cpp); renaming or removing an
# entry needs a matching update there.
# NOTE(review): the exact name derivation lives in the tests() helper,
# which is defined outside this file.
tests(GROUPS correctness
      SOURCES
      align_bounds.cpp
      argmax.cpp
      async_device_copy.cpp
      async_order.cpp
      autodiff.cpp
      bad_likely.cpp
      bad_partition_always_throws.cpp
      bit_counting.cpp
      bits_known.cpp
      bitwise_ops.cpp
      bool_compute_root_vectorize.cpp
      bool_predicate_cast.cpp
      bound.cpp
      bound_small_allocations.cpp
      bound_storage.cpp
      boundary_conditions.cpp
      bounds.cpp
      bounds_inference.cpp
      bounds_inference_chunk.cpp
      bounds_inference_complex.cpp
      bounds_inference_outer_split.cpp
      bounds_of_abs.cpp
      bounds_of_cast.cpp
      bounds_of_func.cpp
      bounds_of_monotonic_math.cpp
      bounds_of_multiply.cpp
      bounds_of_pure_intrinsics.cpp
      bounds_of_split.cpp
      bounds_query.cpp
      bounds_query_respects_specialize_fail.cpp
      buffer_t.cpp
      c_function.cpp
      callable.cpp
      callable_errors.cpp
      callable_generator.cpp
      callable_typed.cpp
      cascaded_filters.cpp
      cast.cpp
      cast_handle.cpp
      chunk.cpp
      chunk_sharing.cpp
      circular_reference_leak.cpp
      code_explosion.cpp
      compare_vars.cpp
      compile_to.cpp
      compile_to_bitcode.cpp
      compile_to_lowered_stmt.cpp
      compile_to_multitarget.cpp
      compute_at_reordered_update_stage.cpp
      compute_at_split_rvar.cpp
      compute_inside_guard.cpp
      compute_with_in.cpp
      compute_with_inlined.cpp
      computed_index.cpp
      concat.cpp
      constant_expr.cpp
      constant_interval.cpp
      constant_type.cpp
      constraints.cpp
      convolution_multiple_kernels.cpp
      cross_compilation.cpp
      cse_name_collision.cpp
      cse_nan.cpp
      cuda_8_bit_dot_product.cpp
      custom_allocator.cpp
      custom_auto_scheduler.cpp
      custom_cuda_context.cpp
      custom_error_reporter.cpp
      custom_jit_context.cpp
      custom_lowering_pass.cpp
      dead_realization_in_specialization.cpp
      debug_to_file.cpp
      debug_to_file_multiple_outputs.cpp
      debug_to_file_reorder.cpp
      deferred_loop_level.cpp
      deinterleave4.cpp
      device_buffer_copies_with_profile.cpp
      device_buffer_copy.cpp
      device_copy_at_inner_loop.cpp
      device_crop.cpp
      device_slice.cpp
      dilate3x3.cpp
      div_by_zero.cpp
      div_round_to_zero.cpp
      dynamic_allocation_in_gpu_kernel.cpp
      dynamic_reduction_bounds.cpp
      early_out.cpp
      embed_bitcode.cpp
      erf.cpp
      error_macro_unreachable.cpp
      exception.cpp
      explicit_inline_reductions.cpp
      extern_bounds_inference.cpp
      extern_consumer.cpp
      extern_error.cpp
      extern_output_expansion.cpp
      extern_partial.cpp
      extern_producer.cpp
      extern_reorder_storage.cpp
      extern_sort.cpp
      extern_stage_on_device.cpp
      extract_concat_bits.cpp
      failed_unroll.cpp
      fast_trigonometric.cpp
      fibonacci.cpp
      fit_function.cpp
      float16_t.cpp
      float16_t_comparison.cpp
      float16_t_constants.cpp
      float16_t_image_type.cpp
      float16_t_neon_op_check.cpp
      for_each_element.cpp
      force_onto_stack.cpp
      func_lifetime.cpp
      func_lifetime_2.cpp
      fuse.cpp
      fuse_gpu_threads.cpp
      fused_where_inner_extent_is_zero.cpp
      fuzz_float_stores.cpp
      fuzz_schedule.cpp
      fuzz_simplify.cpp
      gameoflife.cpp
      gather.cpp
      gpu_alloc_group_profiling.cpp
      gpu_allocation_cache.cpp
      gpu_arg_types.cpp
      gpu_assertion_in_kernel.cpp
      gpu_bounds_inference_failure.cpp
      gpu_condition_lifting.cpp
      gpu_cpu_simultaneous_read.cpp
      gpu_data_flows.cpp
      gpu_different_blocks_threads_dimensions.cpp
      gpu_dynamic_shared.cpp
      gpu_f16_intrinsics.cpp
      gpu_free_sync.cpp
      gpu_give_input_buffers_device_allocations.cpp
      gpu_jit_explicit_copy_to_device.cpp
      gpu_large_alloc.cpp
      gpu_many_kernels.cpp
      gpu_metal_completion_handler_error_check.cpp
      gpu_mixed_dimensionality.cpp
      gpu_mixed_shared_mem_types.cpp
      gpu_multi_kernel.cpp
      gpu_non_contiguous_copy.cpp
      gpu_non_monotonic_shared_mem_size.cpp
      gpu_object_lifetime_1.cpp
      gpu_object_lifetime_2.cpp
      gpu_object_lifetime_3.cpp
      gpu_param_allocation.cpp
      gpu_reuse_shared_memory.cpp
      gpu_specialize.cpp
      gpu_store_in_register_with_no_lanes_loop.cpp
      gpu_sum_scan.cpp
      gpu_texture.cpp
      gpu_thread_barrier.cpp
      gpu_transpose.cpp
      gpu_vectorize.cpp
      gpu_vectorized_shared_memory.cpp
      growing_stack.cpp
      half_native_interleave.cpp
      halide_buffer.cpp
      handle.cpp
      heap_cleanup.cpp
      hello_gpu.cpp
      hexagon_scatter.cpp
      histogram.cpp
      histogram_equalize.cpp
      hoist_loop_invariant_if_statements.cpp
      hoist_storage.cpp
      host_alignment.cpp
      image_io.cpp
      image_of_lists.cpp
      implicit_args.cpp
      implicit_args_tests.cpp
      in_place.cpp
      indexing_access_undef.cpp
      infer_arguments.cpp
      inline_reduction.cpp
      inlined_generator.cpp
      input_image_bounds_check.cpp
      input_larger_than_two_gigs.cpp
      integer_powers.cpp
      interleave.cpp
      interleave_rgb.cpp
      interleave_x.cpp
      interval.cpp
      intrinsics.cpp
      invalid_gpu_loop_nests.cpp
      inverse.cpp
      irprinter.cpp
      isnan.cpp
      issue_3926.cpp
      iterate_over_circle.cpp
      lambda.cpp
      lazy_convolution.cpp
      leak_device_memory.cpp
      left_shift_negative.cpp
      lerp.cpp
      let_in_rdom_bound.cpp
      likely.cpp
      load_library.cpp
      logical.cpp
      loop_carry.cpp
      loop_invariant_extern_calls.cpp
      loop_level_generator_param.cpp
      lossless_cast.cpp
      lots_of_loop_invariants.cpp
      low_bit_depth_noise.cpp
      make_struct.cpp
      many_dimensions.cpp
      many_small_extern_stages.cpp
      many_updates.cpp
      math.cpp
      median3x3.cpp
      memoize_cloned.cpp
      min_extent.cpp
      mod.cpp
      mul_div_mod.cpp
      multi_output_pipeline_with_bad_sizes.cpp
      multi_splits_with_diff_tail_strategies.cpp
      multi_way_select.cpp
      multipass_constraints.cpp
      multiple_outputs.cpp
      mux.cpp
      narrow_predicates.cpp
      negative_split_factors.cpp
      nested_tail_strategies.cpp
      newtons_method.cpp
      non_nesting_extern_bounds_query.cpp
      non_vector_aligned_embeded_buffer.cpp
      obscure_image_references.cpp
      out_constraint.cpp
      out_of_memory.cpp
      output_larger_than_two_gigs.cpp
      parallel_gpu_nested.cpp
      param.cpp
      parameter_constraints.cpp
      partial_application.cpp
      partial_realization.cpp
      partition_loops.cpp
      partition_loops_bug.cpp
      partition_max_filter.cpp
      pipeline_set_jit_externs_func.cpp
      plain_c_includes.c
      popc_clz_ctz_bounds.cpp
      predicated_store_load.cpp
      prefetch.cpp
      print.cpp
      print_loop_nest.cpp
      process_some_tiles.cpp
      pseudostack_shares_slots.cpp
      python_extension_gen.cpp
      pytorch.cpp
      realize_condition_depends_on_tuple.cpp
      realize_larger_than_two_gigs.cpp
      realize_over_shifted_domain.cpp
      recursive_box_filters.cpp
      reduction_chain.cpp
      reduction_non_rectangular.cpp
      reduction_predicate_racing.cpp
      reduction_schedule.cpp
      register_shuffle.cpp
      reorder_storage.cpp
      require.cpp
      reschedule.cpp
      respect_input_constraint_in_bounds_inference.cpp
      reuse_stack_alloc.cpp
      ring_buffer.cpp
      round.cpp
      saturating_casts.cpp
      scatter.cpp
      set_custom_trace.cpp
      shadowed_bound.cpp
      shared_self_references.cpp
      shift_by_unsigned_negated.cpp
      shifted_image.cpp
      side_effects.cpp
      simd_op_check_arm.cpp
      simd_op_check_hvx.cpp
      simd_op_check_powerpc.cpp
      simd_op_check_riscv.cpp
      simd_op_check_sve2.cpp
      simd_op_check_wasm.cpp
      simd_op_check_x86.cpp
      simplified_away_embedded_image.cpp
      simplify.cpp
      skip_stages.cpp
      skip_stages_external_array_functions.cpp
      skip_stages_memoize.cpp
      sliding_backwards.cpp
      sliding_over_guard_with_if.cpp
      sliding_reduction.cpp
      sliding_window.cpp
      sort_exprs.cpp
      specialize.cpp
      specialize_to_gpu.cpp
      specialize_trim_condition.cpp
      split_by_non_factor.cpp
      split_fuse_rvar.cpp
      split_reuse_inner_name_bug.cpp
      split_store_compute.cpp
      stable_realization_order.cpp
      stack_allocations.cpp
      stage_strided_loads.cpp
      stencil_chain_in_update_definitions.cpp
      stmt_to_html.cpp
      storage_folding.cpp
      store_in.cpp
      strict_float.cpp
      strict_float_bounds.cpp
      strided_load.cpp
      target.cpp
      target_query.cpp
      tiled_matmul.cpp
      tracing.cpp
      tracing_bounds.cpp
      tracing_broadcast.cpp
      tracing_stack.cpp
      transitive_bounds.cpp
      trim_no_ops.cpp
      tuple_partial_update.cpp
      tuple_reduction.cpp
      tuple_select.cpp
      tuple_undef.cpp
      tuple_update_ops.cpp
      two_vector_args.cpp
      typed_func.cpp
      undef.cpp
      uninitialized_read.cpp
      unique_func_image.cpp
      unroll_dynamic_loop.cpp
      unroll_loop_with_implied_constant_bounds.cpp
      unrolled_reduction.cpp
      unsafe_dedup_lets.cpp
      unsafe_promises.cpp
      unused_func.cpp
      update_chunk.cpp
      vector_bounds_inference.cpp
      vector_cast.cpp
      vector_extern.cpp
      vector_math.cpp
      vector_print_bug.cpp
      vector_reductions.cpp
      vector_shuffle.cpp
      vector_tile.cpp
      vectorize_guard_with_if.cpp
      vectorize_mixed_widths.cpp
      vectorize_nested.cpp
      vectorize_varying_allocation_size.cpp
      vectorized_assert.cpp
      vectorized_gpu_allocation.cpp
      vectorized_guard_with_if_tail.cpp
      vectorized_initialization.cpp
      vectorized_load_from_vectorized_allocation.cpp
      vectorized_reduction_bug.cpp
      widening_lerp.cpp
      widening_reduction.cpp
      )

# Correctness tests that carry a second group, "multithreaded", in addition
# to "correctness". SOURCES is kept in alphabetical order.
# NOTE(review): presumably the extra group lets these tests be selected or
# excluded separately from the rest of the correctness suite - confirm in the
# tests() helper, which is defined outside this file.
tests(GROUPS correctness multithreaded
      SOURCES
      assertion_failure_in_parallel_for.cpp
      async.cpp
      async_copy_chain.cpp
      atomic_tuples.cpp
      atomics.cpp
      compute_outermost.cpp
      compute_with.cpp
      convolution.cpp
      extern_consumer_tiled.cpp
      extern_stage.cpp
      func_clone.cpp
      func_wrapper.cpp
      image_wrapper.cpp
      interpreter.cpp
      legal_race_condition.cpp
      lots_of_dimensions.cpp
      memoize.cpp
      multi_pass_reduction.cpp
      multiple_outputs_extern.cpp
      multiple_scatter.cpp
      named_updates.cpp
      nested_shiftinwards.cpp
      oddly_sized_output.cpp
      parallel.cpp
      parallel_alloc.cpp
      parallel_fork.cpp
      parallel_nested.cpp
      parallel_nested_1.cpp
      parallel_reductions.cpp
      parallel_rvar.cpp
      parallel_scatter.cpp
      random.cpp
      reorder_rvars.cpp
      rfactor.cpp
      stream_compaction.cpp
      thread_safety.cpp
      truncated_pyramid.cpp
      tuple_vector_reduce.cpp
      unroll_huge_mux.cpp
      )

# correctness_image_io is the one test here that depends on Halide::ImageIO,
# so link it explicitly (PRIVATE: nothing consumes a test executable).
target_link_libraries(
    correctness_image_io
    PRIVATE
    Halide::ImageIO
)

# correctness_gpu_allocation_cache is the one test here that depends on
# Halide::ThreadPool, so link it explicitly.
target_link_libraries(
    correctness_gpu_allocation_cache
    PRIVATE
    Halide::ThreadPool
)

# Test executables that use external funcs must be built with
# ENABLE_EXPORTS turned on. Keep this list alphabetical.
set_property(
    TARGET
    correctness_async
    correctness_atomics
    correctness_c_function
    correctness_callable
    correctness_callable_generator
    correctness_callable_typed
    correctness_compute_at_split_rvar
    correctness_concat
    correctness_custom_lowering_pass
    correctness_extern_bounds_inference
    correctness_extern_consumer
    correctness_extern_consumer_tiled
    correctness_extern_error
    correctness_extern_output_expansion
    correctness_extern_partial
    correctness_extern_producer
    correctness_extern_reorder_storage
    correctness_extern_sort
    correctness_extern_stage
    correctness_extern_stage_on_device
    correctness_handle
    correctness_image_of_lists
    correctness_lazy_convolution
    correctness_loop_invariant_extern_calls
    correctness_make_struct
    correctness_many_small_extern_stages
    correctness_memoize
    correctness_memoize_cloned
    correctness_multiple_outputs_extern
    correctness_non_nesting_extern_bounds_query
    correctness_parallel_fork
    correctness_pipeline_set_jit_externs_func
    correctness_process_some_tiles
    correctness_side_effects
    correctness_skip_stages
    correctness_skip_stages_external_array_functions
    correctness_sliding_backwards
    correctness_sliding_over_guard_with_if
    correctness_sliding_reduction
    correctness_sliding_window
    correctness_storage_folding
    PROPERTY ENABLE_EXPORTS TRUE
)

# The tests below are parallelized internally. Running them alongside other
# tests can make them time out, so mark each one RUN_SERIAL to have CTest
# run it on its own.
set_property(
    TEST
    correctness_mul_div_mod
    correctness_simd_op_check_arm
    correctness_simd_op_check_hvx
    correctness_simd_op_check_powerpc
    correctness_simd_op_check_riscv
    correctness_simd_op_check_sve2
    correctness_simd_op_check_wasm
    correctness_simd_op_check_x86
    correctness_vector_cast
    correctness_vector_math
    correctness_vector_reductions
    PROPERTY RUN_SERIAL TRUE
)
