12 #ifndef AMDKERNELCODET_H 13 #define AMDKERNELCODET_H 48 #define AMD_HSA_BITS_SET(dst, mask, val) \ 49 dst &= (~(1 << mask ## _SHIFT) & ~mask); \ 50 dst |= (((val) << mask ## _SHIFT) & mask) 53 #define AMD_HSA_BITS_GET(src, mask) \ 54 ((src & mask) >> mask ## _SHIFT) \ 314 uint8_t reserved[75];
647 uint8_t reserved3[12];
649 uint64_t control_directives[16];
652 #endif // AMDKERNELCODET_H hsa_ext_control_directive_present64_t enabled_control_directives
This is a bit set indicating which control directives have been specified.
uint32_t hsa_ext_code_kind32_t
Indicates whether the generated ISA uses a dynamically sized call stack.
hsa_dim3_t required_workgroup_size
If requiredWorkgroupSize is not enabled then all elements for Dim3 must be 0, and the produced code c...
hsa_ext_exception_kind16_t enable_detect_exceptions
If enableDetectExceptions is not enabled then must be 0, otherwise must be non-0 and specifies the se...
uint32_t amd_kernel_code_version_major
uint32_t code_properties
Code properties.
AMD Kernel Code Object (amd_kernel_code_t).
uint16_t amd_machine_version_major
uint32_t max_dynamic_group_size
If maxDynamicGroupSize is not enabled then must be 0, and any amount of dynamic group segment can be ...
uint16_t reserved_sgpr_first
If reserved_sgpr_count is 0 then must be 0.
int64_t kernel_code_prefetch_byte_offset
Range of bytes to consider prefetching expressed as an offset and size.
uint8_t kernarg_segment_alignment
The maximum byte alignment of variables used by the kernel in the specified memory segment...
uint8_t group_segment_alignment
uint8_t hsa_ext_brig_machine_model8_t
Indicates whether the generated code supports debugging.
uint16_t amd_machine_version_minor
uint64_t hsa_ext_control_directive_present64_t
uint32_t requested_workgroups_per_cu
If requestedWorkgroupsPerCu is not enabled then must be 0, and the finalizer is free to generate ISA ...
uint32_t amd_kernel_code_version_minor
uint64_t runtime_loader_kernel_symbol
uint64_t compute_pgm_resource_registers
Shader program settings for CS.
uint16_t wavefront_sgpr_count
Number of scalar registers used by a wavefront.
uint32_t amd_code_property32_t
Every amd_*_code_t has the following properties, which are composed of a number of bit fields...
uint32_t gds_segment_byte_size
Number of bytes of GDS required by kernel dispatch.
uint8_t private_segment_alignment
struct hsa_ext_control_directives_s hsa_ext_control_directives_t
The hsa_ext_control_directives_t specifies the values for the HSAIL control directives.
struct amd_kernel_code_s amd_kernel_code_t
AMD Kernel Code Object (amd_kernel_code_t).
uint32_t amd_code_version32_t
The version of the amd_*_code_t struct.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
struct hsa_dim3_s hsa_dim3_t
Indicates whether global memory addresses are 64 bits.
uint64_t kernarg_segment_byte_size
The size in bytes of the kernarg segment that holds the values of the arguments to the kernel...
uint16_t hsa_ext_exception_kind16_t
uint64_t reserved0
Reserved. Must be 0.
uint16_t debug_wavefront_private_segment_offset_sgpr
If is_debug_supported is 0 then must be 0.
uint16_t amd_machine_version_stepping
uint16_t workitem_vgpr_count
Number of vector registers used by each work-item.
uint8_t wavefront_size
Wavefront size expressed as a power of two.
uint32_t workgroup_group_segment_byte_size
The amount of group segment memory required by a work-group in bytes.
The interleave (swizzle) element size in bytes required by the code for private memory.
hsa_ext_exception_kind16_t enable_break_exceptions
If enableBreakExceptions is not enabled then must be 0, otherwise must be non-0 and specifies the set...
Enable the setup of the SGPR user data registers (AMD_CODE_PROPERTY_ENABLE_SGPR_*); see the documentation of amd_kernel_code_t for the initial register state.
uint64_t amd_compute_pgm_resource_register64_t
Shader program settings for CS.
The hsa_ext_control_directives_t specifies the values for the HSAIL control directives.
uint8_t hsa_ext_brig_profile8_t
uint32_t workitem_private_segment_byte_size
The amount of memory required for the combined private, spill and arg segments for a work-item in byt...
hsa_dim3_t required_grid_size
If not enabled then all elements for Dim3 must be 0, otherwise every element must be greater than 0...
uint32_t max_flat_grid_size
If maxFlatGridSize is not enabled then must be 0, otherwise must be greater than 0.
uint64_t kernel_code_prefetch_byte_size
uint32_t max_flat_workgroup_size
If maxFlatWorkgroupSize is not enabled then must be 0, otherwise must be greater than 0...
Control wave ID base counter for GDS ordered-append.
int64_t kernel_code_entry_byte_offset
Byte offset (possibly negative) from start of amd_kernel_code_t object to kernel's entry point instru...
uint32_t hsa_ext_code_kind_t
uint16_t amd_machine_kind
uint32_t workgroup_fbarrier_count
Number of fbarriers used in the kernel and all functions it calls.
uint16_t debug_private_segment_buffer_sgpr
If is_debug_supported is 0 then must be 0.
uint16_t reserved_vgpr_count
The number of consecutive VGPRs reserved by the client.
uint8_t required_dim
If requiredDim is not enabled then must be 0 and the produced kernel code can be dispatched with 1...
uint16_t reserved_vgpr_first
If reserved_vgpr_count is 0 then must be 0.
uint16_t reserved_sgpr_count
The number of consecutive SGPRs reserved by the client.