Hi, We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue. We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access. Here's the implementation of osl_get_attribute for our test: __device__ int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest) { float *f = reinterpret_cast<float *>(attr_dest); if (attr_type == nullptr) { f[0] = 0.5f; f[1] = 0.5f; f[2] = 0.5f; return 1; } const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type; f[0] = type.basetype; f[1] = type.aggregate; f[2] = type.vecsemantics; return 1; }
If we remove the lines in bold red and replace them by the following ones, it works just fine: f[0] = 1.0f; f[1] = 0.5f; f[2] = 0.0f; In our test, we have a simple shader doing this: surface attrtest() { color c; getattribute("foo", c); Ci = c * emission(); }
And here's the generated PTX code:
// // Generated by LLVM NVPTX Back-End // .version 5.0 .target sm_35 .address_size 64 // .globl __direct_callable__group_unnamed_group_3_3_init .extern .func (.param .b32 func_retval0) osl_get_attribute ( .param .b64 osl_get_attribute_param_0, .param .b32 osl_get_attribute_param_1, .param .b64 osl_get_attribute_param_2, .param .b64 osl_get_attribute_param_3, .param .b32 osl_get_attribute_param_4, .param .b32 osl_get_attribute_param_5, .param .b64 osl_get_attribute_param_6, .param .b64 osl_get_attribute_param_7 ) ; .extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component ( .param .b64 osl_allocate_weighted_closure_component_param_0, .param .b32 osl_allocate_weighted_closure_component_param_1, .param .b32 osl_allocate_weighted_closure_component_param_2, .param .b64 osl_allocate_weighted_closure_component_param_3 ) ; .extern .global .align 8 .u64 ds_555c6f602f9383e3_0003; .visible .func __direct_callable__group_unnamed_group_3_3_init( .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0, .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1 ) {
ret; } // .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4 .visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4( .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0, .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1 ) { .local .align 8 .b8 __local_depot1[16]; .reg .b64 %SP; .reg .b64 %SPL; .reg .pred %p<2>; .reg .b16 %rs<2>; .reg .b32 %r<6>; .reg .b64 %rd<8>; mov.u64 %SPL, __local_depot1; cvta.local.u64 %SP, %SPL; ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0]; ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003]; mov.u64 %rd4, 0; mov.u32 %r1, 0; mov.u64 %rd5, 140457662007856; add.u64 %rd6, %SP, 0; { // callseq 0, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r1; .param .b64 param2; st.param.b64 [param2+0], %rd4; .param .b64 param3; st.param.b64 [param3+0], %rd3; .param .b32 param4; st.param.b32 [param4+0], %r1; .param .b32 param5; st.param.b32 [param5+0], %r2; .param .b64 param6; st.param.b64 [param6+0], %rd5; .param .b64 param7; st.param.b64 [param7+0], %rd6; .param .b32 retval0; call.uni (retval0), osl_get_attribute, ( param0, param1, param2, param3, param4, param5, param6, param7 ); ld.param.b32 %r3, [retval0+0]; } // callseq 0 mov.u32 %r5, 1; { // callseq 1, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r5; .param .b32 param2; st.param.b32 [param2+0], %r5; .param .b64 param3; st.param.b64 [param3+0], %rd6; .param .b64 retval0; call.uni (retval0), osl_allocate_weighted_closure_component, ( param0, param1, param2, param3 ); ld.param.b64 %rd7, [retval0+0]; } // callseq 1 setp.eq.s64 %p1, %rd7, 0; @%p1 bra LBB1_2; mov.u16 %rs1, 0; st.u8 [%rd7+16], %rs1; LBB1_2: st.u64 [%rd2+272], %rd7; ret; } As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug? Thanks in advance for any help on this! Cheers  | Nicolas Guiard | Head of R&D |  | +33 972 452 846 |
|
| |
This email and any files transmitted with it are confidential and intended solely for the use of the individual or entity to whom they are addressed.
|
|
Hi,
I've been able to make it work by changing the prototype of the osl_get_attribute callback and passing the TypeDesc casted into a long long instead of passing its address. This is exactly what is already done in osl_bind_interpolated_param.
Cheers  | Nicolas Guiard | Head of R&D |  | +33 972 452 846 |
|
| |
This email and any files transmitted with it are confidential and intended solely for the use of the individual or entity to whom they are addressed.
toggle quoted message
Show quoted text
Hi, We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue. We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access. Here's the implementation of osl_get_attribute for our test: __device__ int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest) { float *f = reinterpret_cast<float *>(attr_dest); if (attr_type == nullptr) { f[0] = 0.5f; f[1] = 0.5f; f[2] = 0.5f; return 1; } const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type; f[0] = type.basetype; f[1] = type.aggregate; f[2] = type.vecsemantics; return 1; }
If we remove the lines in bold red and replace them by the following ones, it works just fine: f[0] = 1.0f; f[1] = 0.5f; f[2] = 0.0f; In our test, we have a simple shader doing this: surface attrtest() { color c; getattribute("foo", c); Ci = c * emission(); }
And here's the generated PTX code:
// // Generated by LLVM NVPTX Back-End // .version 5.0 .target sm_35 .address_size 64 // .globl __direct_callable__group_unnamed_group_3_3_init .extern .func (.param .b32 func_retval0) osl_get_attribute ( .param .b64 osl_get_attribute_param_0, .param .b32 osl_get_attribute_param_1, .param .b64 osl_get_attribute_param_2, .param .b64 osl_get_attribute_param_3, .param .b32 osl_get_attribute_param_4, .param .b32 osl_get_attribute_param_5, .param .b64 osl_get_attribute_param_6, .param .b64 osl_get_attribute_param_7 ) ; .extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component ( .param .b64 osl_allocate_weighted_closure_component_param_0, .param .b32 osl_allocate_weighted_closure_component_param_1, .param .b32 osl_allocate_weighted_closure_component_param_2, .param .b64 osl_allocate_weighted_closure_component_param_3 ) ; .extern .global .align 8 .u64 ds_555c6f602f9383e3_0003; .visible .func __direct_callable__group_unnamed_group_3_3_init( .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0, .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1 ) {
ret; } // .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4 .visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4( .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0, .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1 ) { .local .align 8 .b8 __local_depot1[16]; .reg .b64 %SP; .reg .b64 %SPL; .reg .pred %p<2>; .reg .b16 %rs<2>; .reg .b32 %r<6>; .reg .b64 %rd<8>; mov.u64 %SPL, __local_depot1; cvta.local.u64 %SP, %SPL; ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0]; ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003]; mov.u64 %rd4, 0; mov.u32 %r1, 0; mov.u64 %rd5, 140457662007856; add.u64 %rd6, %SP, 0; { // callseq 0, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r1; .param .b64 param2; st.param.b64 [param2+0], %rd4; .param .b64 param3; st.param.b64 [param3+0], %rd3; .param .b32 param4; st.param.b32 [param4+0], %r1; .param .b32 param5; st.param.b32 [param5+0], %r2; .param .b64 param6; st.param.b64 [param6+0], %rd5; .param .b64 param7; st.param.b64 [param7+0], %rd6; .param .b32 retval0; call.uni (retval0), osl_get_attribute, ( param0, param1, param2, param3, param4, param5, param6, param7 ); ld.param.b32 %r3, [retval0+0]; } // callseq 0 mov.u32 %r5, 1; { // callseq 1, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r5; .param .b32 param2; st.param.b32 [param2+0], %r5; .param .b64 param3; st.param.b64 [param3+0], %rd6; .param .b64 retval0; call.uni (retval0), osl_allocate_weighted_closure_component, ( param0, param1, param2, param3 ); ld.param.b64 %rd7, [retval0+0]; } // callseq 1 setp.eq.s64 %p1, %rd7, 0; @%p1 bra LBB1_2; mov.u16 %rs1, 0; st.u8 [%rd7+16], %rs1; LBB1_2: st.u64 [%rd2+272], %rd7; ret; } As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug? Thanks in advance for any help on this! Cheers  | Nicolas Guiard | Head of R&D |  | +33 972 452 846 |
|
| |
This email and any files transmitted with it are confidential and intended solely for the use of the individual or entity to whom they are addressed.
|
|
And I've merged this PR into master now, so hopefully this will be fixed.
-- lg On Sep 13, 2021, at 6:14 AM, Nicolas Guiard < nicolas@...> wrote:
Hi,
I've been able to make it work by changing the prototype of the osl_get_attribute callback and passing the TypeDesc casted into a long long instead of passing its address. This is exactly what is already done in osl_bind_interpolated_param.
Cheers  | Nicolas Guiard | Head of R&D |  | +33 972 452 846 |
|
| |
This email and any files transmitted with it are confidential and intended solely for the use of the individual or entity to whom they are addressed. Hi, We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue. We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access. Here's the implementation of osl_get_attribute for our test: __device__ int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest) { float *f = reinterpret_cast<float *>(attr_dest); if (attr_type == nullptr) { f[0] = 0.5f; f[1] = 0.5f; f[2] = 0.5f; return 1; } const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type; f[0] = type.basetype; f[1] = type.aggregate; f[2] = type.vecsemantics; return 1; }
If we remove the lines in bold red and replace them by the following ones, it works just fine: f[0] = 1.0f; f[1] = 0.5f; f[2] = 0.0f; In our test, we have a simple shader doing this: surface attrtest() { color c; getattribute("foo", c); Ci = c * emission(); }
And here's the generated PTX code:
// // Generated by LLVM NVPTX Back-End // .version 5.0 .target sm_35 .address_size 64 // .globl __direct_callable__group_unnamed_group_3_3_init .extern .func (.param .b32 func_retval0) osl_get_attribute ( .param .b64 osl_get_attribute_param_0, .param .b32 osl_get_attribute_param_1, .param .b64 osl_get_attribute_param_2, .param .b64 osl_get_attribute_param_3, .param .b32 osl_get_attribute_param_4, .param .b32 osl_get_attribute_param_5, .param .b64 osl_get_attribute_param_6, .param .b64 osl_get_attribute_param_7 ) ; .extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component ( .param .b64 osl_allocate_weighted_closure_component_param_0, .param .b32 osl_allocate_weighted_closure_component_param_1, .param .b32 osl_allocate_weighted_closure_component_param_2, .param .b64 osl_allocate_weighted_closure_component_param_3 ) ; .extern .global .align 8 .u64 ds_555c6f602f9383e3_0003; .visible .func __direct_callable__group_unnamed_group_3_3_init( .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0, .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1 ) {
ret; } // .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4 .visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4( .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0, .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1 ) { .local .align 8 .b8 __local_depot1[16]; .reg .b64 %SP; .reg .b64 %SPL; .reg .pred %p<2>; .reg .b16 %rs<2>; .reg .b32 %r<6>; .reg .b64 %rd<8>; mov.u64 %SPL, __local_depot1; cvta.local.u64 %SP, %SPL; ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0]; ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003]; mov.u64 %rd4, 0; mov.u32 %r1, 0; mov.u64 %rd5, 140457662007856; add.u64 %rd6, %SP, 0; { // callseq 0, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r1; .param .b64 param2; st.param.b64 [param2+0], %rd4; .param .b64 param3; st.param.b64 [param3+0], %rd3; .param .b32 param4; st.param.b32 [param4+0], %r1; .param .b32 param5; st.param.b32 [param5+0], %r2; .param .b64 param6; st.param.b64 [param6+0], %rd5; .param .b64 param7; st.param.b64 [param7+0], %rd6; .param .b32 retval0; call.uni (retval0), osl_get_attribute, ( param0, param1, param2, param3, param4, param5, param6, param7 ); ld.param.b32 %r3, [retval0+0]; } // callseq 0 mov.u32 %r5, 1; { // callseq 1, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r5; .param .b32 param2; st.param.b32 [param2+0], %r5; .param .b64 param3; st.param.b64 [param3+0], %rd6; .param .b64 retval0; call.uni (retval0), osl_allocate_weighted_closure_component, ( param0, param1, param2, param3 ); ld.param.b64 %rd7, [retval0+0]; } // callseq 1 setp.eq.s64 %p1, %rd7, 0; @%p1 bra LBB1_2; mov.u16 %rs1, 0; st.u8 [%rd7+16], %rs1; LBB1_2: st.u64 [%rd2+272], %rd7; ret; } As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug? Thanks in advance for any help on this! Cheers  | Nicolas Guiard | Head of R&D |  | +33 972 452 846 |
|
| |
This email and any files transmitted with it are confidential and intended solely for the use of the individual or entity to whom they are addressed.
|
|
toggle quoted message
Show quoted text
Le mar. 14 sept. 2021 à 09:00, Larry Gritz < lg@...> a écrit : And I've merged this PR into master now, so hopefully this will be fixed.
-- lg On Sep 13, 2021, at 6:14 AM, Nicolas Guiard < nicolas@...> wrote:
Hi,
I've been able to make it work by changing the prototype of the osl_get_attribute callback and passing the TypeDesc casted into a long long instead of passing its address. This is exactly what is already done in osl_bind_interpolated_param.
Cheers  | Nicolas Guiard | Head of R&D |  | +33 972 452 846 |
|
| |
This email and any files transmitted with it are confidential and intended solely for the use of the individual or entity to whom they are addressed. Hi, We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue. We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access. Here's the implementation of osl_get_attribute for our test: __device__ int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest) { float *f = reinterpret_cast<float *>(attr_dest); if (attr_type == nullptr) { f[0] = 0.5f; f[1] = 0.5f; f[2] = 0.5f; return 1; } const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type; f[0] = type.basetype; f[1] = type.aggregate; f[2] = type.vecsemantics; return 1; }
If we remove the lines in bold red and replace them by the following ones, it works just fine: f[0] = 1.0f; f[1] = 0.5f; f[2] = 0.0f; In our test, we have a simple shader doing this: surface attrtest() { color c; getattribute("foo", c); Ci = c * emission(); }
And here's the generated PTX code:
// // Generated by LLVM NVPTX Back-End // .version 5.0 .target sm_35 .address_size 64 // .globl __direct_callable__group_unnamed_group_3_3_init .extern .func (.param .b32 func_retval0) osl_get_attribute ( .param .b64 osl_get_attribute_param_0, .param .b32 osl_get_attribute_param_1, .param .b64 osl_get_attribute_param_2, .param .b64 osl_get_attribute_param_3, .param .b32 osl_get_attribute_param_4, .param .b32 osl_get_attribute_param_5, .param .b64 osl_get_attribute_param_6, .param .b64 osl_get_attribute_param_7 ) ; .extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component ( .param .b64 osl_allocate_weighted_closure_component_param_0, .param .b32 osl_allocate_weighted_closure_component_param_1, .param .b32 osl_allocate_weighted_closure_component_param_2, .param .b64 osl_allocate_weighted_closure_component_param_3 ) ; .extern .global .align 8 .u64 ds_555c6f602f9383e3_0003; .visible .func __direct_callable__group_unnamed_group_3_3_init( .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0, .param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1 ) {
ret; } // .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4 .visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4( .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0, .param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1 ) { .local .align 8 .b8 __local_depot1[16]; .reg .b64 %SP; .reg .b64 %SPL; .reg .pred %p<2>; .reg .b16 %rs<2>; .reg .b32 %r<6>; .reg .b64 %rd<8>; mov.u64 %SPL, __local_depot1; cvta.local.u64 %SP, %SPL; ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0]; ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003]; mov.u64 %rd4, 0; mov.u32 %r1, 0; mov.u64 %rd5, 140457662007856; add.u64 %rd6, %SP, 0; { // callseq 0, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r1; .param .b64 param2; st.param.b64 [param2+0], %rd4; .param .b64 param3; st.param.b64 [param3+0], %rd3; .param .b32 param4; st.param.b32 [param4+0], %r1; .param .b32 param5; st.param.b32 [param5+0], %r2; .param .b64 param6; st.param.b64 [param6+0], %rd5; .param .b64 param7; st.param.b64 [param7+0], %rd6; .param .b32 retval0; call.uni (retval0), osl_get_attribute, ( param0, param1, param2, param3, param4, param5, param6, param7 ); ld.param.b32 %r3, [retval0+0]; } // callseq 0 mov.u32 %r5, 1; { // callseq 1, 0 .reg .b32 temp_param_reg; .param .b64 param0; st.param.b64 [param0+0], %rd2; .param .b32 param1; st.param.b32 [param1+0], %r5; .param .b32 param2; st.param.b32 [param2+0], %r5; .param .b64 param3; st.param.b64 [param3+0], %rd6; .param .b64 retval0; call.uni (retval0), osl_allocate_weighted_closure_component, ( param0, param1, param2, param3 ); ld.param.b64 %rd7, [retval0+0]; } // callseq 1 setp.eq.s64 %p1, %rd7, 0; @%p1 bra LBB1_2; mov.u16 %rs1, 0; st.u8 [%rd7+16], %rs1; LBB1_2: st.u64 [%rd2+272], %rd7; ret; } As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug? Thanks in advance for any help on this! Cheers  | Nicolas Guiard | Head of R&D |  | +33 972 452 846 |
|
| |
This email and any files transmitted with it are confidential and intended solely for the use of the individual or entity to whom they are addressed.
|
|