Date
1 - 4 of 4
getattribute() call on GPU
Nicolas Guiard
Hi,
We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue.
We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access.
Here's the implementation of osl_get_attribute for our test:
In our test, we have a simple shader doing this:
As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug?
We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue.
We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access.
Here's the implementation of osl_get_attribute for our test:
__device__
int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest)
{
float *f = reinterpret_cast<float *>(attr_dest);
if (attr_type == nullptr) {
f[0] = 0.5f;
f[1] = 0.5f;
f[2] = 0.5f;
return 1;
}
const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type;
f[0] = type.basetype;
f[1] = type.aggregate;
f[2] = type.vecsemantics;
return 1;
}
If we remove the lines in bold red and replace them by the following ones, it works just fine:
f[0] = 1.0f;
f[1] = 0.5f;
f[2] = 0.0f;
In our test, we have a simple shader doing this:
surface attrtest()
{
color c;
getattribute("foo", c);
Ci = c * emission();
}
And here's the generated PTX code:
//
// Generated by LLVM NVPTX Back-End
//
.version 5.0
.target sm_35
.address_size 64
// .globl __direct_callable__group_unnamed_group_3_3_init
.extern .func (.param .b32 func_retval0) osl_get_attribute
(
.param .b64 osl_get_attribute_param_0,
.param .b32 osl_get_attribute_param_1,
.param .b64 osl_get_attribute_param_2,
.param .b64 osl_get_attribute_param_3,
.param .b32 osl_get_attribute_param_4,
.param .b32 osl_get_attribute_param_5,
.param .b64 osl_get_attribute_param_6,
.param .b64 osl_get_attribute_param_7
)
;
.extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component
(
.param .b64 osl_allocate_weighted_closure_component_param_0,
.param .b32 osl_allocate_weighted_closure_component_param_1,
.param .b32 osl_allocate_weighted_closure_component_param_2,
.param .b64 osl_allocate_weighted_closure_component_param_3
)
;
.extern .global .align 8 .u64 ds_555c6f602f9383e3_0003;
.visible .func __direct_callable__group_unnamed_group_3_3_init(
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0,
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1
)
{
ret;
}
// .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4
.visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4(
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0,
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1
)
{
.local .align 8 .b8 __local_depot1[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<2>;
.reg .b16 %rs<2>;
.reg .b32 %r<6>;
.reg .b64 %rd<8>;
mov.u64 %SPL, __local_depot1;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0];
ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003];
mov.u64 %rd4, 0;
mov.u32 %r1, 0;
mov.u64 %rd5, 140457662007856;
add.u64 %rd6, %SP, 0;
{ // callseq 0, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b64 param2;
st.param.b64 [param2+0], %rd4;
.param .b64 param3;
st.param.b64 [param3+0], %rd3;
.param .b32 param4;
st.param.b32 [param4+0], %r1;
.param .b32 param5;
st.param.b32 [param5+0], %r2;
.param .b64 param6;
st.param.b64 [param6+0], %rd5;
.param .b64 param7;
st.param.b64 [param7+0], %rd6;
.param .b32 retval0;
call.uni (retval0),
osl_get_attribute,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6,
param7
);
ld.param.b32 %r3, [retval0+0];
} // callseq 0
mov.u32 %r5, 1;
{ // callseq 1, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r5;
.param .b32 param2;
st.param.b32 [param2+0], %r5;
.param .b64 param3;
st.param.b64 [param3+0], %rd6;
.param .b64 retval0;
call.uni (retval0),
osl_allocate_weighted_closure_component,
(
param0,
param1,
param2,
param3
);
ld.param.b64 %rd7, [retval0+0];
} // callseq 1
setp.eq.s64 %p1, %rd7, 0;
@%p1 bra LBB1_2;
mov.u16 %rs1, 0;
st.u8 [%rd7+16], %rs1;
LBB1_2:
st.u64 [%rd2+272], %rd7;
ret;
}
As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug?
Nicolas Guiard
Hi,
I've been able to make it work by changing the prototype of the osl_get_attribute callback and passing the TypeDesc casted into a long long instead of passing its address.
This is exactly what is already done in osl_bind_interpolated_param.
Here's the corresponding pull request: https://github.com/AcademySoftwareFoundation/OpenShadingLanguage/pull/1404
Cheers
Hi,
We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue.
We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access.
Here's the implementation of osl_get_attribute for our test:__device__
int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest)
{
float *f = reinterpret_cast<float *>(attr_dest);
if (attr_type == nullptr) {
f[0] = 0.5f;
f[1] = 0.5f;
f[2] = 0.5f;
return 1;
}
const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type;
f[0] = type.basetype;
f[1] = type.aggregate;
f[2] = type.vecsemantics;
return 1;
}If we remove the lines in bold red and replace them by the following ones, it works just fine:f[0] = 1.0f;
f[1] = 0.5f;
f[2] = 0.0f;
In our test, we have a simple shader doing this:surface attrtest()
{
color c;
getattribute("foo", c);
Ci = c * emission();
}And here's the generated PTX code://
// Generated by LLVM NVPTX Back-End
//
.version 5.0
.target sm_35
.address_size 64
// .globl __direct_callable__group_unnamed_group_3_3_init
.extern .func (.param .b32 func_retval0) osl_get_attribute
(
.param .b64 osl_get_attribute_param_0,
.param .b32 osl_get_attribute_param_1,
.param .b64 osl_get_attribute_param_2,
.param .b64 osl_get_attribute_param_3,
.param .b32 osl_get_attribute_param_4,
.param .b32 osl_get_attribute_param_5,
.param .b64 osl_get_attribute_param_6,
.param .b64 osl_get_attribute_param_7
)
;
.extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component
(
.param .b64 osl_allocate_weighted_closure_component_param_0,
.param .b32 osl_allocate_weighted_closure_component_param_1,
.param .b32 osl_allocate_weighted_closure_component_param_2,
.param .b64 osl_allocate_weighted_closure_component_param_3
)
;
.extern .global .align 8 .u64 ds_555c6f602f9383e3_0003;
.visible .func __direct_callable__group_unnamed_group_3_3_init(
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0,
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1
)
{
ret;
}
// .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4
.visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4(
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0,
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1
)
{
.local .align 8 .b8 __local_depot1[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<2>;
.reg .b16 %rs<2>;
.reg .b32 %r<6>;
.reg .b64 %rd<8>;
mov.u64 %SPL, __local_depot1;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0];
ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003];
mov.u64 %rd4, 0;
mov.u32 %r1, 0;
mov.u64 %rd5, 140457662007856;
add.u64 %rd6, %SP, 0;
{ // callseq 0, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b64 param2;
st.param.b64 [param2+0], %rd4;
.param .b64 param3;
st.param.b64 [param3+0], %rd3;
.param .b32 param4;
st.param.b32 [param4+0], %r1;
.param .b32 param5;
st.param.b32 [param5+0], %r2;
.param .b64 param6;
st.param.b64 [param6+0], %rd5;
.param .b64 param7;
st.param.b64 [param7+0], %rd6;
.param .b32 retval0;
call.uni (retval0),
osl_get_attribute,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6,
param7
);
ld.param.b32 %r3, [retval0+0];
} // callseq 0
mov.u32 %r5, 1;
{ // callseq 1, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r5;
.param .b32 param2;
st.param.b32 [param2+0], %r5;
.param .b64 param3;
st.param.b64 [param3+0], %rd6;
.param .b64 retval0;
call.uni (retval0),
osl_allocate_weighted_closure_component,
(
param0,
param1,
param2,
param3
);
ld.param.b64 %rd7, [retval0+0];
} // callseq 1
setp.eq.s64 %p1, %rd7, 0;
@%p1 bra LBB1_2;
mov.u16 %rs1, 0;
st.u8 [%rd7+16], %rs1;
LBB1_2:
st.u64 [%rd2+272], %rd7;
ret;
}
As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug?
Larry Gritz
And I've merged this PR into master now, so hopefully this will be fixed.
-- lg
On Sep 13, 2021, at 6:14 AM, Nicolas Guiard <nicolas@...> wrote:Hi,I've been able to make it work by changing the prototype of the osl_get_attribute callback and passing the TypeDesc casted into a long long instead of passing its address.This is exactly what is already done in osl_bind_interpolated_param.Here's the corresponding pull request: https://github.com/AcademySoftwareFoundation/OpenShadingLanguage/pull/1404CheersHi,
We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue.
We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access.
Here's the implementation of osl_get_attribute for our test:__device__
int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest)
{
float *f = reinterpret_cast<float *>(attr_dest);
if (attr_type == nullptr) {
f[0] = 0.5f;
f[1] = 0.5f;
f[2] = 0.5f;
return 1;
}
const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type;
f[0] = type.basetype;
f[1] = type.aggregate;
f[2] = type.vecsemantics;
return 1;
}If we remove the lines in bold red and replace them by the following ones, it works just fine:f[0] = 1.0f;
f[1] = 0.5f;
f[2] = 0.0f;
In our test, we have a simple shader doing this:surface attrtest()
{
color c;
getattribute("foo", c);
Ci = c * emission();
}And here's the generated PTX code://
// Generated by LLVM NVPTX Back-End
//
.version 5.0
.target sm_35
.address_size 64
// .globl __direct_callable__group_unnamed_group_3_3_init
.extern .func (.param .b32 func_retval0) osl_get_attribute
(
.param .b64 osl_get_attribute_param_0,
.param .b32 osl_get_attribute_param_1,
.param .b64 osl_get_attribute_param_2,
.param .b64 osl_get_attribute_param_3,
.param .b32 osl_get_attribute_param_4,
.param .b32 osl_get_attribute_param_5,
.param .b64 osl_get_attribute_param_6,
.param .b64 osl_get_attribute_param_7
)
;
.extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component
(
.param .b64 osl_allocate_weighted_closure_component_param_0,
.param .b32 osl_allocate_weighted_closure_component_param_1,
.param .b32 osl_allocate_weighted_closure_component_param_2,
.param .b64 osl_allocate_weighted_closure_component_param_3
)
;
.extern .global .align 8 .u64 ds_555c6f602f9383e3_0003;
.visible .func __direct_callable__group_unnamed_group_3_3_init(
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0,
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1
)
{
ret;
}
// .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4
.visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4(
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0,
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1
)
{
.local .align 8 .b8 __local_depot1[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<2>;
.reg .b16 %rs<2>;
.reg .b32 %r<6>;
.reg .b64 %rd<8>;
mov.u64 %SPL, __local_depot1;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0];
ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003];
mov.u64 %rd4, 0;
mov.u32 %r1, 0;
mov.u64 %rd5, 140457662007856;
add.u64 %rd6, %SP, 0;
{ // callseq 0, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b64 param2;
st.param.b64 [param2+0], %rd4;
.param .b64 param3;
st.param.b64 [param3+0], %rd3;
.param .b32 param4;
st.param.b32 [param4+0], %r1;
.param .b32 param5;
st.param.b32 [param5+0], %r2;
.param .b64 param6;
st.param.b64 [param6+0], %rd5;
.param .b64 param7;
st.param.b64 [param7+0], %rd6;
.param .b32 retval0;
call.uni (retval0),
osl_get_attribute,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6,
param7
);
ld.param.b32 %r3, [retval0+0];
} // callseq 0
mov.u32 %r5, 1;
{ // callseq 1, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r5;
.param .b32 param2;
st.param.b32 [param2+0], %r5;
.param .b64 param3;
st.param.b64 [param3+0], %rd6;
.param .b64 retval0;
call.uni (retval0),
osl_allocate_weighted_closure_component,
(
param0,
param1,
param2,
param3
);
ld.param.b64 %rd7, [retval0+0];
} // callseq 1
setp.eq.s64 %p1, %rd7, 0;
@%p1 bra LBB1_2;
mov.u16 %rs1, 0;
st.u8 [%rd7+16], %rs1;
LBB1_2:
st.u64 [%rd2+272], %rd7;
ret;
}
As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug?
Nicolas Guiard
Thanks Larry!
Le mar. 14 sept. 2021 à 09:00, Larry Gritz <lg@...> a écrit :
And I've merged this PR into master now, so hopefully this will be fixed.-- lgOn Sep 13, 2021, at 6:14 AM, Nicolas Guiard <nicolas@...> wrote:Hi,I've been able to make it work by changing the prototype of the osl_get_attribute callback and passing the TypeDesc casted into a long long instead of passing its address.This is exactly what is already done in osl_bind_interpolated_param.Here's the corresponding pull request: https://github.com/AcademySoftwareFoundation/OpenShadingLanguage/pull/1404CheersHi,
We are currently trying to make getattribute() calls work on GPU with OptiX 7.3 and OSL 1.11.9.0 but we are facing an issue.
We've implemented the osl_get_attribute() callback in cuda and it's called properly. But as soon as we try to access the content of the attribute type, we get an illegal memory access.
Here's the implementation of osl_get_attribute for our test:__device__
int osl_get_attribute(void *sg_, int dest_derivs, void *obj_name_, void *attr_name_, int array_lookup, int index, const void *attr_type, void *attr_dest)
{
float *f = reinterpret_cast<float *>(attr_dest);
if (attr_type == nullptr) {
f[0] = 0.5f;
f[1] = 0.5f;
f[2] = 0.5f;
return 1;
}
const OSL::TypeDesc& type = *(const OSL::TypeDesc *)attr_type;
f[0] = type.basetype;
f[1] = type.aggregate;
f[2] = type.vecsemantics;
return 1;
}If we remove the lines in bold red and replace them by the following ones, it works just fine:f[0] = 1.0f;
f[1] = 0.5f;
f[2] = 0.0f;
In our test, we have a simple shader doing this:surface attrtest()
{
color c;
getattribute("foo", c);
Ci = c * emission();
}And here's the generated PTX code://
// Generated by LLVM NVPTX Back-End
//
.version 5.0
.target sm_35
.address_size 64
// .globl __direct_callable__group_unnamed_group_3_3_init
.extern .func (.param .b32 func_retval0) osl_get_attribute
(
.param .b64 osl_get_attribute_param_0,
.param .b32 osl_get_attribute_param_1,
.param .b64 osl_get_attribute_param_2,
.param .b64 osl_get_attribute_param_3,
.param .b32 osl_get_attribute_param_4,
.param .b32 osl_get_attribute_param_5,
.param .b64 osl_get_attribute_param_6,
.param .b64 osl_get_attribute_param_7
)
;
.extern .func (.param .b64 func_retval0) osl_allocate_weighted_closure_component
(
.param .b64 osl_allocate_weighted_closure_component_param_0,
.param .b32 osl_allocate_weighted_closure_component_param_1,
.param .b32 osl_allocate_weighted_closure_component_param_2,
.param .b64 osl_allocate_weighted_closure_component_param_3
)
;
.extern .global .align 8 .u64 ds_555c6f602f9383e3_0003;
.visible .func __direct_callable__group_unnamed_group_3_3_init(
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_0,
.param .b64 __direct_callable__group_unnamed_group_3_3_init_param_1
)
{
ret;
}
// .globl __direct_callable__unnamed_group_3_build___project_scene_attrtest_4
.visible .func __direct_callable__unnamed_group_3_build___project_scene_attrtest_4(
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0,
.param .b64 __direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_1
)
{
.local .align 8 .b8 __local_depot1[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<2>;
.reg .b16 %rs<2>;
.reg .b32 %r<6>;
.reg .b64 %rd<8>;
mov.u64 %SPL, __local_depot1;
cvta.local.u64 %SP, %SPL;
ld.param.u64 %rd2, [__direct_callable__unnamed_group_3_build___project_scene_attrtest_4_param_0];
ld.global.u64 %rd3, [ds_555c6f602f9383e3_0003];
mov.u64 %rd4, 0;
mov.u32 %r1, 0;
mov.u64 %rd5, 140457662007856;
add.u64 %rd6, %SP, 0;
{ // callseq 0, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b64 param2;
st.param.b64 [param2+0], %rd4;
.param .b64 param3;
st.param.b64 [param3+0], %rd3;
.param .b32 param4;
st.param.b32 [param4+0], %r1;
.param .b32 param5;
st.param.b32 [param5+0], %r2;
.param .b64 param6;
st.param.b64 [param6+0], %rd5;
.param .b64 param7;
st.param.b64 [param7+0], %rd6;
.param .b32 retval0;
call.uni (retval0),
osl_get_attribute,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6,
param7
);
ld.param.b32 %r3, [retval0+0];
} // callseq 0
mov.u32 %r5, 1;
{ // callseq 1, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r5;
.param .b32 param2;
st.param.b32 [param2+0], %r5;
.param .b64 param3;
st.param.b64 [param3+0], %rd6;
.param .b64 retval0;
call.uni (retval0),
osl_allocate_weighted_closure_component,
(
param0,
param1,
param2,
param3
);
ld.param.b64 %rd7, [retval0+0];
} // callseq 1
setp.eq.s64 %p1, %rd7, 0;
@%p1 bra LBB1_2;
mov.u16 %rs1, 0;
st.u8 [%rd7+16], %rs1;
LBB1_2:
st.u64 [%rd2+272], %rd7;
ret;
}
As we can see in bold red in the generated code, the address of the type is a constant value and this value changes at each execution. Is there something we are not configuring properly or is it a bug?