|
|
@ -9,6 +9,7 @@ |
|
|
|
// Niko Li, newlife20080214@gmail.com |
|
|
|
// Niko Li, newlife20080214@gmail.com |
|
|
|
// Wang Weiyan, wangweiyanster@gmail.com |
|
|
|
// Wang Weiyan, wangweiyanster@gmail.com |
|
|
|
// Jia Haipeng, jiahaipeng95@gmail.com |
|
|
|
// Jia Haipeng, jiahaipeng95@gmail.com |
|
|
|
|
|
|
|
// Nathan, liujun@multicorewareinc.com |
|
|
|
// Redistribution and use in source and binary forms, with or without modification, |
|
|
|
// Redistribution and use in source and binary forms, with or without modification, |
|
|
|
// are permitted provided that the following conditions are met: |
|
|
|
// are permitted provided that the following conditions are met: |
|
|
|
// |
|
|
|
// |
|
|
@ -299,6 +300,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa |
|
|
|
} |
|
|
|
} |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
int queuecount = lclcount[0]; |
|
|
|
int queuecount = lclcount[0]; |
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
nodecounter = splitnode; |
|
|
|
nodecounter = splitnode; |
|
|
|
for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++) |
|
|
|
for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++) |
|
|
|
{ |
|
|
|
{ |
|
|
@ -324,7 +326,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa |
|
|
|
int queue_pixel = mad24(((temp_coord & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff); |
|
|
|
int queue_pixel = mad24(((temp_coord & (int)0xffff0000)>>16),readwidth,temp_coord & 0xffff); |
|
|
|
|
|
|
|
|
|
|
|
//barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
//barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
if(lcl_compute_win_id < queuecount) { |
|
|
|
if(lcl_compute_win_id < queuecount) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
|
|
|
|
int tempnodecounter = lcl_compute_id; |
|
|
|
int tempnodecounter = lcl_compute_id; |
|
|
|
float part_sum = 0.f; |
|
|
|
float part_sum = 0.f; |
|
|
@ -363,7 +366,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa |
|
|
|
partialsum[lcl_id]=part_sum; |
|
|
|
partialsum[lcl_id]=part_sum; |
|
|
|
} |
|
|
|
} |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
if(lcl_compute_win_id < queuecount) { |
|
|
|
if(lcl_compute_win_id < queuecount) |
|
|
|
|
|
|
|
{ |
|
|
|
for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++) |
|
|
|
for(int i=0; i<lcl_compute_win && (lcl_compute_id==0); i++) |
|
|
|
{ |
|
|
|
{ |
|
|
|
stage_sum += partialsum[lcl_id+i]; |
|
|
|
stage_sum += partialsum[lcl_id+i]; |
|
|
@ -378,8 +382,9 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa |
|
|
|
} |
|
|
|
} |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
}//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++) |
|
|
|
}//end for(int queueloop=0;queueloop<queuecount_loop;queueloop++) |
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
//barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
queuecount = lclcount[0]; |
|
|
|
queuecount = lclcount[0]; |
|
|
|
|
|
|
|
barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
nodecounter += stageinfo.x; |
|
|
|
nodecounter += stageinfo.x; |
|
|
|
}//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++) |
|
|
|
}//end for(int stageloop = splitstage; stageloop< endstage && queuecount>0;stageloop++) |
|
|
|
//barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|
//barrier(CLK_LOCAL_MEM_FENCE); |
|
|
|