tesseract v5.3.3.20231005
oclkernels.h
Go to the documentation of this file.
1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License at
4// http://www.apache.org/licenses/LICENSE-2.0
5// Unless required by applicable law or agreed to in writing, software
6// distributed under the License is distributed on an "AS IS" BASIS,
7// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
8// See the License for the specific language governing permissions and
9// limitations under the License.
10
11#ifndef TESSERACT_OPENCL_OCLKERNELS_H_
12# define TESSERACT_OPENCL_OCLKERNELS_H_
13
14# ifndef USE_EXTERNAL_KERNEL
15# define KERNEL(...) # __VA_ARGS__ "\n"
16// Double precision is a default of spreadsheets
17// cl_khr_fp64: Khronos extension
18// cl_amd_fp64: AMD extension
19// use build option outside to define fp_t
21static const char *kernel_src = KERNEL(
22\n #ifdef KHR_DP_EXTENSION\n
23\n #pragma OPENCL EXTENSION cl_khr_fp64
24 : enable\n
25\n #elif AMD_DP_EXTENSION\n
26\n #pragma OPENCL EXTENSION cl_amd_fp64
27 : enable\n
28\n #else \n
29\n #endif \n
30 __kernel void composeRGBPixel(__global uint *tiffdata, int w, int h, int wpl, __global uint *output) {
31 int i = get_global_id(1);
32 int j = get_global_id(0);
33 int tiffword, rval, gval, bval;
34
35 //Ignore the excess
36 if ((i >= h) || (j >= w))
37 return;
38
39 tiffword = tiffdata[i * w + j];
40 rval = ((tiffword)&0xff);
41 gval = (((tiffword) >> 8) & 0xff);
42 bval = (((tiffword) >> 16) & 0xff);
43 output[i * wpl + j] = (rval << (8 * (sizeof(uint) - 1 - 0))) | (gval << (8 * (sizeof(uint) - 1 - 1))) | (bval << (8 * (sizeof(uint) - 1 - 2)));
44 })
45
46 KERNEL(
47\n__kernel void pixSubtract_inplace(__global int *dword, __global int *sword, const int wpl, const int h) {
48 const unsigned int row = get_global_id(1);
49 const unsigned int col = get_global_id(0);
50 const unsigned int pos = row * wpl + col;
51
52 //Ignore the execss
53 if (row >= h || col >= wpl)
54 return;
55
56 *(dword + pos) &= ~(*(sword + pos));
57 }\n)
58
59 KERNEL(
60\n__kernel void morphoDilateHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
61 const unsigned int pos = get_global_id(0);
62 unsigned int prevword, nextword, currword, tempword;
63 unsigned int destword;
64 const int col = pos % wpl;
65
66 //Ignore the execss
67 if (pos >= (wpl * h))
68 return;
69
70 currword = *(sword + pos);
71 destword = currword;
72
73 //Handle boundary conditions
74 if (col == 0)
75 prevword = 0;
76 else
77 prevword = *(sword + pos - 1);
78
79 if (col == (wpl - 1))
80 nextword = 0;
81 else
82 nextword = *(sword + pos + 1);
83
84 //Loop unrolled
85
86 //1 bit to left and 1 bit to right
87 //Get the max value on LHS of every pixel
88 tempword = (prevword << (31)) | ((currword >> 1));
89 destword |= tempword;
90 //Get max value on RHS of every pixel
91 tempword = (currword << 1) | (nextword >> (31));
92 destword |= tempword;
93
94 //2 bit to left and 2 bit to right
95 //Get the max value on LHS of every pixel
96 tempword = (prevword << (30)) | ((currword >> 2));
97 destword |= tempword;
98 //Get max value on RHS of every pixel
99 tempword = (currword << 2) | (nextword >> (30));
100 destword |= tempword;
101
102 *(dword + pos) = destword;
103 }\n)
104
105 KERNEL(
106\n__kernel void morphoDilateVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
107 const int col = get_global_id(0);
108 const int row = get_global_id(1);
109 const unsigned int pos = row * wpl + col;
110 unsigned int tempword;
111 unsigned int destword;
112 int i;
113
114 //Ignore the execss
115 if (row >= h || col >= wpl)
116 return;
117
118 destword = *(sword + pos);
119
120 //2 words above
121 i = (row - 2) < 0 ? row : (row - 2);
122 tempword = *(sword + i * wpl + col);
123 destword |= tempword;
124
125 //1 word above
126 i = (row - 1) < 0 ? row : (row - 1);
127 tempword = *(sword + i * wpl + col);
128 destword |= tempword;
129
130 //1 word below
131 i = (row >= (h - 1)) ? row : (row + 1);
132 tempword = *(sword + i * wpl + col);
133 destword |= tempword;
134
135 //2 words below
136 i = (row >= (h - 2)) ? row : (row + 2);
137 tempword = *(sword + i * wpl + col);
138 destword |= tempword;
139
140 *(dword + pos) = destword;
141 }\n)
142
143 KERNEL(
144\n__kernel void morphoDilateHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h) {
145 const int col = get_global_id(0);
146 const int row = get_global_id(1);
147 const unsigned int pos = row * wpl + col;
148 unsigned int parbitsxp, parbitsxn, nwords;
149 unsigned int destword, tempword, lastword, currword;
150 unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
151 int i, j, siter, eiter;
152
153 //Ignore the execss
154 if (pos >= (wpl * h) || (xn < 1 && xp < 1))
155 return;
156
157 currword = *(sword + pos);
158 destword = currword;
159
160 parbitsxp = xp & 31;
161 parbitsxn = xn & 31;
162 nwords = xp >> 5;
163
164 if (parbitsxp > 0)
165 nwords += 1;
166 else
167 parbitsxp = 31;
168
169 siter = (col - nwords);
170 eiter = (col + nwords);
171
172 //Get prev word
173 if (col == 0)
174 firstword = 0x0;
175 else
176 firstword = *(sword + pos - 1);
177
178 //Get next word
179 if (col == (wpl - 1))
180 secondword = 0x0;
181 else
182 secondword = *(sword + pos + 1);
183
184 //Last partial bits on either side
185 for (i = 1; i <= parbitsxp; i++) {
186 //Get the max value on LHS of every pixel
187 tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32 - i)) | ((currword >> i));
188
189 destword |= tempword;
190
191 //Get max value on RHS of every pixel
192 tempword = (currword << i) | (secondword >> (32 - i));
193 destword |= tempword;
194 }
195
196 //Return if halfwidth <= 1 word
197 if (nwords == 1) {
198 if (xn == 32) {
199 destword |= firstword;
200 }
201 if (xp == 32) {
202 destword |= secondword;
203 }
204
205 *(dword + pos) = destword;
206 return;
207 }
208
209 if (siter < 0)
210 firstword = 0x0;
211 else
212 firstword = *(sword + row * wpl + siter);
213
214 if (eiter >= wpl)
215 lastword = 0x0;
216 else
217 lastword = *(sword + row * wpl + eiter);
218
219 for (i = 1; i < nwords; i++) {
220 //Gets LHS words
221 if ((siter + i) < 0)
222 secondword = 0x0;
223 else
224 secondword = *(sword + row * wpl + siter + i);
225
226 lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
227
228 firstword = secondword;
229
230 if ((siter + i + 1) < 0)
231 secondword = 0x0;
232 else
233 secondword = *(sword + row * wpl + siter + i + 1);
234
235 lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn;
236
237 //Gets RHS words
238 if ((eiter - i) >= wpl)
239 firstword = 0x0;
240 else
241 firstword = *(sword + row * wpl + eiter - i);
242
243 rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
244
245 lastword = firstword;
246 if ((eiter - i - 1) >= wpl)
247 firstword = 0x0;
248 else
249 firstword = *(sword + row * wpl + eiter - i - 1);
250
251 rprevword = firstword << parbitsxp | lastword >> (32 - parbitsxp);
252
253 for (j = 1; j < 32; j++) {
254 //OR LHS full words
255 tempword = (lprevword << j) | (lnextword >> (32 - j));
256 destword |= tempword;
257
258 //OR RHS full words
259 tempword = (rprevword << j) | (rnextword >> (32 - j));
260 destword |= tempword;
261 }
262
263 destword |= lprevword;
264 destword |= lnextword;
265 destword |= rprevword;
266 destword |= rnextword;
267
268 lastword = firstword;
269 firstword = secondword;
270 }
271
272 *(dword + pos) = destword;
273 }\n)
274
275 KERNEL(
276\n__kernel void morphoDilateHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char isEven) {
277 const int col = get_global_id(0);
278 const int row = get_global_id(1);
279 const unsigned int pos = row * wpl + col;
280 unsigned int prevword, nextword, currword, tempword;
281 unsigned int destword;
282 int i;
283
284 //Ignore the execss
285 if (pos >= (wpl * h))
286 return;
287
288 currword = *(sword + pos);
289 destword = currword;
290
291 //Handle boundary conditions
292 if (col == 0)
293 prevword = 0;
294 else
295 prevword = *(sword + pos - 1);
296
297 if (col == (wpl - 1))
298 nextword = 0;
299 else
300 nextword = *(sword + pos + 1);
301
302 for (i = 1; i <= halfwidth; i++) {
303 //Get the max value on LHS of every pixel
304 if (i == halfwidth && isEven) {
305 tempword = 0x0;
306 } else {
307 tempword = (prevword << (32 - i)) | ((currword >> i));
308 }
309
310 destword |= tempword;
311
312 //Get max value on RHS of every pixel
313 tempword = (currword << i) | (nextword >> (32 - i));
314
315 destword |= tempword;
316 }
317
318 *(dword + pos) = destword;
319 }\n)
320
321 KERNEL(
322\n__kernel void morphoDilateVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const int yn) {
323 const int col = get_global_id(0);
324 const int row = get_global_id(1);
325 const unsigned int pos = row * wpl + col;
326 unsigned int tempword;
327 unsigned int destword;
328 int i, siter, eiter;
329
330 //Ignore the execss
331 if (row >= h || col >= wpl)
332 return;
333
334 destword = *(sword + pos);
335
336 //Set start position and end position considering the boundary conditions
337 siter = (row - yn) < 0 ? 0 : (row - yn);
338 eiter = (row >= (h - yp)) ? (h - 1) : (row + yp);
339
340 for (i = siter; i <= eiter; i++) {
341 tempword = *(sword + i * wpl + col);
342
343 destword |= tempword;
344 }
345
346 *(dword + pos) = destword;
347 }\n)
348
349 KERNEL(
350\n__kernel void morphoErodeHor_5x5(__global int *sword, __global int *dword, const int wpl, const int h) {
351 const unsigned int pos = get_global_id(0);
352 unsigned int prevword, nextword, currword, tempword;
353 unsigned int destword;
354 const int col = pos % wpl;
355
356 //Ignore the execss
357 if (pos >= (wpl * h))
358 return;
359
360 currword = *(sword + pos);
361 destword = currword;
362
363 //Handle boundary conditions
364 if (col == 0)
365 prevword = 0xffffffff;
366 else
367 prevword = *(sword + pos - 1);
368
369 if (col == (wpl - 1))
370 nextword = 0xffffffff;
371 else
372 nextword = *(sword + pos + 1);
373
374 //Loop unrolled
375
376 //1 bit to left and 1 bit to right
377 //Get the min value on LHS of every pixel
378 tempword = (prevword << (31)) | ((currword >> 1));
379 destword &= tempword;
380 //Get min value on RHS of every pixel
381 tempword = (currword << 1) | (nextword >> (31));
382 destword &= tempword;
383
384 //2 bit to left and 2 bit to right
385 //Get the min value on LHS of every pixel
386 tempword = (prevword << (30)) | ((currword >> 2));
387 destword &= tempword;
388 //Get min value on RHS of every pixel
389 tempword = (currword << 2) | (nextword >> (30));
390 destword &= tempword;
391
392 *(dword + pos) = destword;
393 }\n)
394
395 KERNEL(
396\n__kernel void morphoErodeVer_5x5(__global int *sword, __global int *dword, const int wpl, const int h, const int fwmask, const int lwmask) {
397 const int col = get_global_id(0);
398 const int row = get_global_id(1);
399 const unsigned int pos = row * wpl + col;
400 unsigned int tempword;
401 unsigned int destword;
402 int i;
403
404 //Ignore the execss
405 if (row >= h || col >= wpl)
406 return;
407
408 destword = *(sword + pos);
409
410 if (row < 2 || row >= (h - 2)) {
411 destword = 0x0;
412 } else {
413 //2 words above
414 //i = (row - 2) < 0 ? row : (row - 2);
415 i = (row - 2);
416 tempword = *(sword + i * wpl + col);
417 destword &= tempword;
418
419 //1 word above
420 //i = (row - 1) < 0 ? row : (row - 1);
421 i = (row - 1);
422 tempword = *(sword + i * wpl + col);
423 destword &= tempword;
424
425 //1 word below
426 //i = (row >= (h - 1)) ? row : (row + 1);
427 i = (row + 1);
428 tempword = *(sword + i * wpl + col);
429 destword &= tempword;
430
431 //2 words below
432 //i = (row >= (h - 2)) ? row : (row + 2);
433 i = (row + 2);
434 tempword = *(sword + i * wpl + col);
435 destword &= tempword;
436
437 if (col == 0) {
438 destword &= fwmask;
439 }
440 if (col == (wpl - 1)) {
441 destword &= lwmask;
442 }
443 }
444
445 *(dword + pos) = destword;
446 }\n)
447
448 KERNEL(
449\n__kernel void morphoErodeHor(__global int *sword, __global int *dword, const int xp, const int xn, const int wpl, const int h, const char isAsymmetric, const int rwmask, const int lwmask) {
450 const int col = get_global_id(0);
451 const int row = get_global_id(1);
452 const unsigned int pos = row * wpl + col;
453 unsigned int parbitsxp, parbitsxn, nwords;
454 unsigned int destword, tempword, lastword, currword;
455 unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword;
456 int i, j, siter, eiter;
457
458 //Ignore the execss
459 if (pos >= (wpl * h) || (xn < 1 && xp < 1))
460 return;
461
462 currword = *(sword + pos);
463 destword = currword;
464
465 parbitsxp = xp & 31;
466 parbitsxn = xn & 31;
467 nwords = xp >> 5;
468
469 if (parbitsxp > 0)
470 nwords += 1;
471 else
472 parbitsxp = 31;
473
474 siter = (col - nwords);
475 eiter = (col + nwords);
476
477 //Get prev word
478 if (col == 0)
479 firstword = 0xffffffff;
480 else
481 firstword = *(sword + pos - 1);
482
483 //Get next word
484 if (col == (wpl - 1))
485 secondword = 0xffffffff;
486 else
487 secondword = *(sword + pos + 1);
488
489 //Last partial bits on either side
490 for (i = 1; i <= parbitsxp; i++) {
491 //Get the max value on LHS of every pixel
492 tempword = (firstword << (32 - i)) | ((currword >> i));
493 destword &= tempword;
494
495 //Get max value on RHS of every pixel
496 tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0xffffffff : (currword << i) | (secondword >> (32 - i));
497
498 //tempword = (currword << i) | (secondword >> (32 - i));
499 destword &= tempword;
500 }
501
502 //Return if halfwidth <= 1 word
503 if (nwords == 1) {
504 if (xp == 32) {
505 destword &= firstword;
506 }
507 if (xn == 32) {
508 destword &= secondword;
509 }
510
511 //Clear boundary pixels
512 if (isAsymmetric) {
513 if (col == 0)
514 destword &= rwmask;
515 if (col == (wpl - 1))
516 destword &= lwmask;
517 }
518
519 *(dword + pos) = destword;
520 return;
521 }
522
523 if (siter < 0)
524 firstword = 0xffffffff;
525 else
526 firstword = *(sword + row * wpl + siter);
527
528 if (eiter >= wpl)
529 lastword = 0xffffffff;
530 else
531 lastword = *(sword + row * wpl + eiter);
532
533 for (i = 1; i < nwords; i++) {
534 //Gets LHS words
535 if ((siter + i) < 0)
536 secondword = 0xffffffff;
537 else
538 secondword = *(sword + row * wpl + siter + i);
539
540 lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
541
542 firstword = secondword;
543
544 if ((siter + i + 1) < 0)
545 secondword = 0xffffffff;
546 else
547 secondword = *(sword + row * wpl + siter + i + 1);
548
549 lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp);
550
551 //Gets RHS words
552 if ((eiter - i) >= wpl)
553 firstword = 0xffffffff;
554 else
555 firstword = *(sword + row * wpl + eiter - i);
556
557 rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
558
559 lastword = firstword;
560 if ((eiter - i - 1) >= wpl)
561 firstword = 0xffffffff;
562 else
563 firstword = *(sword + row * wpl + eiter - i - 1);
564
565 rprevword = firstword << parbitsxn | lastword >> (32 - parbitsxn);
566
567 for (j = 0; j < 32; j++) {
568 //OR LHS full words
569 tempword = (lprevword << j) | (lnextword >> (32 - j));
570 destword &= tempword;
571
572 //OR RHS full words
573 tempword = (rprevword << j) | (rnextword >> (32 - j));
574 destword &= tempword;
575 }
576
577 destword &= lprevword;
578 destword &= lnextword;
579 destword &= rprevword;
580 destword &= rnextword;
581
582 lastword = firstword;
583 firstword = secondword;
584 }
585
586 if (isAsymmetric) {
587 //Clear boundary pixels
588 if (col < (nwords - 1))
589 destword = 0x0;
590 else if (col == (nwords - 1))
591 destword &= rwmask;
592 else if (col > (wpl - nwords))
593 destword = 0x0;
594 else if (col == (wpl - nwords))
595 destword &= lwmask;
596 }
597
598 *(dword + pos) = destword;
599 }\n)
600
601 KERNEL(
602\n__kernel void morphoErodeHor_32word(__global int *sword, __global int *dword, const int halfwidth, const int wpl, const int h, const char clearBoundPixH, const int rwmask, const int lwmask, const char isEven) {
603 const int col = get_global_id(0);
604 const int row = get_global_id(1);
605 const unsigned int pos = row * wpl + col;
606 unsigned int prevword, nextword, currword, tempword, destword;
607 int i;
608
609 //Ignore the execss
610 if (pos >= (wpl * h))
611 return;
612
613 currword = *(sword + pos);
614 destword = currword;
615
616 //Handle boundary conditions
617 if (col == 0)
618 prevword = 0xffffffff;
619 else
620 prevword = *(sword + pos - 1);
621
622 if (col == (wpl - 1))
623 nextword = 0xffffffff;
624 else
625 nextword = *(sword + pos + 1);
626
627 for (i = 1; i <= halfwidth; i++) {
628 //Get the min value on LHS of every pixel
629 tempword = (prevword << (32 - i)) | ((currword >> i));
630
631 destword &= tempword;
632
633 //Get min value on RHS of every pixel
634 if (i == halfwidth && isEven) {
635 tempword = 0xffffffff;
636 } else {
637 tempword = (currword << i) | (nextword >> (32 - i));
638 }
639
640 destword &= tempword;
641 }
642
643 if (clearBoundPixH) {
644 if (col == 0) {
645 destword &= rwmask;
646 } else if (col == (wpl - 1)) {
647 destword &= lwmask;
648 }
649 }
650
651 *(dword + pos) = destword;
652 }\n)
653
654 KERNEL(
655\n__kernel void morphoErodeVer(__global int *sword, __global int *dword, const int yp, const int wpl, const int h, const char clearBoundPixV, const int yn) {
656 const int col = get_global_id(0);
657 const int row = get_global_id(1);
658 const unsigned int pos = row * wpl + col;
659 unsigned int tempword, destword;
660 int i, siter, eiter;
661
662 //Ignore the execss
663 if (row >= h || col >= wpl)
664 return;
665
666 destword = *(sword + pos);
667
668 //Set start position and end position considering the boundary conditions
669 siter = (row - yp) < 0 ? 0 : (row - yp);
670 eiter = (row >= (h - yn)) ? (h - 1) : (row + yn);
671
672 for (i = siter; i <= eiter; i++) {
673 tempword = *(sword + i * wpl + col);
674
675 destword &= tempword;
676 }
677
678 //Clear boundary pixels
679 if (clearBoundPixV && ((row < yp) || ((h - row) <= yn))) {
680 destword = 0x0;
681 }
682
683 *(dword + pos) = destword;
684 }\n)
685
686 // HistogramRect Kernel: Accumulate
687 // assumes 4 channels, i.e., bytes_per_pixel = 4
688 // assumes number of pixels is multiple of 8
689 // data is laid out as
690 // ch0 ch1 ...
691 // bin0 bin1 bin2... bin0...
692 // rpt0,1,2...256 rpt0,1,2...
693 KERNEL(
694\n #define HIST_REDUNDANCY 256\n
695\n #define GROUP_SIZE 256\n
696\n #define HIST_SIZE 256\n
697\n #define NUM_CHANNELS 4\n
698\n #define HR_UNROLL_SIZE 8 \n
699\n #define HR_UNROLL_TYPE uchar8 \n
700
701 __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannels(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
702 // declare variables
703 uchar8 pixels;
704 int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
705
706 // for each pixel/channel, accumulate in global memory
707 for (uint pc = get_global_id(0); pc < numPixels * NUM_CHANNELS / HR_UNROLL_SIZE; pc += get_global_size(0)) {
708 pixels = data[pc];
709 // channel bin thread
710 atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s0 * HIST_REDUNDANCY + threadOffset]); // ch0
711 atomic_inc(&histBuffer[0 * HIST_SIZE * HIST_REDUNDANCY + pixels.s4 * HIST_REDUNDANCY + threadOffset]); // ch0
712 atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s1 * HIST_REDUNDANCY + threadOffset]); // ch1
713 atomic_inc(&histBuffer[1 * HIST_SIZE * HIST_REDUNDANCY + pixels.s5 * HIST_REDUNDANCY + threadOffset]); // ch1
714 atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s2 * HIST_REDUNDANCY + threadOffset]); // ch2
715 atomic_inc(&histBuffer[2 * HIST_SIZE * HIST_REDUNDANCY + pixels.s6 * HIST_REDUNDANCY + threadOffset]); // ch2
716 atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s3 * HIST_REDUNDANCY + threadOffset]); // ch3
717 atomic_inc(&histBuffer[3 * HIST_SIZE * HIST_REDUNDANCY + pixels.s7 * HIST_REDUNDANCY + threadOffset]); // ch3
718 }
719 })
720
721 KERNEL(
722 // NUM_CHANNELS = 1
723 __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannel(__global const uchar8 *data, uint numPixels, __global uint *histBuffer) {
724 // declare variables
725 uchar8 pixels;
726 int threadOffset = get_global_id(0) % HIST_REDUNDANCY;
727
728 // for each pixel/channel, accumulate in global memory
729 for (uint pc = get_global_id(0); pc < numPixels / HR_UNROLL_SIZE; pc += get_global_size(0)) {
730 pixels = data[pc];
731 // bin thread
732 atomic_inc(&histBuffer[pixels.s0 * HIST_REDUNDANCY + threadOffset]);
733 atomic_inc(&histBuffer[pixels.s1 * HIST_REDUNDANCY + threadOffset]);
734 atomic_inc(&histBuffer[pixels.s2 * HIST_REDUNDANCY + threadOffset]);
735 atomic_inc(&histBuffer[pixels.s3 * HIST_REDUNDANCY + threadOffset]);
736 atomic_inc(&histBuffer[pixels.s4 * HIST_REDUNDANCY + threadOffset]);
737 atomic_inc(&histBuffer[pixels.s5 * HIST_REDUNDANCY + threadOffset]);
738 atomic_inc(&histBuffer[pixels.s6 * HIST_REDUNDANCY + threadOffset]);
739 atomic_inc(&histBuffer[pixels.s7 * HIST_REDUNDANCY + threadOffset]);
740 }
741 })
742
743 // HistogramRect Kernel: Reduction
744 // only supports 4 channels
745 // each work group handles a single channel of a single histogram bin
746 KERNEL(__attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectAllChannelsReduction(int n, // unused pixel redundancy
747 __global uint *histBuffer, __global int *histResult) {
748 // declare variables
749 int channel = get_group_id(0) / HIST_SIZE;
750 int bin = get_group_id(0) % HIST_SIZE;
751 int value = 0;
752
753 // accumulate in register
754 for (uint i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
755 value += histBuffer[channel * HIST_SIZE * HIST_REDUNDANCY + bin * HIST_REDUNDANCY + i];
756 }
757
758 // reduction in local memory
759 __local int localHist[GROUP_SIZE];
760 localHist[get_local_id(0)] = value;
761 barrier(CLK_LOCAL_MEM_FENCE);
762 for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
763 if (get_local_id(0) < stride) {
764 value = localHist[get_local_id(0) + stride];
765 }
766 barrier(CLK_LOCAL_MEM_FENCE);
767 if (get_local_id(0) < stride) {
768 localHist[get_local_id(0)] += value;
769 }
770 barrier(CLK_LOCAL_MEM_FENCE);
771 }
772
773 // write reduction to final result
774 if (get_local_id(0) == 0) {
775 histResult[get_group_id(0)] = localHist[0];
776 }
777 } // kernel_HistogramRectAllChannels
778 )
779
780 KERNEL(
781 // NUM_CHANNELS = 1
782 __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_HistogramRectOneChannelReduction(int n, // unused pixel redundancy
783 __global uint *histBuffer, __global int *histResult) {
784 // declare variables
785 // int channel = get_group_id(0)/HIST_SIZE;
786 int bin = get_group_id(0) % HIST_SIZE;
787 int value = 0;
788
789 // accumulate in register
790 for (int i = get_local_id(0); i < HIST_REDUNDANCY; i += GROUP_SIZE) {
791 value += histBuffer[bin * HIST_REDUNDANCY + i];
792 }
793
794 // reduction in local memory
795 __local int localHist[GROUP_SIZE];
796 localHist[get_local_id(0)] = value;
797 barrier(CLK_LOCAL_MEM_FENCE);
798 for (int stride = GROUP_SIZE / 2; stride >= 1; stride /= 2) {
799 if (get_local_id(0) < stride) {
800 value = localHist[get_local_id(0) + stride];
801 }
802 barrier(CLK_LOCAL_MEM_FENCE);
803 if (get_local_id(0) < stride) {
804 localHist[get_local_id(0)] += value;
805 }
806 barrier(CLK_LOCAL_MEM_FENCE);
807 }
808
809 // write reduction to final result
810 if (get_local_id(0) == 0) {
811 histResult[get_group_id(0)] = localHist[0];
812 }
813 } // kernel_HistogramRectOneChannelReduction
814 )
815
816 // ThresholdRectToPix Kernel
817 // only supports 4 channels
818 // imageData is input image (24-bits/pixel)
819 // pix is output image (1-bit/pixel)
820 KERNEL(
821\n #define CHAR_VEC_WIDTH 4 \n
822\n #define PIXELS_PER_WORD 32 \n
823\n #define PIXELS_PER_BURST 8 \n
824\n #define BURSTS_PER_WORD (PIXELS_PER_WORD)/(PIXELS_PER_BURST) \n
825\n typedef union {
826 uchar s[PIXELS_PER_BURST * NUM_CHANNELS];
827 uchar4 v[(PIXELS_PER_BURST * NUM_CHANNELS) / CHAR_VEC_WIDTH];
828 } charVec;
829
830 __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix(__global const uchar4 *imageData, int height, int width,
831 int wpl, // words per line
832 __global int *thresholds, __global int *hi_values, __global int *pix) {
833 // declare variables
834 int pThresholds[NUM_CHANNELS];
835 int pHi_Values[NUM_CHANNELS];
836 for (int i = 0; i < NUM_CHANNELS; i++) {
837 pThresholds[i] = thresholds[i];
838 pHi_Values[i] = hi_values[i];
839 }
840
841 // for each word (32 pixels) in output image
842 for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
843 unsigned int word = 0; // all bits start at zero
844 // for each burst in word
845 for (int b = 0; b < BURSTS_PER_WORD; b++) {
846 // load burst
847 charVec pixels;
848 int offset = (w / wpl) * width;
849 offset += (w % wpl) * PIXELS_PER_WORD;
850 offset += b * PIXELS_PER_BURST;
851
852 for (int i = 0; i < PIXELS_PER_BURST; ++i)
853 pixels.v[i] = imageData[offset + i];
854
855 // for each pixel in burst
856 for (int p = 0; p < PIXELS_PER_BURST; p++) {
857 for (int c = 0; c < NUM_CHANNELS; c++) {
858 unsigned char pixChan = pixels.s[p * NUM_CHANNELS + c];
859 if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) {
860 const uint kTopBit = 0x80000000;
861 word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
862 }
863 }
864 }
865 }
866 pix[w] = word;
867 }
868 }
869
870\n #define CHAR_VEC_WIDTH 8 \n
871\n #define PIXELS_PER_WORD 32 \n
872\n #define PIXELS_PER_BURST 8 \n
873\n #define BURSTS_PER_WORD (PIXELS_PER_WORD) / (PIXELS_PER_BURST) \n
874\n typedef union {
875 uchar s[PIXELS_PER_BURST * 1];
876 uchar8 v[(PIXELS_PER_BURST * 1) / CHAR_VEC_WIDTH];
877 } charVec1;
878
879 __attribute__((reqd_work_group_size(256, 1, 1))) __kernel void kernel_ThresholdRectToPix_OneChan(__global const uchar8 *imageData, int height, int width,
880 int wpl, // words per line of output image
881 __global int *thresholds, __global int *hi_values, __global int *pix) {
882 // declare variables
883 int pThresholds[1];
884 int pHi_Values[1];
885 for (int i = 0; i < 1; i++) {
886 pThresholds[i] = thresholds[i];
887 pHi_Values[i] = hi_values[i];
888 }
889
890 // for each word (32 pixels) in output image
891 for (uint w = get_global_id(0); w < wpl * height; w += get_global_size(0)) {
892 unsigned int word = 0; // all bits start at zero
893
894 // for each burst in word
895 for (int b = 0; b < BURSTS_PER_WORD; b++) {
896 // load burst
897 charVec1 pixels;
898 // for each char8 in burst
899 pixels.v[0] = imageData[w * BURSTS_PER_WORD + b + 0];
900
901 // for each pixel in burst
902 for (int p = 0; p < PIXELS_PER_BURST; p++) {
903 //int littleEndianIdx = p ^ 3;
904 //int bigEndianIdx = p;
905 int idx =
906\n #ifdef __ENDIAN_LITTLE__\n
907 p ^
908 3;
909 \n #else \n
910 p;
911 \n #endif \n unsigned char pixChan = pixels.s[idx];
912 if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) {
913 const uint kTopBit = 0x80000000;
914 word |= (kTopBit >> ((b * PIXELS_PER_BURST + p) & 31));
915 }
916 }
917 }
918 pix[w] = word;
919 }
920 })
921
922 ; // close char*
923
924# endif // USE_EXTERNAL_KERNEL
925#endif // TESSERACT_OPENCL_OCLKERNELS_H_
926/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#define __attribute__(attr)
Definition: errcode.h:35
#define KERNEL(...)
Definition: oclkernels.h:15
unsigned char uchar
Definition: utfdef.h:8
unsigned int uint
Definition: utfdef.h:10
int value
const char * p
const
Definition: upload.py:413