- Timestamp:
- Aug 13, 2010, 12:19:38 PM (16 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/eam_branches/ipp-20100621/psLib/src/fft/psImageFFT.c
r21183 r28910 532 532 return out; 533 533 } 534 535 void psKernelFFTFree(psKernelFFT *kernel) { 536 537 if (!kernel->fft) return; 538 539 bool threaded = psThreadPoolSize(); // Are we running threaded? 540 541 FFTW_LOCK; 542 fftwf_free(kernel->fft); 543 FFTW_UNLOCK; 544 545 return; 546 } 547 548 psKernelFFT *psKernelFFTAlloc(const psKernel *input) { 549 550 psKernelFFT *kernel = psAlloc(sizeof(psKernelFFT)); 551 psMemSetDeallocator(kernel, (psFreeFunc)psKernelFFTFree); 552 553 kernel->fft = NULL; 554 555 kernel->xMin = input->xMin; 556 kernel->xMax = input->xMax; 557 kernel->yMin = input->yMin; 558 kernel->yMax = input->yMax; 559 560 kernel->paddedCols = 0; 561 kernel->paddedRows = 0; 562 563 return kernel; 564 } 565 566 // generate the FFTed kernel image appropriate to be used for convolution of the given input image 567 psKernelFFT *psImageConvolveKernelInit(const psImage *in, const psKernel *kernel) 568 { 569 PS_ASSERT_IMAGE_NON_NULL(in, NULL); 570 PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL); 571 PS_ASSERT_KERNEL_NON_NULL(kernel, NULL); 572 573 bool threaded = psThreadPoolSize(); // Are we running threaded? 574 575 int numCols = in->numCols, numRows = in->numRows; // Size of image 576 int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes 577 578 // Need to pad the kernel (and later the input image) to protect from wrap-around effects 579 if (xMax - xMin > numCols || yMax - yMin > numRows) { 580 // Cannot pad the image if the kernel is larger. 581 psError(PS_ERR_BAD_PARAMETER_SIZE, true, 582 _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."), 583 xMax, yMax, numCols, numRows); 584 return NULL; 585 } 586 587 int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image 588 int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image 589 590 #if CONVOLVE_FFT_BINARY_SIZE 591 // Make the size an integer power of two 592 { 593 int twoCols, twoRows; // Size that is a factor of two 594 for (twoCols = 1; twoCols <= paddedCols && twoCols < INT_MAX - 1; twoCols <<= 1); // No action 595 for (twoRows = 1; twoRows <= paddedRows && twoRows < INT_MAX - 1; twoRows <<= 1); // No action 596 if (paddedCols > twoCols || paddedRows > twoRows) { 597 psError(PS_ERR_BAD_PARAMETER_SIZE, true, "Unable to scale size (%dx%d) up to factor of two", 598 paddedCols, paddedRows); 599 return NULL; 600 } 601 paddedCols = twoCols; 602 paddedRows = twoRows; 603 } 604 #endif 605 606 int numPadded = paddedCols * paddedRows; // Number of pixels in padded image 607 608 // Create data array containing the padded kernel 609 FFTW_LOCK; 610 psF32 *data = fftwf_malloc(numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW 611 FFTW_UNLOCK; 612 613 // Generate the padded kernel image. We could generate the padded kernel image using 614 // memcpy, but by going pixel by pixel we can apply the normalisation that corrects for the 615 // FFT renormalisation. By applying it to the kernel here, we save applying it to the 616 // output image(s). 617 618 // copy kernel into data array 619 psF32 *dataPtr = data; // Pointer into FFTW data 620 float norm = 1.0 / (float)(paddedRows * paddedCols); // Normalisation to correct for FFT 621 622 int xNegMin = PS_MIN(-1, xMin), xNegMax = PS_MIN(-1, xMax); // Min and max for x when negative 623 int xPosMin = PS_MAX(0, xMin), xPosMax = PS_MAX(0, xMax); // Min and max for x when positive 624 int yNegMin = PS_MIN(-1, yMin), yNegMax = PS_MIN(-1, yMax); // Min and max for x when negative 625 int yPosMin = PS_MAX(0, yMin), yPosMax = PS_MAX(0, yMax); // Min and max for x when positive 626 int blankCols = xNegMin + paddedCols - xPosMax - 1; // Number of columns between kernel extrema 627 int blankRows = (yNegMin + paddedRows - yPosMax - 1) * paddedCols; // Rows between kernel extrema 628 size_t blankColBytes = blankCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes in blankCols 629 for (int y = yPosMin; y <= yPosMax; y++) { 630 // y is positive 631 for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) { 632 // x is positive 633 *dataPtr = kernel->kernel[y][x] * norm; 634 } 635 // Columns between kernel extrema 636 memset(dataPtr, 0, blankColBytes); 637 dataPtr += blankCols; 638 for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) { 639 // x is negative 640 *dataPtr = kernel->kernel[y][x] * norm; 641 } 642 } 643 // Rows between kernel extrema 644 memset(dataPtr, 0, blankRows * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 645 dataPtr += blankRows; 646 for (int y = yNegMin; y <= yNegMax; y++) { 647 // y is negative 648 for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) { 649 // x is positive 650 *dataPtr = kernel->kernel[y][x] * norm; 651 } 652 // Columns between kernel extrema 653 memset(dataPtr, 0, blankColBytes); 654 dataPtr += blankCols; 655 for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) { 656 // x is negative 657 *dataPtr = kernel->kernel[y][x] * norm; 658 } 659 } 660 661 #if 0 662 { 663 // Use this for inspecting the result of copying the kernel 664 psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32); 665 psFree(test->p_rawDataBuffer); 666 test->p_rawDataBuffer = data; 667 test->data.V[0] = test->p_rawDataBuffer; 668 for (int y = 1; y < paddedRows; y++) { 669 test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] + paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 670 } 671 // View image here or write to disk 672 test->p_rawDataBuffer = NULL; 673 psFree(test); 674 } 675 #endif 676 677 // Do the forward FFT 678 // Note that the FFT images have different size from the input 679 FFTW_LOCK; 680 fftwf_complex *fft = fftwf_malloc((paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT 681 FFTW_UNLOCK; 682 683 FFTW_LOCK; 684 fftwf_plan plan = fftwf_plan_dft_r2c_2d(paddedRows, paddedCols, data, fft, FFTW_PLAN_RIGOR); 685 FFTW_UNLOCK; 686 687 fftwf_execute(plan); 688 689 FFTW_LOCK; 690 fftwf_destroy_plan(plan); 691 fftwf_free(data); 692 FFTW_UNLOCK; 693 694 psKernelFFT *output = psKernelFFTAlloc(kernel); 695 output->fft = fft; 696 output->paddedCols = paddedCols; 697 output->paddedRows = paddedRows; 698 699 return output; 700 } 701 702 psImage *psImageConvolveKernel(psImage *out, const psImage *in, const psImage *mask, psImageMaskType maskVal, const psKernelFFT *kernel) 703 { 704 PS_ASSERT_IMAGE_NON_NULL(in, NULL); 705 PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL); 706 PS_ASSERT_PTR_NON_NULL(kernel, NULL); 707 PS_ASSERT_PTR_NON_NULL(kernel->fft, NULL); 708 if (mask) { 709 PS_ASSERT_IMAGE_NON_NULL(mask, NULL); 710 PS_ASSERT_IMAGE_TYPE(mask, PS_TYPE_IMAGE_MASK, NULL); 711 PS_ASSERT_IMAGES_SIZE_EQUAL(mask, in, NULL); 712 } 713 714 bool threaded = psThreadPoolSize(); // Are we running threaded? 715 716 int numCols = in->numCols, numRows = in->numRows; // Size of image 717 int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes 718 719 // Need to pad the input image to protect from wrap-around effects 720 if (xMax - xMin > numCols || yMax - yMin > numRows) { 721 // Cannot pad the image if the kernel is larger. 722 psError(PS_ERR_BAD_PARAMETER_SIZE, true, _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."), xMax, yMax, numCols, numRows); 723 return NULL; 724 } 725 726 int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image 727 int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image 728 729 #if CONVOLVE_FFT_BINARY_SIZE 730 // Make the size an integer power of two 731 { 732 int twoCols, twoRows; // Size that is a factor of two 733 for (twoCols = 1; twoCols <= paddedCols && twoCols < INT_MAX - 1; twoCols <<= 1); // No action 734 for (twoRows = 1; twoRows <= paddedRows && twoRows < INT_MAX - 1; twoRows <<= 1); // No action 735 if (paddedCols > twoCols || paddedRows > twoRows) { 736 psError(PS_ERR_BAD_PARAMETER_SIZE, true, "Unable to scale size (%dx%d) up to factor of two", 737 paddedCols, paddedRows); 738 return NULL; 739 } 740 paddedCols = twoCols; 741 paddedRows = twoRows; 742 } 743 #endif 744 745 if (paddedCols != kernel->paddedCols) { 746 psError(PS_ERR_BAD_PARAMETER_SIZE, true, _("Image is inconsistent with allocated kernel")); 747 return NULL; 748 } 749 if (paddedRows != kernel->paddedRows) { 750 psError(PS_ERR_BAD_PARAMETER_SIZE, true, _("Image is inconsistent with allocated kernel")); 751 return NULL; 752 } 753 754 int numPadded = paddedCols * paddedRows; // Number of pixels in padded image 755 756 // Create data array containing the padded image 757 FFTW_LOCK; 758 psF32 *data = fftwf_malloc(numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW 759 FFTW_UNLOCK; 760 psF32 *dataPtr = data; // Pointer into FFTW data 761 psF32 **imageData = in->data.F32; // Pointer into image data 762 763 // Copy image into data array 764 size_t goodBytes = numCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes per image row 765 size_t padBytes = (paddedCols - numCols) * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes to pad 766 for (int y = 0; y < numRows; y++, dataPtr += paddedCols, imageData++) { 767 memcpy(dataPtr, *imageData, goodBytes); 768 memset(dataPtr + numCols, 0, padBytes); 769 } 770 memset(dataPtr, 0, (paddedRows - numRows) * paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 771 772 #if 0 773 { 774 // Use this for inspecting the result of copying the image 775 psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32); 776 psFree(test->p_rawDataBuffer); 777 test->p_rawDataBuffer = data; 778 test->data.V[0] = test->p_rawDataBuffer; 779 for (int y = 1; y < paddedRows; y++) { 780 test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] + 781 paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 782 } 783 // View image here 784 test->p_rawDataBuffer = NULL; 785 psFree(test); 786 } 787 #endif 788 789 // Mask bad pixels (which may be NANs), lest they infect everything 790 if (mask && maskVal) { 791 for (int y = 0; y < numRows; y++) { 792 for (int x = 0; x < numCols; x++) { 793 if (mask->data.PS_TYPE_IMAGE_MASK_DATA[y][x] & maskVal) { 794 data[x + paddedCols * y] = 0; 795 } 796 } 797 } 798 } 799 800 // Do the forward FFT 801 // Note that the FFT images have different size from the input 802 FFTW_LOCK; 803 fftwf_complex *fft = fftwf_malloc((paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT 804 FFTW_UNLOCK; 805 806 int fftCols = paddedCols/2 + 1, fftRows = paddedRows; // Size of FFT images 807 int fftPixels = fftCols * fftRows; // Number of pixels in FFT image 808 809 FFTW_LOCK; 810 fftwf_plan forward = fftwf_plan_dft_r2c_2d(paddedRows, paddedCols, data, fft, FFTW_PLAN_RIGOR); 811 FFTW_UNLOCK; 812 813 fftwf_execute(forward); 814 815 FFTW_LOCK; 816 fftwf_destroy_plan(forward); 817 FFTW_UNLOCK; 818 819 fftwf_complex *kernelFFT = kernel->fft; 820 821 // Multiply the two transforms 822 for (int i = 0; i < fftPixels; i++) { 823 // (a + bi) * (c + di) = (ac - bd) + (bc + ad)i 824 #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) 825 // C99 complex support 826 fft[i] *= kernelFFT[i]; 827 #else 828 // FFTW's backup complex support 829 float imageReal = fft[i][0], imageImag = fft[i][1]; 830 float kernelReal = kernelFFT[i][0], kernelImag = kernelFFT[i][1]; 831 fft[i][0] = imageReal * kernelReal - imageImag * kernelImag; 832 fft[i][1] = imageImag * kernelReal + imageReal * kernelImag; 833 #endif 834 } 835 836 // Do the backward FFT 837 FFTW_LOCK; 838 fftwf_plan backward = fftwf_plan_dft_c2r_2d(paddedRows, paddedCols, fft, data, FFTW_PLAN_RIGOR); 839 FFTW_UNLOCK; 840 841 fftwf_execute(backward); 842 843 FFTW_LOCK; 844 fftwf_destroy_plan(backward); 845 fftwf_free(fft); 846 FFTW_UNLOCK; 847 848 // Copy into the target, without the padding 849 out = psImageRecycle(out, numCols, numRows, PS_TYPE_F32); 850 psF32 **outData = out->data.F32; // Pointer into output 851 dataPtr = data; // Reset to start 852 for (int y = 0; y < numRows; y++, outData++, dataPtr += paddedCols) { 853 memcpy(*outData, dataPtr, goodBytes); 854 } 855 856 FFTW_LOCK; 857 fftwf_free(data); 858 FFTW_UNLOCK; 859 860 return out; 861 }
Note:
See TracChangeset
for help on using the changeset viewer.
