Changeset 29592 for trunk/psLib/src/fft/psImageFFT.c
- Timestamp:
- Oct 28, 2010, 5:37:34 PM (16 years ago)
- File:
-
- 1 edited
-
trunk/psLib/src/fft/psImageFFT.c (modified) (5 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/psLib/src/fft/psImageFFT.c
r29014 r29592 492 492 FFTW_UNLOCK; 493 493 494 #if 0 495 { 496 // Use this for inspecting the result of the fft XXX : K & I are backwards... 497 psImage *testKr = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 498 psImage *testKi = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 499 psImage *testIr = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 500 psImage *testIi = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 501 for (int iy = 0; iy < fftRows; iy++) { 502 for (int ix = 0; ix < fftCols; ix++) { 503 #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) 504 testKr->data.F32[iy][ix] = creal(fft[ix + iy*fftCols]); 505 testKi->data.F32[iy][ix] = cimag(fft[ix + iy*fftCols]); 506 testIr->data.F32[iy][ix] = creal(fft[ix + iy*fftCols + fftPixels]); 507 testIi->data.F32[iy][ix] = cimag(fft[ix + iy*fftCols + fftPixels]); 508 #else 509 testKr->data.F32[iy][ix] = fft[ix + iy*fftCols][0]; 510 testKi->data.F32[iy][ix] = fft[ix + iy*fftCols][1]; 511 testIr->data.F32[iy][ix] = fft[ix + iy*fftCols + fftPixels][0]; 512 testIi->data.F32[iy][ix] = fft[ix + iy*fftCols + fftPixels][1]; 513 #endif 514 } 515 } 516 fprintf (stderr, "generated test images\n"); 517 fprintf (stderr, "please inspect\n"); 518 psFree(testKr); 519 psFree(testKi); 520 psFree(testIr); 521 psFree(testIi); 522 } 523 #endif 524 494 525 // Multiply the two transforms 495 526 for (int i = 0, j = fftPixels; i < fftPixels; i++, j++) { … … 534 565 } 535 566 536 void psKernelFFTFree(psKernelFFT *kernel) { 537 538 if (!kernel->fft) return; 539 540 bool threaded = psThreadPoolSize(); // Are we running threaded? 541 542 FFTW_LOCK; 543 fftwf_free(kernel->fft); 544 FFTW_UNLOCK; 545 546 return; 547 } 548 549 psKernelFFT *psKernelFFTAlloc(const psKernel *input) { 550 551 psKernelFFT *kernel = psAlloc(sizeof(psKernelFFT)); 552 psMemSetDeallocator(kernel, (psFreeFunc)psKernelFFTFree); 553 554 kernel->fft = NULL; 555 556 kernel->xMin = input->xMin; 557 kernel->xMax = input->xMax; 558 kernel->yMin = input->yMin; 559 kernel->yMax = input->yMax; 560 561 kernel->paddedCols = 0; 562 kernel->paddedRows = 0; 563 564 return kernel; 565 } 566 567 // generate the FFTed kernel image appropriate to be used for convolution of the given input image 568 psKernelFFT *psImageConvolveKernelInit(const psImage *in, const psKernel *kernel) 567 psImage *psImageConvolveFFTwithWindow(psImage *out, const psImage *in, const psImage *mask, psImageMaskType maskVal, const psKernel *kernel) 569 568 { 570 569 PS_ASSERT_IMAGE_NON_NULL(in, NULL); 571 570 PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL); 572 571 PS_ASSERT_KERNEL_NON_NULL(kernel, NULL); 572 if (mask) { 573 PS_ASSERT_IMAGE_NON_NULL(mask, NULL); 574 PS_ASSERT_IMAGE_TYPE(mask, PS_TYPE_IMAGE_MASK, NULL); 575 PS_ASSERT_IMAGES_SIZE_EQUAL(mask, in, NULL); 576 } 573 577 574 578 bool threaded = psThreadPoolSize(); // Are we running threaded? … … 577 581 int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes 578 582 579 // Need to pad the kernel (and later the input image)to protect from wrap-around effects583 // Need to pad the input image to protect from wrap-around effects 580 584 if (xMax - xMin > numCols || yMax - yMin > numRows) { 581 585 // Cannot pad the image if the kernel is larger. … … 607 611 int numPadded = paddedCols * paddedRows; // Number of pixels in padded image 608 612 609 // Create data array containing the padded kernel 610 FFTW_LOCK; 611 psF32 *data = fftwf_malloc(numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW 612 FFTW_UNLOCK; 613 614 // Generate the padded kernel image. We could generate the padded kernel image using 615 // memcpy, but by going pixel by pixel we can apply the normalisation that corrects for the 616 // FFT renormalisation. By applying it to the kernel here, we save applying it to the 617 // output image(s). 618 619 // copy kernel into data array 613 // Create data array containing the padded image and padded kernel 614 FFTW_LOCK; 615 psF32 *data = fftwf_malloc(2 * numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW 616 FFTW_UNLOCK; 620 617 psF32 *dataPtr = data; // Pointer into FFTW data 618 psF32 **imageData = in->data.F32; // Pointer into image data 619 620 // Image part of data array 621 size_t goodBytes = numCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes per image row 622 size_t padBytes = (paddedCols - numCols) * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes to pad 623 for (int y = 0; y < numRows; y++, dataPtr += paddedCols, imageData++) { 624 memcpy(dataPtr, *imageData, goodBytes); 625 memset(dataPtr + numCols, 0, padBytes); 626 } 627 memset(dataPtr, 0, (paddedRows - numRows) * paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 628 629 #if 0 630 { 631 // Use this for inspecting the result of copying the image 632 psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32); 633 psFree(test->p_rawDataBuffer); 634 test->p_rawDataBuffer = data; 635 test->data.V[0] = test->p_rawDataBuffer; 636 for (int y = 1; y < paddedRows; y++) { 637 test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] + 638 paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 639 } 640 // View image here 641 test->p_rawDataBuffer = NULL; 642 psFree(test); 643 } 644 #endif 645 646 // Kernel part of data array 647 dataPtr = data + numPadded; // Reset to kernel image location 621 648 float norm = 1.0 / (float)(paddedRows * paddedCols); // Normalisation to correct for FFT 622 649 // We could generate the padded kernel image using memcpy, but by going pixel by pixel we can apply the 650 // normalisation that corrects for the FFT renormalisation. By applying it to the kernel here, we save 651 // applying it to the entire output image. 623 652 int xNegMin = PS_MIN(-1, xMin), xNegMax = PS_MIN(-1, xMax); // Min and max for x when negative 624 653 int xPosMin = PS_MAX(0, xMin), xPosMax = PS_MAX(0, xMax); // Min and max for x when positive … … 665 694 psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32); 666 695 psFree(test->p_rawDataBuffer); 696 test->p_rawDataBuffer = &data[numPadded]; 697 test->data.V[0] = test->p_rawDataBuffer; 698 for (int y = 1; y < paddedRows; y++) { 699 test->data.V[y] = (psPtr)((int8_t *)test->data.V[y - 1] + 700 paddedCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 701 } 702 // View image here 703 test->p_rawDataBuffer = NULL; 704 psFree(test); 705 } 706 #endif 707 708 // Mask bad pixels (which may be NANs), lest they infect everything 709 if (mask && maskVal) { 710 for (int y = 0; y < numRows; y++) { 711 for (int x = 0; x < numCols; x++) { 712 if (mask->data.PS_TYPE_IMAGE_MASK_DATA[y][x] & maskVal) { 713 data[x + paddedCols * y] = 0; 714 } 715 } 716 } 717 } 718 719 // Do the forward FFT 720 // Note that the FFT images have different size from the input 721 FFTW_LOCK; 722 fftwf_complex *fft = fftwf_malloc(2 * (paddedCols/2 + 1) * paddedRows * sizeof(fftwf_complex)); // FFT 723 FFTW_UNLOCK; 724 int size[] = { paddedRows, paddedCols }; // Size of transforms 725 int fftCols = paddedCols/2 + 1, fftRows = paddedRows; // Size of FFT images 726 int fftPixels = fftCols * fftRows; // Number of pixels in FFT image 727 728 FFTW_LOCK; 729 fftwf_plan forward = fftwf_plan_many_dft_r2c(2, size, 2, data, NULL, 1, paddedCols * paddedRows, 730 fft, NULL, 1, fftPixels, FFTW_PLAN_RIGOR); 731 FFTW_UNLOCK; 732 733 fftwf_execute(forward); 734 735 FFTW_LOCK; 736 fftwf_destroy_plan(forward); 737 FFTW_UNLOCK; 738 739 // apply a window function: 740 psVector *xWindow = psVectorAlloc(fftCols, PS_TYPE_F32); 741 psVector *yWindow = psVectorAlloc(fftRows, PS_TYPE_F32); 742 743 // not sure what window function to use. trying a simple Gaussian. 744 // In the x-direction, the window goes from ~1 @ x = 0 to something low at x = fftCols 745 // In the y direction, the window goes to a small value at y = fftRows / 2, and back up 746 for (int i = 0; i < fftCols; i++) { 747 float xNorm = 2.0 * i / (float) fftCols; // sigma = 0.5 748 xWindow->data.F32[i] = exp(-0.5*xNorm*xNorm); 749 } 750 for (int i = 0; i < fftRows / 2 + 1; i++) { 751 float xNorm = 2.0 * i / (float) (fftRows / 2.0); // sigma = 0.5 752 yWindow->data.F32[i] = exp(-0.5*xNorm*xNorm); 753 yWindow->data.F32[fftRows - 1 - i] = yWindow->data.F32[i]; 754 } 755 756 for (int iy = 0; iy < fftRows; iy++) { 757 float yValue = yWindow->data.F32[iy]; 758 for (int ix = 0; ix < fftCols; ix++) { 759 float xValue = xWindow->data.F32[ix]; 760 #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) 761 fft[ix + iy*fftCols + fftPixels] *= xValue * yValue; // kernel element (real + I*imaginary) 762 #else 763 fft[ix + iy*fftCols + fftPixels][0] *= xValue * yValue; 764 fft[ix + iy*fftCols + fftPixels][1] *= xValue * yValue; 765 #endif 766 } 767 } 768 769 #if 0 770 { 771 // Use this for inspecting the result of the fft XXX : K & I are backwards... 772 psImage *testKr = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 773 psImage *testKi = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 774 psImage *testIr = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 775 psImage *testIi = psImageAlloc(fftCols, fftRows, PS_TYPE_F32); 776 for (int iy = 0; iy < fftRows; iy++) { 777 for (int ix = 0; ix < fftCols; ix++) { 778 #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) 779 testIr->data.F32[iy][ix] = creal(fft[ix + iy*fftCols]); 780 testIi->data.F32[iy][ix] = cimag(fft[ix + iy*fftCols]); 781 testKr->data.F32[iy][ix] = creal(fft[ix + iy*fftCols + fftPixels]); 782 testKi->data.F32[iy][ix] = cimag(fft[ix + iy*fftCols + fftPixels]); 783 #else 784 testIr->data.F32[iy][ix] = fft[ix + iy*fftCols][0]; 785 testIi->data.F32[iy][ix] = fft[ix + iy*fftCols][1]; 786 testKr->data.F32[iy][ix] = fft[ix + iy*fftCols + fftPixels][0]; 787 testKi->data.F32[iy][ix] = fft[ix + iy*fftCols + fftPixels][1]; 788 #endif 789 } 790 } 791 fprintf (stderr, "generated test images\n"); 792 fprintf (stderr, "please inspect\n"); 793 psFree(testKr); 794 psFree(testKi); 795 psFree(testIr); 796 psFree(testIi); 797 } 798 #endif 799 800 // Multiply the two transforms 801 for (int i = 0, j = fftPixels; i < fftPixels; i++, j++) { 802 // (a + bi) * (c + di) = (ac - bd) + (bc + ad)i 803 #if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex) && defined(I) 804 // C99 complex support 805 fft[i] *= fft[j]; 806 #else 807 // FFTW's backup complex support 808 float imageReal = fft[i][0], imageImag = fft[i][1]; 809 float kernelReal = fft[j][0], kernelImag = fft[j][1]; 810 fft[i][0] = imageReal * kernelReal - imageImag * kernelImag; 811 fft[i][1] = imageImag * kernelReal + imageReal * kernelImag; 812 #endif 813 } 814 815 // Do the backward FFT 816 FFTW_LOCK; 817 fftwf_plan backward = fftwf_plan_dft_c2r_2d(paddedRows, paddedCols, fft, data, FFTW_PLAN_RIGOR); 818 FFTW_UNLOCK; 819 820 fftwf_execute(backward); 821 822 FFTW_LOCK; 823 fftwf_destroy_plan(backward); 824 fftwf_free(fft); 825 FFTW_UNLOCK; 826 827 // Copy into the target, without the padding 828 out = psImageRecycle(out, numCols, numRows, PS_TYPE_F32); 829 psF32 **outData = out->data.F32; // Pointer into output 830 dataPtr = data; // Reset to start 831 for (int y = 0; y < numRows; y++, outData++, dataPtr += paddedCols) { 832 memcpy(*outData, dataPtr, goodBytes); 833 } 834 835 FFTW_LOCK; 836 fftwf_free(data); 837 FFTW_UNLOCK; 838 839 return out; 840 } 841 842 void psKernelFFTFree(psKernelFFT *kernel) { 843 844 if (!kernel->fft) return; 845 846 bool threaded = psThreadPoolSize(); // Are we running threaded? 847 848 FFTW_LOCK; 849 fftwf_free(kernel->fft); 850 FFTW_UNLOCK; 851 852 return; 853 } 854 855 psKernelFFT *psKernelFFTAlloc(const psKernel *input) { 856 857 psKernelFFT *kernel = psAlloc(sizeof(psKernelFFT)); 858 psMemSetDeallocator(kernel, (psFreeFunc)psKernelFFTFree); 859 860 kernel->fft = NULL; 861 862 kernel->xMin = input->xMin; 863 kernel->xMax = input->xMax; 864 kernel->yMin = input->yMin; 865 kernel->yMax = input->yMax; 866 867 kernel->paddedCols = 0; 868 kernel->paddedRows = 0; 869 870 return kernel; 871 } 872 873 // generate the FFTed kernel image appropriate to be used for convolution of the given input image 874 psKernelFFT *psImageConvolveKernelInit(const psImage *in, const psKernel *kernel) 875 { 876 PS_ASSERT_IMAGE_NON_NULL(in, NULL); 877 PS_ASSERT_IMAGE_TYPE(in, PS_TYPE_F32, NULL); 878 PS_ASSERT_KERNEL_NON_NULL(kernel, NULL); 879 880 bool threaded = psThreadPoolSize(); // Are we running threaded? 881 882 int numCols = in->numCols, numRows = in->numRows; // Size of image 883 int xMin = kernel->xMin, xMax = kernel->xMax, yMin = kernel->yMin, yMax = kernel->yMax; // Kernel sizes 884 885 // Need to pad the kernel (and later the input image) to protect from wrap-around effects 886 if (xMax - xMin > numCols || yMax - yMin > numRows) { 887 // Cannot pad the image if the kernel is larger. 888 psError(PS_ERR_BAD_PARAMETER_SIZE, true, 889 _("Kernel cannot extend further than input image size (%dx%d vs %dx%d)."), 890 xMax, yMax, numCols, numRows); 891 return NULL; 892 } 893 894 int paddedCols = numCols + PS_MAX(-xMin, xMax); // Number of columns in padded image 895 int paddedRows = numRows + PS_MAX(-yMin, yMax); // Number of rows in padded image 896 897 #if CONVOLVE_FFT_BINARY_SIZE 898 // Make the size an integer power of two 899 { 900 int twoCols, twoRows; // Size that is a factor of two 901 for (twoCols = 1; twoCols <= paddedCols && twoCols < INT_MAX - 1; twoCols <<= 1); // No action 902 for (twoRows = 1; twoRows <= paddedRows && twoRows < INT_MAX - 1; twoRows <<= 1); // No action 903 if (paddedCols > twoCols || paddedRows > twoRows) { 904 psError(PS_ERR_BAD_PARAMETER_SIZE, true, "Unable to scale size (%dx%d) up to factor of two", 905 paddedCols, paddedRows); 906 return NULL; 907 } 908 paddedCols = twoCols; 909 paddedRows = twoRows; 910 } 911 #endif 912 913 int numPadded = paddedCols * paddedRows; // Number of pixels in padded image 914 915 // Create data array containing the padded kernel 916 FFTW_LOCK; 917 psF32 *data = fftwf_malloc(numPadded * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); // Data for FFTW 918 FFTW_UNLOCK; 919 920 // Generate the padded kernel image. We could generate the padded kernel image using 921 // memcpy, but by going pixel by pixel we can apply the normalisation that corrects for the 922 // FFT renormalisation. By applying it to the kernel here, we save applying it to the 923 // output image(s). 924 925 // copy kernel into data array 926 psF32 *dataPtr = data; // Pointer into FFTW data 927 float norm = 1.0 / (float)(paddedRows * paddedCols); // Normalisation to correct for FFT 928 929 int xNegMin = PS_MIN(-1, xMin), xNegMax = PS_MIN(-1, xMax); // Min and max for x when negative 930 int xPosMin = PS_MAX(0, xMin), xPosMax = PS_MAX(0, xMax); // Min and max for x when positive 931 int yNegMin = PS_MIN(-1, yMin), yNegMax = PS_MIN(-1, yMax); // Min and max for x when negative 932 int yPosMin = PS_MAX(0, yMin), yPosMax = PS_MAX(0, yMax); // Min and max for x when positive 933 int blankCols = xNegMin + paddedCols - xPosMax - 1; // Number of columns between kernel extrema 934 int blankRows = (yNegMin + paddedRows - yPosMax - 1) * paddedCols; // Rows between kernel extrema 935 size_t blankColBytes = blankCols * PSELEMTYPE_SIZEOF(PS_TYPE_F32); // Number of bytes in blankCols 936 for (int y = yPosMin; y <= yPosMax; y++) { 937 // y is positive 938 for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) { 939 // x is positive 940 *dataPtr = kernel->kernel[y][x] * norm; 941 } 942 // Columns between kernel extrema 943 memset(dataPtr, 0, blankColBytes); 944 dataPtr += blankCols; 945 for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) { 946 // x is negative 947 *dataPtr = kernel->kernel[y][x] * norm; 948 } 949 } 950 // Rows between kernel extrema 951 memset(dataPtr, 0, blankRows * PSELEMTYPE_SIZEOF(PS_TYPE_F32)); 952 dataPtr += blankRows; 953 for (int y = yNegMin; y <= yNegMax; y++) { 954 // y is negative 955 for (int x = xPosMin; x <= xPosMax; x++, dataPtr++) { 956 // x is positive 957 *dataPtr = kernel->kernel[y][x] * norm; 958 } 959 // Columns between kernel extrema 960 memset(dataPtr, 0, blankColBytes); 961 dataPtr += blankCols; 962 for (int x = xNegMin; x <= xNegMax; x++, dataPtr++) { 963 // x is negative 964 *dataPtr = kernel->kernel[y][x] * norm; 965 } 966 } 967 968 #if 0 969 { 970 // Use this for inspecting the result of copying the kernel 971 psImage *test = psImageAlloc(paddedCols, paddedRows, PS_TYPE_F32); 972 psFree(test->p_rawDataBuffer); 667 973 test->p_rawDataBuffer = data; 668 974 test->data.V[0] = test->p_rawDataBuffer;
Note:
See TracChangeset
for help on using the changeset viewer.
