#include #include #include #include #include "blur.h" void do_blur(pixel **out_image, pixel **in_image, int n_cols, int n_rows) { float filter_div = 16.0; float filter_div_rcp = 0.0; static float filter[3][3] = {{1.0, 2.0, 1.0}, {2.0, 4.0, 2.0}, {1.0, 2.0, 1.0}}; int x; int y; int i; int j; pixel vpix; int res_r; int res_g; int res_b; float tmp_min = 0.0; float tmp_max = 255.0; int tmp_x; int tmp_y; int tmp_r; int tmp_g; int tmp_b; int tmp_filter_index; __asm__ __volatile__("femms\n\t" "movd %1, %%mm0\n\t" "pfrcp %%mm0, %%mm1\n\t" "pfrcpit1 %%mm1, %%mm0\n\t" "pfrcpit2 %%mm1, %%mm0\n\t" "movd %%mm0, %0\n\t" "femms" : "=g" (filter_div_rcp) : "g" (filter_div) ); __asm__ __volatile__("femms"); __asm__ __volatile__("movd %0, %%mm4\n\t" "punpckldq %%mm4, %%mm4\n\t" "movd %1, %%mm3\n\t" "punpckldq %%mm3, %%mm3\n\t" : : "m" (tmp_min), "m" (tmp_max) ); for(y = 0; y < n_rows; y++) { for(x = 0; x < n_cols; x++) { __asm__ __volatile__("pxor %%mm7, %%mm7\n\t" "pxor %%mm6, %%mm6\n\t" : : ); for(j = 0; j < 3; j++) { for(i = 0; i < 3; i++) { tmp_x = x - (3>>1) + i; tmp_y = y - (3>>1) + j; if (tmp_x < 0) tmp_x = 0; if (tmp_x == n_cols) tmp_x = n_cols - 1; if (tmp_y < 0) tmp_y = 0; if (tmp_y == n_rows) tmp_y = n_rows - 1; vpix = in_image[tmp_y][tmp_x]; tmp_filter_index = i + 3 * j; tmp_r = PPM_GETR(vpix); tmp_g = PPM_GETG(vpix); tmp_b = PPM_GETB(vpix); __asm__ __volatile__("movq filter.0(,%3,4), %%mm5\n\t" "punpckldq %%mm5, %%mm5\n\t" "movd %0, %%mm0\n\t" "movd %1, %%mm1\n\t" "punpckldq %%mm1, %%mm0\n\t" "pi2fd %%mm0, %%mm0\n\t" "movd %2, %%mm1\n\t" "pi2fd %%mm1, %%mm1\n\t" "pfmul %%mm5, %%mm0\n\t" "pfadd %%mm0, %%mm7\n\t" "pfmul %%mm5, %%mm1\n\t" "pfadd %%mm1, %%mm6\n\t" : : "r" (tmp_r), "r" (tmp_g), "r" (tmp_b), "r" (tmp_filter_index), "r" (filter) ); } } __asm__ __volatile__("movd %3, %%mm5\n\t" "punpckldq %%mm5, %%mm5\n\t" "pfmul %%mm5, %%mm7\n\t" "pfmul %%mm5, %%mm6\n\t" "pfmax %%mm4, %%mm7\n\t" "pfmax %%mm4, %%mm6\n\t" "pfmin %%mm3, %%mm7\n\t" "pfmin %%mm3, %%mm6\n\t" "pf2id %%mm7, %%mm7\n\t" "pf2id %%mm6, %%mm6\n\t" "movd %%mm7, %0\n\t" "punpckhdq %%mm7, %%mm7\n\t" "movd %%mm7, %1\n\t" "movd %%mm6, %2\n\t" : "=g" (res_r), "=g" (res_g), "=g" (res_b) : "g" (filter_div_rcp) ); PPM_ASSIGN(out_image[y][x], (pixval) res_r, (pixval) res_g, (pixval) res_b); } } }