Browse Source

More texture load/store performance optimisations

rdb 9 years ago
parent
commit
62d0d8292e
1 changed file with 33 additions and 20 deletions
  1. 33 20
      panda/src/gobj/texture.cxx

+ 33 - 20
panda/src/gobj/texture.cxx

@@ -6005,10 +6005,11 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size,
     // Most common case: one byte per pixel, and the source image
     // Most common case: one byte per pixel, and the source image
     // maxval of 255.  No scaling is necessary.  Because this is such a common
     // maxval of 255.  No scaling is necessary.  Because this is such a common
     // case, we break it out per component for best performance.
     // case, we break it out per component for best performance.
+    const xel *array = pnmimage.get_array();
     switch (num_components) {
     switch (num_components) {
     case 1:
     case 1:
       for (int j = y_size-1; j >= 0; j--) {
       for (int j = y_size-1; j >= 0; j--) {
-        xel *row = pnmimage.row(j);
+        const xel *row = array + j * x_size;
         for (int i = 0; i < x_size; i++) {
         for (int i = 0; i < x_size; i++) {
           *p++ = (uchar)PPM_GETB(row[i]);
           *p++ = (uchar)PPM_GETB(row[i]);
         }
         }
@@ -6018,9 +6019,10 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size,
 
 
     case 2:
     case 2:
       if (img_has_alpha) {
       if (img_has_alpha) {
+        const xelval *alpha = pnmimage.get_alpha_array();
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
-          xelval *alpha_row = pnmimage.alpha_row(j);
+          const xel *row = array + j * x_size;
+          const xelval *alpha_row = alpha + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)alpha_row[i];
             *p++ = (uchar)alpha_row[i];
@@ -6029,7 +6031,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size,
         }
         }
       } else {
       } else {
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
+          const xel *row = array + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)255;
             *p++ = (uchar)255;
@@ -6041,7 +6043,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size,
 
 
     case 3:
     case 3:
       for (int j = y_size-1; j >= 0; j--) {
       for (int j = y_size-1; j >= 0; j--) {
-        xel *row = pnmimage.row(j);
+        const xel *row = array + j * x_size;
         for (int i = 0; i < x_size; i++) {
         for (int i = 0; i < x_size; i++) {
           *p++ = (uchar)PPM_GETB(row[i]);
           *p++ = (uchar)PPM_GETB(row[i]);
           *p++ = (uchar)PPM_GETG(row[i]);
           *p++ = (uchar)PPM_GETG(row[i]);
@@ -6053,9 +6055,10 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size,
 
 
     case 4:
     case 4:
       if (img_has_alpha) {
       if (img_has_alpha) {
+        const xelval *alpha = pnmimage.get_alpha_array();
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
-          xelval *alpha_row = pnmimage.alpha_row(j);
+          const xel *row = array + j * x_size;
+          const xelval *alpha_row = alpha + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)PPM_GETG(row[i]);
             *p++ = (uchar)PPM_GETG(row[i]);
@@ -6066,7 +6069,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size,
         }
         }
       } else {
       } else {
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
+          const xel *row = array + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)PPM_GETB(row[i]);
             *p++ = (uchar)PPM_GETG(row[i]);
             *p++ = (uchar)PPM_GETG(row[i]);
@@ -6089,7 +6092,7 @@ convert_from_pnmimage(PTA_uchar &image, size_t page_size,
     for (int j = y_size-1; j >= 0; j--) {
     for (int j = y_size-1; j >= 0; j--) {
       for (int i = 0; i < x_size; i++) {
       for (int i = 0; i < x_size; i++) {
         if (is_grayscale) {
         if (is_grayscale) {
-           store_unscaled_short(p, pnmimage.get_gray_val(i, j));
+          store_unscaled_short(p, pnmimage.get_gray_val(i, j));
         } else {
         } else {
           store_unscaled_short(p, pnmimage.get_blue_val(i, j));
           store_unscaled_short(p, pnmimage.get_blue_val(i, j));
           store_unscaled_short(p, pnmimage.get_green_val(i, j));
           store_unscaled_short(p, pnmimage.get_green_val(i, j));
@@ -6260,11 +6263,13 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size,
   const unsigned char *p = &image[idx];
   const unsigned char *p = &image[idx];
 
 
   if (component_width == 1) {
   if (component_width == 1) {
+    xel *array = pnmimage.get_array();
     if (is_grayscale) {
     if (is_grayscale) {
       if (has_alpha) {
       if (has_alpha) {
+        xelval *alpha = pnmimage.get_alpha_array();
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
-          xelval *alpha_row = pnmimage.alpha_row(j);
+          xel *row = array + j * x_size;
+          xelval *alpha_row = alpha + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             PPM_PUTB(row[i], *p++);
             PPM_PUTB(row[i], *p++);
             alpha_row[i] = *p++;
             alpha_row[i] = *p++;
@@ -6272,7 +6277,7 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size,
         }
         }
       } else {
       } else {
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
+          xel *row = array + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             PPM_PUTB(row[i], *p++);
             PPM_PUTB(row[i], *p++);
           }
           }
@@ -6280,9 +6285,10 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size,
       }
       }
     } else {
     } else {
       if (has_alpha) {
       if (has_alpha) {
+        xelval *alpha = pnmimage.get_alpha_array();
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
-          xelval *alpha_row = pnmimage.alpha_row(j);
+          xel *row = array + j * x_size;
+          xelval *alpha_row = alpha + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             PPM_PUTB(row[i], *p++);
             PPM_PUTB(row[i], *p++);
             PPM_PUTG(row[i], *p++);
             PPM_PUTG(row[i], *p++);
@@ -6292,7 +6298,7 @@ convert_to_pnmimage(PNMImage &pnmimage, int x_size, int y_size,
         }
         }
       } else {
       } else {
         for (int j = y_size-1; j >= 0; j--) {
         for (int j = y_size-1; j >= 0; j--) {
-          xel *row = pnmimage.row(j);
+          xel *row = array + j * x_size;
           for (int i = 0; i < x_size; i++) {
           for (int i = 0; i < x_size; i++) {
             PPM_PUTB(row[i], *p++);
             PPM_PUTB(row[i], *p++);
             PPM_PUTG(row[i], *p++);
             PPM_PUTG(row[i], *p++);
@@ -7028,13 +7034,20 @@ compare_images(const PNMImage &a, const PNMImage &b) {
   nassertr(a.get_x_size() == b.get_x_size() &&
   nassertr(a.get_x_size() == b.get_x_size() &&
            a.get_y_size() == b.get_y_size(), false);
            a.get_y_size() == b.get_y_size(), false);
 
 
+  const xel *a_array = a.get_array();
+  const xel *b_array = b.get_array();
+  const xelval *a_alpha = a.get_alpha_array();
+  const xelval *b_alpha = b.get_alpha_array();
+
+  int x_size = a.get_x_size();
+
   int delta = 0;
   int delta = 0;
   for (int yi = 0; yi < a.get_y_size(); ++yi) {
   for (int yi = 0; yi < a.get_y_size(); ++yi) {
-    xel *a_row = a.row(yi);
-    xel *b_row = b.row(yi);
-    xelval *a_alpha_row = a.alpha_row(yi);
-    xelval *b_alpha_row = b.alpha_row(yi);
-    for (int xi = 0; xi < a.get_x_size(); ++xi) {
+    const xel *a_row = a_array + yi * x_size;
+    const xel *b_row = b_array + yi * x_size;
+    const xelval *a_alpha_row = a_alpha + yi * x_size;
+    const xelval *b_alpha_row = b_alpha + yi * x_size;
+    for (int xi = 0; xi < x_size; ++xi) {
       delta += abs(PPM_GETR(a_row[xi]) - PPM_GETR(b_row[xi]));
       delta += abs(PPM_GETR(a_row[xi]) - PPM_GETR(b_row[xi]));
       delta += abs(PPM_GETG(a_row[xi]) - PPM_GETG(b_row[xi]));
       delta += abs(PPM_GETG(a_row[xi]) - PPM_GETG(b_row[xi]));
       delta += abs(PPM_GETB(a_row[xi]) - PPM_GETB(b_row[xi]));
       delta += abs(PPM_GETB(a_row[xi]) - PPM_GETB(b_row[xi]));