Java-Gaming.org
 Featured games (81) games approved by the League of Dukes Games in Showcase (497) Games in Android Showcase (114) games submitted by our members Games in WIP (563) games currently in development
 News: Read the Java Gaming Resources, or peek at the official Java tutorials
Pages: [1] 2
 ignore  |  Print
 Fastest get2fold algorithm  (Read 7674 times) 0 Members and 1 Guest are viewing this topic.
tusaki

Junior Member

Medals: 1

 « Posted 2005-05-27 07:41:47 »

Hia, this is the fastest algorithm which will find the nearest, larger, 2^n number, given an integer. For example: 94 will yield 128. 265 will yield 512. etc.

I got it from here: http://mindprod.com/jgloss/widthinbits.html

The original author is Dirk Bosmans. His original algorithm returned the minimum number of bits required to store a given number. The only change I made is that the algorithm now returns 2^n instead of n.

The commented out algorithm is a slower, but cleaner version.

I will use this algorithm in my next post, the fastest texture-generating algorithm. but I thought this was worth mentioning by itself.

** edit: fixed bug in code. now the "branching method" gives equal results to the java.Math method.

 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155  156  157  158  159  160  161  162  163  164 `/*public static double invln2 = 1.0 / Math.log(2.0);public static int get2Fold(int num) {      return (int) Math.pow(2,Math.ceil(Math.log(num) * invln2));}*//*** Calculate how many bits wide a number is,* i.e. position of highest 1 bit.* Fully unraveled binary search method.* @return p where 2**p is first power of two >= n.* e.g. binary 0001_0101 -> 5, 0xffffffff -> 32,* 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 3* * (Vincent) Changed to return 2^n instead of n* * @author Dirk Bosmans Dirk.Bosmans@tijd.com * @author Vincent Vollers*/static public final int get2Fold( int n )   {   if ( n < 0 ) return 32;   if ( n > 0x00010000 )      {      if ( n > 0x01000000 )         {         if ( n > 0x10000000 )            {            if ( n > 0x40000000 )               {               // if ( n > 0x7fffffff )               // return 32               // else              // return 2147483648; is too much, so return -1                        return -1;               }            else               {               // !( n > 0x3fffffff )               if ( n > 0x20000000 ) return 1073741824;               else return 536870912;               }            }         else            {            // !( n > 0x0fffffff )            if ( n > 0x04000000 )               {               if ( n > 0x08000000 ) return 268435456;               else return 134217728;               }            else               {               // !( n > 0x03ffffff )               if ( n > 0x02000000 ) return 67108864;               else return 33554432;               }            }         }      else         {         // !( n > 0x00ffffff )         if ( n > 0x00100000 )            {            if ( n > 0x00400000 )               {               if ( n > 0x00800000 ) return 16777216;               else return 8388608;               }            else               {               // !( n > 0x003fffff )               if ( n > 0x00200000 ) return 4194304;               else return 2097152;               }            }         else            {            // !( n > 0x000fffff )            if ( n > 0x00040000 )               {               if ( n > 0x00080000 ) return 1048576;               else return 524288;               }            else               {               // !( n > 0x0003ffff )               if ( n > 0x00020000 ) return 262144;               else return 131072;               }            }         }      }   else      {      // !( n > 0x0000ffff )      if ( n > 0x00000100 )         {         if ( n > 0x00001000 )            {            if ( n > 0x00004000 )               {               if ( n > 0x00008000 ) return 65536;               else return 32768;               }            else               {               // !( n > 0x00003fff )               if ( n > 0x00002000 ) return 16384;               else return 8192;               }            }         else            {            // !( n > 0x00000fff )            if ( n > 0x00000400 )               {               if ( n > 0x00000800 ) return 4096;               else return 2048;               }            else               {               // !( n > 0x000003ff )               if ( n > 0x00000200 ) return 1024;               else return 512;               }            }         }      else         {         // !( n > 0x000000ff )         if ( n > 0x00000010 )            {            if ( n > 0x00000040 )               {               if ( n > 0x00000080 ) return 256;               else return 128;               }            else               {               // !( n > 0x0000003f )               if ( n > 0x00000020 ) return 64;               else return 32;               }            }         else            {            // !( n > 0x0000000f )            if ( n > 0x00000004 )               {               if ( n > 0x00000008 ) return 16;               else return 8;               }            else               {               // !( n > 0x00000003 )               if ( n > 0x00000002 ) return 4;                                    return n;               }            }         }      }   } // end widthInBits`
Orangy Tang

JGO Kernel

Medals: 56
Projects: 11

 « Reply #1 - Posted 2005-05-27 08:18:25 »

Quote

The commented out algorithm is a slower, but cleaner version.

Profiling method & results? I'm surprised that a method with that many branches is actually faster on current CPUs.

[ TriangularPixels.com - Play Growth Spurt, Rescue Squad and Snowman Village ] [ Rebirth - game resource library ]
tusaki

Junior Member

Medals: 1

 « Reply #2 - Posted 2005-05-27 08:29:01 »

To be honest, it just said so on the page, I haven't tested both out for speed...

but I will do so now... on a pentium 4 2.4ghz..

ok, the branched method is faster:
 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20 `-- Test run 1Empty loop: 219Rolled out binary search method: 203Java.Math() method: 1469-- Test run 2Empty loop: 187Rolled out binary search method: 203Java.Math() method: 1844-- Test run 3Empty loop: 172Rolled out binary search method: 203Java.Math() method: 1703-- Test run 4Empty loop: 172Rolled out binary search method: 219Java.Math() method: 1687-- Test run 5Empty loop: 172Rolled out binary search method: 219Java.Math() method: 1719`

Testing code:
 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41 `public class TestProgram {      public static int NUM = 1000000;            public void testRun(int run) {            System.out.println("-- Test run " + run);            Random r = new Random();            long time = System.currentTimeMillis();                        for(int i=0;i

Riven
« League of Dukes »

JGO Overlord

Medals: 799
Projects: 4
Exp: 16 years

 « Reply #3 - Posted 2005-05-27 09:01:16 »

If you really want to squeeze out every drop of performance, start with the small numbers, as they are the common range (for textures)

Hi, appreciate more people! Σ ♥ = ¾
Learn how to award medals... and work your way up the social rankings
kevglass

JGO Kernel

Medals: 164
Projects: 23
Exp: 18 years

Coder, Trainee Pixel Artist, Game Reviewer

 « Reply #4 - Posted 2005-05-27 09:04:46 »

Doesn't Math implementation vary from platform to platform depending whats available on the processor? The Math version might be quicker on some platforms.

Kev

Riven
« League of Dukes »

JGO Overlord

Medals: 799
Projects: 4
Exp: 16 years

 « Reply #5 - Posted 2005-05-27 09:43:53 »

Quote
which will find the nearest, larger, 2^n number, given an integer.

When passing 1024 as argument, it returns 2048.

why always larger ??

Hi, appreciate more people! Σ ♥ = ¾
Learn how to award medals... and work your way up the social rankings
tusaki

Junior Member

Medals: 1

 « Reply #6 - Posted 2005-05-27 09:51:22 »

Quote

When passing 1024 as argument, it returns 2048.

why always larger ??

actually that wasn't intentional, I just said "larger" because just saying "nearest" would be incorrect, since 516 would result in 512 (since 512 is nearer).

but it should output 1024. mhhh.. thank you for finding this bug. I'll update the function (will post it here later) and notify the author.
tusaki

Junior Member

Medals: 1

 « Reply #7 - Posted 2005-05-27 09:53:48 »

Quote
Doesn't Math implementation vary from platform to platform depending whats available on the processor? The Math version might be quicker on some platforms.

Kev

Possibly true, I dont have access to other platforms, but maybe someone is willing to test this...
tusaki

Junior Member

Medals: 1

 « Reply #8 - Posted 2005-05-27 10:16:25 »

I've updated the code, the following program tests the output of both functions, and it's now always the same.

 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26 `public class TestProgram {      public void test2Fold(int i) {            if(TextureTool.get2Fold2(i) != TextureTool.get2Fold(i)) {                  System.out.println("i: " + Integer.toString(i , 16) + " m1: " + TextureTool.get2Fold(i) + " m2: " + TextureTool.get2Fold2(i));            }      }            public void testRun() {            System.out.println("-- Testing consistency");            for(int i=1;i<30;i++) {                  int n = (int) Math.pow(2, i);                                    test2Fold(n-1);                  test2Fold(n);                  test2Fold(n+1);            }            System.out.println("-- Done");      }            public TestProgram() {            testRun();      }      public static void main(String[] args) {            new TestProgram();      }}`

tusaki

Junior Member

Medals: 1

 « Reply #9 - Posted 2005-05-27 10:28:51 »

I know what my mistake was. The original algorithm was to calculate the number of bits neccesary to contain a given number.

so, for example

seven:

111b
1+2+4 = 7
result: 3. 2^3 = 8, and this is, for our texture mapping, a good value.

but when we take "eight"

0001b
0+0+0+8 = 8
result: 4. 2^4 = 16, the "wrong" answer, since 8 is already aligned, but it is the right "minimum amount of bits to store the number eight".

In my test cases, I was working with images size not exactly 2^n, so I never noticed.

the new algorithm, as updated above, works fine though.
Riven
« League of Dukes »

JGO Overlord

Medals: 799
Projects: 4
Exp: 16 years

 « Reply #10 - Posted 2005-05-27 10:58:14 »

Offtopic:

We write binary in the same order as in the decimal-system:

7 = 0111b
8 = 1000b

Hi, appreciate more people! Σ ♥ = ¾
Learn how to award medals... and work your way up the social rankings
oravecz

Junior Newbie

 « Reply #11 - Posted 2006-03-20 23:32:32 »

Sorry to repeat this code block here. I added a couple other methods that use bit shifting. All results are certified accurate up to 0x4000000.

More performance is possible by doing a bottom-up search for a power of two instead of the top-down that I used. It might even prove much faster if the starting point was chosen using a few if clauses, As it is, I start with the max power of two and shift down until I am lower than the target number.

 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185  186  187  188  189  190  191  192  193  194  195  196  197  198  199  200  201  202  203  204  205 `public class PowerOfTwo {   public static final double invln2 = 1.0 / Math.log(2.0);   public static int get2FoldLog(int num) {      return (int) Math.pow(2, Math.ceil(Math.log(num) * invln2));   }   /**    * Calculate how many bits wide a number is,    * i.e. position of highest 1 bit.    * Fully unraveled binary search method.    *    * @return p where 2**p is first power of two >= n.    *         e.g. binary 0001_0101 -> 5, 0xffffffff -> 32,    *         0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 3    *

*         (Vincent) Changed to return 2^n instead of n    * @author Dirk Bosmans Dirk.Bosmans@tijd.com    * @author Vincent Vollers    */   public static final int get2FoldUnrolled(int n) {      if (n < 0) return 32;      if (n > 0x00010000) {         if (n > 0x01000000) {            if (n > 0x10000000) {               if (n > 0x40000000) {                  // if ( n > 0x7fffffff )                  // return 32                  // else                  // return 2147483648; is too much, so return -1                  return -1;               } else {                  // !( n > 0x3fffffff )                  if (n > 0x20000000) return 1073741824;                  else return 536870912;               }            } else {               // !( n > 0x0fffffff )               if (n > 0x04000000) {                  if (n > 0x08000000) return 268435456;                  else return 134217728;               } else {                  // !( n > 0x03ffffff )                  if (n > 0x02000000) return 67108864;                  else return 33554432;               }            }         } else {            // !( n > 0x00ffffff )            if (n > 0x00100000) {               if (n > 0x00400000) {                  if (n > 0x00800000) return 16777216;                  else return 8388608;               } else {                  // !( n > 0x003fffff )                  if (n > 0x00200000) return 4194304;                  else return 2097152;               }            } else {               // !( n > 0x000fffff )               if (n > 0x00040000) {                  if (n > 0x00080000) return 1048576;                  else return 524288;               } else {                  // !( n > 0x0003ffff )                  if (n > 0x00020000) return 262144;                  else return 131072;               }            }         }      } else {         // !( n > 0x0000ffff )         if (n > 0x00000100) {            if (n > 0x00001000) {               if (n > 0x00004000) {                  if (n > 0x00008000) return 65536;                  else return 32768;               } else {                  // !( n > 0x00003fff )                  if (n > 0x00002000) return 16384;                  else return 8192;               }            } else {               // !( n > 0x00000fff )               if (n > 0x00000400) {                  if (n > 0x00000800) return 4096;                  else return 2048;               } else {                  // !( n > 0x000003ff )                  if (n > 0x00000200) return 1024;                  else return 512;               }            }         } else {            // !( n > 0x000000ff )            if (n > 0x00000010) {               if (n > 0x00000040) {                  if (n > 0x00000080) return 256;                  else return 128;               } else {                  // !( n > 0x0000003f )                  if (n > 0x00000020) return 64;                  else return 32;               }            } else {               // !( n > 0x0000000f )               if (n > 0x00000004) {                  if (n > 0x00000008) return 16;                  else return 8;               } else {                  // !( n > 0x00000003 )                  if (n > 0x00000002) return 4;                  return n;               }            }         }      }   } // end widthInBits   public static final int get2FoldShifty(int n) {      if (n == 0) return 0;      int power = 0;      n--;      while (n > 0) {         n = n >> 1;         power++;      }      return 1 << power;   }   public static final int get2FoldLogical(int n) {      int maxPower = 0x40000000;      while (maxPower > 0) {         if (n == maxPower) return maxPower;         if (n > maxPower) return maxPower << 1;         maxPower = maxPower >> 1;      }      return 0;   }   public void testPerformance(int run) {      System.out.println("-- Test run " + run);      long time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldLog(i);      time = System.currentTimeMillis() - time;      System.out.println("Math.log method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldUnrolled(i);      time = System.currentTimeMillis() - time;      System.out.println("Rolled out binary search method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldShifty(i);      time = System.currentTimeMillis() - time;      System.out.println("Shifty method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldLogical(i);      time = System.currentTimeMillis() - time;      System.out.println("Logical method: " + time);   }   public void testAccuracy() {      System.out.println("-- Accuracy run ");      for (int i = 0; i < 0x4000000; i++) {         int i1 = get2FoldLog(i);         int i2 = get2FoldUnrolled(i);         int i3 = get2FoldShifty(i);         int i4 = get2FoldLogical(i);         if (i1 != i2 || i2 != i3 || i3 != i4) {            System.err.println("Error in results: " + i +                  " Log:" + i1 + ", Unrolled: " + i2 +                  ", Shifty: " + i3 + ", Logical: " + i4);            System.err.println("Stopping test, there may be other inaccuracies.");            return;         }      }      System.out.println("All methods return the exact same results.");   }   public PowerOfTwo() {      for (int i = 0; i < 5; i++) {         testPerformance(i + 1);      }      testAccuracy();   }   public static int NUM = 1000000;   public static void main(String[] args) {      new PowerOfTwo();   }}`

tusaki

Junior Member

Medals: 1

 « Reply #12 - Posted 2006-03-21 12:35:04 »

Interesting results. Here are my results of your (slightly tweaked) benchmark: (numbers in seconds, lower is better)

 1  2  3  4 `Total Math.log method time: 35.26507531Total Rolled out BS method time: 0.453305379Total Shifty method time: 1.676443794Total Logical method time: 0.957726191`

(basically increased NUM to 5000000, changed System.currentTimeMillis() to System.nanoTime(), formatted the output to be in seconds and added all times from all runs of which you see the result here)
DzzD
 « Reply #13 - Posted 2006-03-21 16:28:07 »

fastest algorithm?!

I do something like the following to find nereast power of two

function findpow2(int val)
{
int result=1;
while(result<val) result<<=1;
return result;
}

Riven
« League of Dukes »

JGO Overlord

Medals: 799
Projects: 4
Exp: 16 years

 « Reply #14 - Posted 2006-03-21 16:29:32 »

Which is much much slower
because: which is much much slower

Hi, appreciate more people! Σ ♥ = ¾
Learn how to award medals... and work your way up the social rankings
DzzD
 « Reply #15 - Posted 2006-03-21 16:44:43 »

I trust you, I did not benchmark anything for now,

also, it is so simple that it can be inlined

Riven
« League of Dukes »

JGO Overlord

Medals: 799
Projects: 4
Exp: 16 years

 « Reply #16 - Posted 2006-03-21 18:14:00 »

slightly cleanedup version of the code:

 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58 `   public static final int get2FoldUnrolled(int n)   {      if (n < 0)         return 0xFFFFFFFF;      if (n > 0x00010000)      {         if (n > 0x01000000)         {            if (n > 0x10000000)            {               if (n > 0x40000000)                  return 0xFFFFFFFF;               return (n > 0x20000000) ? 0x40000000 : 0x20000000;            }            if (n > 0x04000000)               return (n > 0x08000000) ? 0x10000000 : 0x08000000;            return (n > 0x02000000) ? 0x04000000 : 0x02000000;         }         if (n > 0x00100000)         {            if (n > 0x00400000)               return (n > 0x00800000) ? 0x01000000 : 0x00800000;            return (n > 0x00200000) ? 0x00400000 : 0x00200000;         }         if (n > 0x00040000)            return (n > 0x00080000) ? 0x00100000 : 0x00080000;         return (n > 0x00020000) ? 0x00040000 : 0x00020000;      }      if (n > 0x00000100)      {         if (n > 0x00001000)         {            if (n > 0x00004000)               return (n > 0x00008000) ? 0x00010000 : 0x00008000;            return (n > 0x00002000) ? 0x00004000 : 0x00002000;         }         if (n > 0x00000400)            return (n > 0x00000800) ? 0x00001000 : 0x00000800;         return (n > 0x00000200) ? 0x00000400 : 0x00000200;      }      if (n > 0x00000010)      {         if (n > 0x00000040)            return (n > 0x00000080) ? 0x00000100 : 0x00000080;         return (n > 0x00000020) ? 0x00000040 : 0x00000020;      }      if (n > 0x00000004)         return (n > 0x00000008) ? 0x00000010 : 0x00000008;      return (n > 0x00000002) ? 0x00000004 : n;   }`

Who will turn this code into a massive (()?a: (?b:c))?d:e block?

Hi, appreciate more people! Σ ♥ = ¾
Learn how to award medals... and work your way up the social rankings
Color_Of_Green

Junior Newbie

 « Reply #17 - Posted 2006-03-22 01:29:16 »

Interesting that you say this is the fastest algorithm, which it is not. While it may be in the general case when one uses it in a certain context knowing the upper and lower bounds then a lookup table will be the fatest method (3-4x faster than BS) :-)

i.e.

static final int[] lookupTable;
static {
lookupTable = new int[MAX_LOOKUP];
for(int i = 0; i < MAX_LOOKUP; i++)
lookupTable = get2FoldUnrolled(i);
}

public final static int get2FoldLookup(int val) {
return lookupTable[val];
}

Your application will probably hit a certain input range 90% of the time, in which case you use the lookup table method. For all other input values you use the BS method.

DzzD
 « Reply #18 - Posted 2006-03-25 00:51:13 »

hum your idea of lookup table make me think about a solution that will be faster than all other, I did not try to implement it yet but it should work and also use very low memory for lookup.

first create a lookup table with 256 entries

our lookup table will give near power of values ranging from 0 to 255
basically
lookup[0]=1
lookup[1]=1
lookup[2]=2
lookup[3]=4
lookup[4]=4
lookup[5]=8
lookup[6]=8
lookup[7]=8
lookup[8]=8
etc...

than use a simple scale on your input&output value like that
function(val)
{
if(val>0 && val <256)
scale=0;
else
if(val>=256 && val <65536)
scale=8;
else
if(val>=65536 && val < 16777216 )
scale=16;
else
if(val>=16777216 && val<=2147483647)
scale=24;
return lookup[val>>scale]<<scale;
}

or something like that

you can use smaller range to use less memory : like 4 bits rather than height for this sample
or use higher range to get better performance like 12 bits

if someone have some spare time to test i will be interrested to know bench result on this function

edit: also "if" must be rearanged to get faster

if(val>65536)
{if(val<256)
{scale=8}
else
{scale=16}
}else[....etc}

EDIT2:

something again better

function(val)
{
if(val>0 && val <256)
return lookup[val];
else
if(val>=256 && val <65536)
return lookup[val>>8]<<8;
else
if(val>=65536 && val < 16777216 )
return lookup[val>>16]<<16;
else
if(val>=16777216 && val<=2147483647)
return lookup[val>>24]<<24;
return -1
}

DzzD
 « Reply #19 - Posted 2006-03-25 23:50:52 »

Looking again to my previous post it seems to me that I made stupid error, here is a new version:

function(val)
{
if(val<0)
return -1;
else
if(val <256)
return lookup[val];
else
if(val <65536)
return lookup[val>>8]<<8;
else
if(val < 16777216 )
return lookup[val>>16]<<16;
else
if(val<2147483647)
return lookup[val>>24]<<24;
return -1
}

swpalmer

JGO Coder

Where's the Kaboom?

 « Reply #20 - Posted 2006-03-26 04:05:46 »

The conditions are wrong.  The above fails for  0x101.

Riven
« League of Dukes »

JGO Overlord

Medals: 799
Projects: 4
Exp: 16 years

 « Reply #21 - Posted 2006-03-26 08:58:08 »

And then, you don't need any 'else' statements... just

if(...)
return ...;
if(...)
return ...;
if(...)
return ...;
if(...)
return ...;

Hi, appreciate more people! Σ ♥ = ¾
Learn how to award medals... and work your way up the social rankings
DzzD
 « Reply #22 - Posted 2006-03-26 12:54:54 »

you are right, I think about it while writing the post, so there could be other mistake, anyways it looks to be a fast algo, have you tried to benchmark this code?

swpalmer

JGO Coder

Where's the Kaboom?

 « Reply #23 - Posted 2006-03-26 18:42:48 »

I tried a few algorithms... one was branchless but the equation was too large and it performed very poor compared to the while (1 << i) loop version.

Here's another attempt that is more like a divide & conquer approach, it does 25-30% better than the while loop for me:

 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24 `   public static int p2Bound4(int n) {      int h1 =  n & 0xffff0000;  // 11111111111111110000000000000000      if(h1 == 0)         h1 = n & ~0xffff0000;            int h2 = h1 & 0xff00ff00;  // 11111111000000001111111100000000      if (h2 == 0)         h2 = h1 & ~0xff00ff00;            int h3 = h2 & 0xf0f0f0f0;  // 11110000111100001111000011110000      if (h3 == 0)         h3 = h2 & ~0xf0f0f0f0;            int h4 = h3 & 0xcccccccc;  // 11001100110011001100110011001100      if (h4 == 0)         h4 = (h3 & ~0xcccccccc);            int h5 = h4 & 0xaaaaaaaa;  // 10101010101010101010101010101010      if (h5 == 0)         h5 = (h4 & ~0xaaaaaaaa);            // h5 is the highest set bit in 'n', so we likely need to shift by 1      return h5 << (h5-n >>> 31);   }`

Color_Of_Green

Junior Newbie

 « Reply #24 - Posted 2006-03-27 02:04:28 »

Excluding the raw lookup table approach, the p2Bound4 algorithm is the fastest so far, by about 20%.. nice.
mabraham

Junior Member

 « Reply #25 - Posted 2006-03-27 20:01:07 »

Hey guys,

This is what I use:
 1  2  3  4  5  6  7  8  9 `   private static int nextPow2( int value )   {      if ( ( value & ( value - 1 ) ) == 0 )      {         // already a power-of-2 value         return value;      }      return Integer.highestOneBit( value ) << 1;   }`

It uses new 1.5 API, it uses no branches (the Integer.highestOneBit() API doesn't anyway), and I believe it should perform pretty well, though I haven't benchmarked it yet.
Color_Of_Green

Junior Newbie

 « Reply #26 - Posted 2006-03-28 01:12:49 »

Interesting, on my centrino laptop the BS method is actually 2x faster than p2bound4 but on my P4 its slower??
HamsterofDeath

Junior Member

Java games rock!

 « Reply #27 - Posted 2006-03-31 09:39:43 »

Quote
Who will turn this code into a massive (()?a: (?b:c))?d:e block?

 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55 `   public static final int get2FoldUnrolled(int n)   {      return n < 0         ? 0xFFFFFFFF         : n > 0x00010000            ? n > 0x01000000            ? n > 0x10000000            ? n > 0x40000000            ? 0xFFFFFFFF            : (n > 0x20000000) ? 0x40000000 : 0x20000000            : n > 0x04000000 ? (n > 0x08000000) ? 0x10000000 : 0x08000000 : (n > 0x02000000) ? 0x04000000 : 0x02000000            : n > 0x00100000               ? n > 0x00400000               ? (n > 0x00800000) ? 0x01000000 : 0x00800000               : (n > 0x00200000) ? 0x00400000 : 0x00200000               : n > 0x00040000                  ? (n > 0x00080000) ? 0x00100000 : 0x00080000                  : (n > 0x00020000) ? 0x00040000 : 0x00020000            : n > 0x00000100 ? n > 0x00001000               ? n > 0x00004000               ? (n > 0x00008000) ? 0x00010000 : 0x00008000               : (n > 0x00002000) ? 0x00004000 : 0x00002000               : n > 0x00000400                  ? (n > 0x00000800) ? 0x00001000 : 0x00000800                  : (n > 0x00000200) ? 0x00000400 : 0x00000200 : n > 0x00000010 ? n > 0x00000040 ?               (n >                0x00000080)                  ?                  0x00000100                  :                     0x00000080 :               (n >                0x00000020)                  ?                  0x00000040                  :                     0x00000020 :               n >               0x00000004                  ?                  (n >                   0x00000008)                     ?                     0x00000010                     :                        0x00000008                  :                     (n >                      0x00000002)                        ?                        0x00000004                        :                           n;   }`

scnr
oravecz

Junior Newbie

 « Reply #28 - Posted 2006-04-03 13:10:00 »

On my machine, some of the more optimized solutions are clocking in at less than 100 ms for 5,000,000 iterations. Any of the algorithms presented thus far are suitable replacements for the java.lang.Math approach, but it is fun to see the different approaches to the problem. In the back of your mind, you begin to feel that there is an extremely elegant and simple solution to this problem, and the highestOneBit algorithm probably comes closest.

For the sake of summary, I have collected all of the algorithms mentioned so far into a single test harness that verifies their accuracy and performs a micro-benchmark. As swpalmer noted, the lookup code produces inaccurate results. This may be caused by my initialization of the lookup table.

Since this is a micro-benchmark, take it with a grain of salt. I would expect some algorithms to be faster than others and vice-versa when run on different OS, processor, ot Java VM. (Actually, I wouldn't expect this, but it turns out that way. Maybe it is just the VM that makes all of the difference.)

Here are the results of my tests:

Windows P4 2.8GHz
java version "1.4.2_10"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.2_10-b03)
Java HotSpot(TM) Client VM (build 1.4.2_10-b03, mixed mode)

 1  2  3  4  5  6  7  8  9  10  11  12 `>java -cp . PowerOfTwo-- Accuracy runAll methods return the exact same results.-- Test runs with 5000000 iterations.Math.log method: 6219, 6453, 6453, 6469, 6453Rolled out binary search method: 94, 94, 93, 93, 94Shifty method: 281, 313, 313, 313, 312Logical method: 156, 156, 156, 141, 141p2Bound4 method: 141, 140, 156, 141, 141highestOneBit method: 156, 141, 141, 125, 140get2FoldFlattened method: 93, 94, 94, 94, 94shiftUp method: 235, 250, 250, 234, 250`

The 1.4 VM shows the Math.log code more than a magnitude slower than some of the optimized routines, as one might expect. The BS approach, that started this thread, by Dirk Bosmans and Vincent Vollers still performs the best in this configuration. The approaches that use some type of bit-shifting are close behind.

Windows P4 2.8GHz
java version "1.4.2_10"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.2_10-b03)
Java HotSpot(TM) Server VM (build 1.4.2_10-b03, mixed mode)

 1  2  3  4  5  6  7  8  9  10  11  12 `>java -cp . -server PowerOfTwo-- Accuracy runAll methods return the exact same results.-- Test runs with 5000000 iterations.Math.log method: 2953, 2891, 2953m 2907, 2922Rolled out binary search method: 79, 78, 78, 78, 63Shifty method: 250, 266, 265, 265, 265Logical method: 156, 140, 141, 157, 141p2Bound4 method: 78, 78, 78, 62, 78highestOneBit method: 78, 78, 78, 94, 78get2FoldFlattened method: 78, 79, 78, 62, 78shiftUp method: 210, 210, 211, 210, 210`

I took a moment to run the VM in server mode. I wasn't expecting to see much change since we are only performing such minimal computations. I was wrong. Perhaps there is much more aggressive inlining occurring? I'm not sure, but all of the benchmarks were reduced, and significantly in the case of the java.lang.Math functions. Also, the p2Bound4 and highestOneBit algorithms now match the performance of the BS approaches.

Windows P4 2.8GHz
java version "1.6.0-beta"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.6.0-beta-b59g)
Java HotSpot(TM) Client VM (build 1.6.0-beta-b59g, mixed mode, sharing)

 1  2  3  4  5  6  7  8  9  10  11  12 `>java -cp . PowerOfTwo-- Accuracy runAll methods return the exact same results.-- Test runs with 5000000 iterations.Math.log method: 4329, 4328, 4375, 4390, 4391Rolled out binary search method: 78, 78, 78, 63, 78Shifty method: 218, 219, 234, 219, 219Logical method: 172, 172, 172, 172, 156p2Bound4 method: 78, 78, 78, 78, 78highestOneBit method: 79, 78, 79, 78, 78get2FoldFlattened method: 78, 78, 78, 78, 78shiftUp method: 220, 215, 214, 214, 210`

Running the benchmarks under the Java 6 beta produced similar results as the server mode results above, but with not so great a performance bump for the java.lang.Math routine. There is no server VM for my beta JDK download, so that test is not included.

IBM p590 Server
java version "1.4.2"
Java(TM) 2 Runtime Environment, Standard Edition (build 1.4.2)
Classic VM (build 1.4.2, J2RE 1.4.2 IBM AIX build ca142ifx-20050119 SR1+80507+81622 (JIT enabled: jitc))

 1  2  3  4  5  6  7  8  9  10  11  12 `> /usr/java14/bin/java -cp . PowerOfTwo-- Accuracy runAll methods return the exact same results.-- Test runs with 5000000 iterations.Math.log method: 1271, 1269, 1240, 1212, 1208Rolled out binary search method: 98, 96, 97, 96, 96Shifty method: 0, 0, 0, 0, 0Logical method: 325, 321, 324, 327, 324p2Bound4 method: 87, 85, 87, 86, 86highestOneBit method: 0, 0, 0, 0, 0get2FoldFlattened method: 112, 110, 110, 110, 111shiftUp method: 0, 0, 0, 0, 0`

This is the test result for my production server environment. This hardware accelerates the simple bit-shifting algorithms (with minimal looping) to the point that I couldn't measure them. Shifty, highestOneBit, and ShiftUp executed 5MM iterations in less than 10 ms! That's quite a bit of optimization. The same hardware produced some of the lowest scores for the BS routines.
oravecz

Junior Newbie

 « Reply #29 - Posted 2006-04-03 13:12:27 »

Here is the test harness. I think I have everyone's tweaks in here, but I have commented out the lookup code because of verification errors. If someone can correct this and repost, it would be helpful. I copied the code from the Java 5 Integer.highestOneBit() method so it reduces a method call and also can run in JDK 1.4 VMs.

 1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98  99  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185  186  187  188  189  190  191  192  193  194  195  196  197  198  199  200  201  202  203  204  205  206  207  208  209  210  211  212  213  214  215  216  217  218  219  220  221  222  223  224  225  226  227  228  229  230  231  232  233  234  235  236  237  238  239  240  241  242  243  244  245  246  247  248  249  250  251  252  253  254  255  256  257  258  259  260  261  262  263  264  265  266  267  268  269  270  271  272  273  274  275  276  277  278  279  280  281  282  283  284  285  286  287  288  289  290  291  292  293  294  295  296  297  298  299  300  301  302  303  304  305  306  307  308  309  310  311  312  313  314  315  316  317  318  319  320  321  322  323  324  325  326  327  328  329  330  331  332  333  334  335  336  337  338  339  340  341  342  343  344  345  346  347  348  349  350  351  352  353  354  355  356  357  358  359  360  361  362  363  364  365  366  367  368  369  370  371  372 `public class PowerOfTwo {   public static final double invln2 = 1.0 / Math.log(2.0);   /**    * @author Control    */   public static final int get2FoldLog(int num) {      return (int) Math.pow(2, Math.ceil(Math.log(num) * invln2));   }   /**    * @author Dirk Bosmans Dirk.Bosmans@tijd.com    * @author Vincent Vollers    */   public static final int get2FoldUnrolled(int n) {      if (n < 0) return 32;      if (n > 0x00010000) {         if (n > 0x01000000) {            if (n > 0x10000000) {               if (n > 0x40000000) {                  // if ( n > 0x7fffffff )                  // return 32                  // else                  // return 2147483648; is too much, so return -1                  return -1;               } else {                  // !( n > 0x3fffffff )                  if (n > 0x20000000) return 1073741824;                  else return 536870912;               }            } else {               // !( n > 0x0fffffff )               if (n > 0x04000000) {                  if (n > 0x08000000) return 268435456;                  else return 134217728;               } else {                  // !( n > 0x03ffffff )                  if (n > 0x02000000) return 67108864;                  else return 33554432;               }            }         } else {            // !( n > 0x00ffffff )            if (n > 0x00100000) {               if (n > 0x00400000) {                  if (n > 0x00800000) return 16777216;                  else return 8388608;               } else {                  // !( n > 0x003fffff )                  if (n > 0x00200000) return 4194304;                  else return 2097152;               }            } else {               // !( n > 0x000fffff )               if (n > 0x00040000) {                  if (n > 0x00080000) return 1048576;                  else return 524288;               } else {                  // !( n > 0x0003ffff )                  if (n > 0x00020000) return 262144;                  else return 131072;               }            }         }      } else {         // !( n > 0x0000ffff )         if (n > 0x00000100) {            if (n > 0x00001000) {               if (n > 0x00004000) {                  if (n > 0x00008000) return 65536;                  else return 32768;               } else {                  // !( n > 0x00003fff )                  if (n > 0x00002000) return 16384;                  else return 8192;               }            } else {               // !( n > 0x00000fff )               if (n > 0x00000400) {                  if (n > 0x00000800) return 4096;                  else return 2048;               } else {                  // !( n > 0x000003ff )                  if (n > 0x00000200) return 1024;                  else return 512;               }            }         } else {            // !( n > 0x000000ff )            if (n > 0x00000010) {               if (n > 0x00000040) {                  if (n > 0x00000080) return 256;                  else return 128;               } else {                  // !( n > 0x0000003f )                  if (n > 0x00000020) return 64;                  else return 32;               }            } else {               // !( n > 0x0000000f )               if (n > 0x00000004) {                  if (n > 0x00000008) return 16;                  else return 8;               } else {                  // !( n > 0x00000003 )                  if (n > 0x00000002) return 4;                  return n;               }            }         }      }   } // end widthInBits   /**    * @author James Cook    */   public static final int get2FoldShifty(int n) {      if (n == 0) return 0;      int power = 0;      n--;      while (n > 0) {         n = n >> 1;         power++;      }      return 1 << power;   }   /**    * @author James Cook    */   public static final int get2FoldLogical(int n) {      int maxPower = 0x40000000;      while (maxPower > 0) {         if (n == maxPower) return maxPower;         if (n > maxPower) return maxPower << 1;         maxPower = maxPower >> 1;      }      return 0;   }   /**    * @author swpalmer    */   public static final int p2Bound4(int n) {      int h1 = n & 0xffff0000;  // 11111111111111110000000000000000      if (h1 == 0)         h1 = n & ~0xffff0000;      int h2 = h1 & 0xff00ff00;  // 11111111000000001111111100000000      if (h2 == 0)         h2 = h1 & ~0xff00ff00;      int h3 = h2 & 0xf0f0f0f0;  // 11110000111100001111000011110000      if (h3 == 0)         h3 = h2 & ~0xf0f0f0f0;      int h4 = h3 & 0xcccccccc;  // 11001100110011001100110011001100      if (h4 == 0)         h4 = (h3 & ~0xcccccccc);      int h5 = h4 & 0xaaaaaaaa;  // 10101010101010101010101010101010      if (h5 == 0)         h5 = (h4 & ~0xaaaaaaaa);      // h5 is the highest set bit in 'n', so we likely need to shift by 1      return h5 << (h5 - n >>> 31);   }   /**    * @author JDK5 Integer class by way of mabraham ("inlined" the code from Integer)    */   private static final int highestOneBit(int n) {      // already a power-of-2 value      if ((n & (n - 1)) == 0) return n;      n |= (n >> 1);      n |= (n >> 2);      n |= (n >> 4);      n |= (n >> 8);      n |= (n >> 16);      n = n - (n >>> 1);      return n << 1;   }   public static final int[] lookup = new int[256];   static {      for (int i = 0; i < lookup.length; i++)         lookup[i] = highestOneBit(i);   }   public static final int lookup(int val) {      if (val < 0)         return -1;      else if (val < 256)         return lookup[val];      else if (val < 65536)         return lookup[val >> 8] << 8;      else if (val < 16777216)         return lookup[val >> 16] << 16;      else if (val < 2147483647)         return lookup[val >> 24] << 24;      return -1;   }   /**    * @author HamsterOfDeath    */   public static final int get2FoldFlattened(int n) {      return n < 0            ? 0xFFFFFFFF            : n > 0x00010000            ? n > 0x01000000            ? n > 0x10000000            ? n > 0x40000000            ? 0xFFFFFFFF            : (n > 0x20000000) ? 0x40000000 : 0x20000000            : n > 0x04000000 ? (n > 0x08000000) ? 0x10000000 : 0x08000000 : (n > 0x02000000) ? 0x04000000 : 0x02000000            : n > 0x00100000            ? n > 0x00400000            ? (n > 0x00800000) ? 0x01000000 : 0x00800000            : (n > 0x00200000) ? 0x00400000 : 0x00200000            : n > 0x00040000            ? (n > 0x00080000) ? 0x00100000 : 0x00080000            : (n > 0x00020000) ? 0x00040000 : 0x00020000            : n > 0x00000100 ? n > 0x00001000            ? n > 0x00004000            ? (n > 0x00008000) ? 0x00010000 : 0x00008000            : (n > 0x00002000) ? 0x00004000 : 0x00002000            : n > 0x00000400            ? (n > 0x00000800) ? 0x00001000 : 0x00000800            : (n > 0x00000200) ? 0x00000400 : 0x00000200 : n > 0x00000010 ? n > 0x00000040 ?            (n >                  0x00000080)                  ?                  0x00000100                  :                  0x00000080 :            (n >                  0x00000020)                  ?                  0x00000040                  :                  0x00000020 :            n >                  0x00000004                  ?                  (n >                        0x00000008)                        ?                        0x00000010                        :                        0x00000008                  :                  (n >                        0x00000002)                        ?                        0x00000004                        :                        n;   }   public static final int shiftUp(int n) {      if (n == 0) return 0;      int result = 1;      while (result < n) result <<= 1;      return result;   }   public void testPerformance(int run) {      System.out.println("-- Test run " + run + " with " + NUM + " iterations.");      long time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldLog(i);      time = System.currentTimeMillis() - time;      System.out.println("Math.log method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldUnrolled(i);      time = System.currentTimeMillis() - time;      System.out.println("Rolled out binary search method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldShifty(i);      time = System.currentTimeMillis() - time;      System.out.println("Shifty method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldLogical(i);      time = System.currentTimeMillis() - time;      System.out.println("Logical method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) p2Bound4(i);      time = System.currentTimeMillis() - time;      System.out.println("p2Bound4 method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) highestOneBit(i);      time = System.currentTimeMillis() - time;      System.out.println("highestOneBit method: " + time);//      time = System.currentTimeMillis();//      for (int i = 0; i < NUM; i++) lookup(i);//      time = System.currentTimeMillis() - time;////      System.out.println("lookup method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) get2FoldFlattened(i);      time = System.currentTimeMillis() - time;      System.out.println("get2FoldFlattened method: " + time);      time = System.currentTimeMillis();      for (int i = 0; i < NUM; i++) shiftUp(i);      time = System.currentTimeMillis() - time;      System.out.println("shiftUp method: " + time);   }   public void testAccuracy() {      System.out.println("-- Accuracy run ");      for (int i = 1; i < 0x4000000; i++) {         int i1 = get2FoldLog(i);         int i2 = get2FoldUnrolled(i);         int i3 = get2FoldShifty(i);         int i4 = get2FoldLogical(i);         int i5 = p2Bound4(i);         int i6 = highestOneBit(i);         int i7 = get2FoldFlattened(i);         int i8 = shiftUp(i);         if (i1 != i2 || i2 != i3 || i3 != i4 ||               i4 != i5 || i5 != i6 || i6 != i7 || i7 != i8) {            System.err.println("Error in results for i = " + i +                  ", Log:" + i1 + ", Unrolled: " + i2 +                  ", Shifty: " + i3 + ", Logical: " + i4 +                  ", p2Bound4: " + i5 + ", highestOneBit: " + i6 +                  ", get2FoldFlattened: " + i7 + ", shiftUp: " + i8);            System.err.println("Stopping test. (there may be other inaccuracies.)");            System.exit(-1);         }      }      System.out.println("All methods return the exact same results.");   }   public PowerOfTwo() {      testAccuracy();      for (int i = 0; i < 5; i++) {         testPerformance(i + 1);      }   }   public static int NUM = 5000000;   public static void main(String[] args) {      new PowerOfTwo();   }}`
Pages: [1] 2
 ignore  |  Print

You cannot reply to this message, because it is very, very old.

 Add your game by posting it in the WIP section, or publish it in Showcase. The first screenshot will be displayed as a thumbnail.
 BurntPizza (22 views) 2014-09-19 03:14:18 Dwinin (35 views) 2014-09-12 09:08:26 Norakomi (62 views) 2014-09-10 13:57:51 TehJavaDev (90 views) 2014-09-10 06:39:09 Tekkerue (44 views) 2014-09-09 02:24:56 mitcheeb (65 views) 2014-09-08 06:06:29 BurntPizza (48 views) 2014-09-07 01:13:42 Longarmx (35 views) 2014-09-07 01:12:14 Longarmx (40 views) 2014-09-07 01:11:22 Longarmx (37 views) 2014-09-07 01:10:19
 BurntPizza 37x Riven 18x Rayvolution 18x princec 17x ags1 16x basil_ 16x KevinWorkman 15x LiquidNitrogen 12x kevglass 12x theagentd 11x nsigma 11x HeroesGraveDev 9x deathpat 9x The Lion King 7x TehJavaDev 6x Gibbo3771 6x
 List of Learning Resources2014-08-16 10:40:00List of Learning Resources2014-08-05 19:33:27Resources for WIP games2014-08-01 16:20:17Resources for WIP games2014-08-01 16:19:50List of Learning Resources2014-07-31 16:29:50List of Learning Resources2014-07-31 16:26:06List of Learning Resources2014-07-31 11:54:12HotSpot Optionsby dleskov2014-07-08 01:59:08
 java-gaming.org is not responsible for the content posted by its members, including references to external websites, and other references that may or may not have a relation with our primarily gaming and game production oriented community. inquiries and complaints can be sent via email to the info‑account of the company managing the website of java‑gaming.org