SDSL: Succinct Data Structure Library
A C++ template library for succinct data structures
|
00001 /* sdsl - succinct data structures library 00002 Copyright (C) 2008 Simon Gog 00003 00004 This program is free software: you can redistribute it and/or modify 00005 it under the terms of the GNU General Public License as published by 00006 the Free Software Foundation, either version 3 of the License, or 00007 (at your option) any later version. 00008 00009 This program is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00012 GNU General Public License for more details. 00013 00014 You should have received a copy of the GNU General Public License 00015 along with this program. If not, see http://www.gnu.org/licenses/ . 00016 */ 00021 #ifndef INCLUDED_SDSL_BITMAGIC 00022 #define INCLUDED_SDSL_BITMAGIC 00023 00024 #include <stdint.h> // for uint64_t uint32_t declaration 00025 #include <iostream>// for cerr 00026 #include <cassert> 00027 #ifdef __SSE4_2__ 00028 #include <xmmintrin.h> 00029 #endif 00030 00032 namespace sdsl 00033 { 00034 00036 00050 class bit_magic 00051 { 00052 private: 00053 bit_magic(); // This helper class can not be instantiated 00054 public: 00056 static const int64_t All1Mask = -1LL; 00057 00059 00064 static const uint64_t DeBruijn64 = 0x0218A392CD3D5DBFULL; 00065 00067 00069 static const uint32_t DeBruijn64ToIndex[64]; 00070 00072 static const uint64_t Fib[92]; 00073 00075 static const uint8_t B1CntBytes[256]; 00076 00078 00080 static const uint32_t L1BP[256]; 00081 00083 static const uint64_t Li1Mask[65]; 00084 00086 static const uint64_t Li0Mask[65]; 00087 00088 static const uint8_t lookupr1BP[256]; 00089 00091 00094 static const uint8_t Select256[256*8]; 00095 00096 static const uint64_t PsOverflow[65]; 00097 00098 static const uint32_t powerOf3[9]; 00099 00100 static const uint8_t max_excess_8bit[256]; 00101 00103 static const uint64_t _8_x_the_byte[65]; 00104 00105 static const uint8_t cover0[1]; 00106 static const uint8_t cover1[2]; 00107 static const uint8_t cover2[3]; 00108 static const uint8_t cover3[4]; 00109 static const uint8_t cover4[5]; 00110 static const uint8_t cover5[7]; 00111 static const uint8_t cover6[9]; 00112 static const uint8_t cover7[13]; 00113 static const uint8_t cover8[20]; 00114 00115 static const uint8_t* covers[9]; 00116 00117 static const uint32_t cover_sizes[9]; 00118 00119 00120 static void generate_first_pos_of_excess_val() { 00121 for (int i=0; i<9; ++i) { 00122 for (int j=0; j<256; ++j) { 00123 int x = j; 00124 int excess = 0; 00125 int found=0; 00126 for (int k=0; k<8 and !found; ++k, x>>=1) { 00127 if (x&1) 00128 excess++; 00129 else 00130 excess--; 00131 if (excess == i) { 00132 std::cout<<k<<","; 00133 found=1; 00134 } 00135 } 00136 if (!found) 00137 std::cout<<0<<","; 00138 if ((j+1)%16==0) 00139 std::cout<<std::endl; 00140 } 00141 std::cout<<std::endl; 00142 } 00143 } 00144 00145 static const uint8_t first_pos_of_excess_val[256*9]; 00146 00147 static void generate_last_pos_of_excess_val() { 00148 std::cout<<"const uint8_t bit_magic::last_pos_of_excess_val[]={"<<std::endl; 00149 for (int i=-8; i<9; ++i) { 00150 for (int j=0; j<256; ++j) { 00151 int x = j; 00152 int excess = 0; 00153 int idx=0; 00154 for (int k=0; k<8; ++k, x>>=1) { 00155 if (x&1) 00156 excess++; 00157 else 00158 excess--; 00159 if (excess == i) { 00160 idx=k; 00161 } 00162 } 00163 std::cout<<idx; 00164 if (i* j!=8*255) 00165 std::cout<<","; 00166 if ((j+1)%16==0) 00167 std::cout<<std::endl; 00168 } 00169 std::cout<<std::endl; 00170 } 00171 std::cout<<"};"<<std::endl; 00172 } 00173 00174 static const uint8_t last_pos_of_excess_val[256*17]; 00175 00176 static void generate_very_near_find_open() { 00177 std::cout<<"const uint8_t bit_magic::very_near_find_open[]={"<<std::endl; 00178 for (int w=0; w<256; ++w) { 00179 int excess_val=0; 00180 for (int i=0; i<8; ++i) { 00181 if ((w>>i)&1) 00182 ++excess_val; 00183 else 00184 --excess_val; 00185 } 00186 int res=0; 00187 int cur_excess_val=0; 00188 for (int i=0; i<8; ++i) { 00189 if ((w>>i)&1) 00190 ++cur_excess_val; 00191 else 00192 --cur_excess_val; 00193 if (cur_excess_val==excess_val-1) 00194 res = 7-i; 00195 } 00196 std::cout<<res; 00197 if (w!=255) 00198 std::cout<<","; 00199 if ((w+1)%16==0) 00200 std::cout<<std::endl; 00201 } 00202 std::cout<<"};"<<std::endl; 00203 } 00204 00206 00210 static const uint8_t very_near_find_open[256]; 00211 00212 static void generate_very_near_enclose() { 00213 std::cout<<"const uint8_t bit_magic::very_near_enclose[]={"<<std::endl; 00214 for (int w=0; w<256; ++w) { 00215 int excess_val = 0, res=0; 00216 for (int i=7; i>=0 and !res; --i) { 00217 if (w&(1<<i)) 00218 ++excess_val; 00219 else 00220 --excess_val; 00221 if (excess_val==1) 00222 res = 8-i; 00223 } 00224 std::cout<<res; 00225 if (w!=255) 00226 std::cout<<","; 00227 if ((w+1)%16==0) 00228 std::cout<<std::endl; 00229 } 00230 std::cout<<"};"<<std::endl; 00231 } 00232 00233 static const uint8_t very_near_enclose[256]; 00235 00240 static uint64_t b1Cnt(uint64_t x); 00241 00243 00249 static uint32_t b1Cnt32(uint32_t x); 00250 00252 static uint32_t b1CntNaive(uint64_t x); 00253 00255 00259 static uint32_t b11Cnt(uint64_t x, uint64_t& c); 00260 00261 static uint32_t b11CntS(uint64_t x, uint64_t& c); 00262 static uint32_t b11CntS(uint64_t x); 00263 00265 00268 static uint32_t b11Cnt(uint64_t x); 00269 00271 static uint32_t b11CntNaive(uint64_t x); 00272 00274 static uint32_t eB11Cnt(uint64_t x); 00275 00277 00281 static uint32_t b10Cnt(uint64_t x, uint64_t& c); 00282 00284 00288 static uint32_t b01Cnt(uint64_t x, uint64_t& c); 00289 00290 static uint32_t b10CntNaive(uint64_t x, uint64_t& c); 00291 00293 static uint64_t b10Map(uint64_t x, uint64_t c=0); 00294 00296 static uint64_t b01Map(uint64_t x, uint64_t c=1); 00297 00299 00305 static uint32_t i1BP(uint64_t x, uint32_t i); 00306 static uint32_t i1BP2(uint64_t x, uint32_t i); 00307 00309 00311 static uint32_t j1BP(uint64_t x, uint32_t j); 00312 00314 00316 static uint32_t k1BP(uint64_t x, uint32_t j); 00317 00318 00319 00321 static uint32_t i1BPNaive(uint64_t x, uint32_t i); 00322 00324 00329 static uint32_t l1BP(uint64_t x); 00330 00332 static uint32_t l1BPNaive(uint64_t x); 00333 00335 00341 static uint32_t r1BP(uint64_t x); 00342 00344 static uint32_t r1BPNaive(uint64_t x); 00345 00347 00355 static uint32_t i11BP(uint64_t x, uint32_t i, uint32_t c=0); 00356 00358 static uint32_t i11BPNaive(uint64_t x, uint32_t i) { 00359 for (uint32_t j=0; j<63; ++j) { 00360 if ((x&3)==3) { 00361 i--; 00362 if (!i) return j+1; 00363 x>>=1; 00364 ++j; 00365 } 00366 x>>=1; 00367 } 00368 return 63; 00369 } 00370 00371 static uint64_t all11BPs(uint64_t x, bool& c); 00372 00374 static uint32_t eI11BP(uint64_t x, uint32_t i); 00375 00377 00383 static uint32_t l11BP(uint64_t x); 00384 00386 static void write_int(uint64_t* word, uint64_t x, const uint8_t offset=0, const uint8_t len=64); 00387 // static void writeInt2(uint64_t *word, uint64_t x, const uint8_t offset=0, const uint8_t len=64); 00388 00390 static void write_int_and_move(uint64_t*& word, uint64_t x, uint8_t& offset, const uint8_t len); 00391 00393 static uint64_t read_int(const uint64_t* word, uint8_t offset=0, const uint8_t len=64); 00394 00396 static uint64_t read_int_and_move(const uint64_t*& word, uint8_t& offset, const uint8_t len=64); 00397 00399 static uint64_t readUnaryInt(const uint64_t* word, uint8_t offset=0); 00400 00402 static uint64_t readUnaryIntAndMove(const uint64_t*& word, uint8_t& offset); 00403 00405 00411 static void move_right(const uint64_t*& word, uint8_t& offset, const uint8_t len); 00413 00419 static void move_left(const uint64_t*& word, uint8_t& offset, const uint8_t len); 00420 00422 static uint64_t next(const uint64_t* word, uint64_t idx); 00423 00425 static uint64_t prev(const uint64_t* word, uint64_t idx); 00426 00427 /* // precalc the deBruijn64ToIndex table 00428 static const void calcDeBruijnHash(uint64_t h[]){ 00429 h[0] = 0; 00430 for(int i=1; i<64; ++i){ 00431 h[((1ULL<<i)*DeBruijn64)>>58] = i; 00432 } 00433 } 00434 */ 00435 /* // precalc the lookup for select256 00436 static const void select256_table(uint64_t table[]){ 00437 for(int ones=1;ones<=8;++ones){ 00438 for(uint16_t i=0;i<256;++i){ 00439 uint16_t tmp = i, res=0, one_occs=0; 00440 while(tmp){ 00441 if(tmp&1) 00442 if( ++one_occs==ones) 00443 break; 00444 tmp>>=1; 00445 ++res; 00446 } 00447 if(one_occs!=ones) 00448 res = 0; 00449 table[((ones-1)<<8) + i] = res; 00450 } 00451 } 00452 } 00453 */ 00454 // precalac for PSoverflow 00455 /* static const void psoverflow_table(uint64_t table[]){ 00456 for(int i=0; i<=64; ++i){ 00457 table[i]= 128-i; 00458 for(int j=1;j<8;++j){ 00459 table[i]<<=8; 00460 table[i] += (128-i); 00461 } 00462 } 00463 } 00464 */ 00465 static uint16_t max_excess(uint64_t x, uint16_t& b1Cnt); 00466 static uint16_t max_excess2(uint64_t x, uint16_t& b1Cnt); 00467 static uint16_t max_excess3(uint64_t x, uint16_t& b1Cnt); 00474 static void max_byte_excesses(uint64_t w, uint64_t& max_byte_excesses, uint64_t& byte_prefix_sums_x_2); 00475 static void max_byte_excesses2(uint64_t w, uint64_t& max_byte_excesses, uint64_t& byte_prefix_sums_x_2); 00476 00477 static void min_max_byte_excesses(uint64_t max2, uint64_t& min_byte_excesses, uint64_t& max_byte_excesses, uint64_t& byte_prefix_sums_x_2); 00478 00488 static uint8_t first_excess_position(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2); 00489 00493 static uint8_t first_excess_position_naive(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2); 00494 00495 static uint8_t last_excess_position(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2); 00496 static uint8_t last_excess_position_naive(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2); 00497 /* 00498 * \param close_parenthesis_index Index of the closing parenthesis. \f$0 \leq close_parenthesis_index \leq 64\f$ 00499 */ 00500 static uint8_t find_open(uint64_t w, uint8_t close_parenthesis_index); 00501 static uint8_t find_open_naive(uint64_t w, uint8_t close_parenthesis_index); 00502 00503 static uint8_t find_enclose(uint64_t w, uint8_t open_parenthesis_index); 00504 static uint8_t find_enclose_naive(uint64_t w, uint8_t open_parenthesis_index); 00505 }; 00506 00507 00508 // ============= inline - implementations ================ 00509 00510 inline uint16_t bit_magic::max_excess(uint64_t max2, uint16_t& b1Cnt) 00511 { 00512 uint64_t sum = max2-((max2>>1)&0x5555555555555555ULL); 00513 uint64_t max = sum&max2;//(~(x&0x5555555555555555ULL&sum))∑ 00514 // Maxima fuer 4bit Bloecke 00515 max2 = ((max>>2)&0x3333333333333333ULL) + 0x6666666666666666ULL + ((sum&0x3333333333333333ULL)<<1); 00516 // cout<<"max2 "; 00517 // printDebug(max2); 00518 uint64_t mask = (max2 - (max = max&0x3333333333333333ULL))&0x8888888888888888ULL; 00519 mask = (mask | (((mask>>3))^0x1111111111111111ULL))-0x1111111111111111ULL; 00520 // max2 = ((max2 & mask) | (max & ~mask)); 00521 max2 = max ^((max^max2) & mask); 00522 // Summen fuer 4bit Bloecke 00523 sum = ((sum & 0x3333333333333333ULL) + ((sum>>2) & 0x3333333333333333ULL))<<1; 00524 // Maxima fuer 8bit Bloecke 00525 max = ((max2>>4)&0x0707070707070707ULL) + 0x0C0C0C0C0C0C0C0CULL + ((sum&0x0F0F0F0F0F0F0F0FULL)); 00526 mask = (max - (max2 = max2&0x0707070707070707ULL)) & 0x1010101010101010ULL; 00527 mask = (mask | ((mask>>4)^0x0101010101010101ULL))-0x0101010101010101ULL; 00528 // max = ((max & mask) | (max2 & ~mask)); 00529 max = max2 ^((max2^max) & mask); 00530 // Summe fuer 8 bit Bloecke 00531 sum = ((sum + (sum>>4)) & 0x1E1E1E1E1E1E1E1EULL); 00532 // Maxima fuer 16bit Bloecke 00533 max2 = ((max>>8)&0x000F000F000F000FULL) + 0x0018001800180018ULL + ((sum&0x00FF00FF00FF00FFULL)); 00534 mask = (max2 - (max = max&0x000F000F000F000FULL)) & 0x0020002000200020ULL; 00535 mask = (mask | ((mask>>5)^0x0001000100010001ULL))-0x0001000100010001ULL; 00536 // max2 = ((max2 & mask) | (max & ~mask)); 00537 max2 = max ^((max^max2) & mask); 00538 // Summe fuer 16 bit Bloecke 00539 sum = (sum + (sum>>8)) & 0x00FF00FF00FF00FFULL; 00540 // Maxima fuer 32bit Bloecke 00541 max = ((max2>>16)&0x0000001F0000001FULL) + 0x0000003000000030ULL + ((sum&0x0000FFFF0000FFFFULL)); 00542 mask = (max - (max2 = max2&0x0000001F0000001FULL)) & 0x0000004000000040ULL; 00543 mask = (mask | ((mask>>6)^0x0000000100000001ULL))-0x0000000100000001ULL; 00544 // max = ((max & mask) | (max2 & ~mask)) & 0x0000003F0000003FULL; 00545 max = (max2 ^((max2^max) & mask)) & 0x0000003F0000003FULL; 00546 // Summe fuer 32 bit Bloecke 00547 sum = (sum + (sum>>16)) & 0x0000FFFF0000FFFFULL; 00548 b1Cnt = (sum+(sum>>32))>>1; 00549 // Maxima fuer 64bit Bloecke 00550 max2 = (max>>32) + 0x0000000000000060ULL + (sum&0x00000000FFFFFFFFULL); 00551 return ((max2 - (max&0x00000000FFFFFFFFULL)) & 0x0000000000000080ULL) ? max2 & 0x000000000000007FULL : max & 0x00000000FFFFFFFFULL; 00552 /* mask = (max2 - (max = max&0x00000000FFFFFFFFULL)) & 0x0000000000000080ULL; 00553 return max2;*/ 00554 } 00555 00556 inline uint16_t bit_magic::max_excess2(uint64_t x, uint16_t& b1Cnt) 00557 { 00558 uint64_t _max_byte_excesses, byte_prefix_sums_x_2; 00559 max_byte_excesses(x, _max_byte_excesses, byte_prefix_sums_x_2); 00560 b1Cnt = byte_prefix_sums_x_2 >> 57; 00561 uint8_t maxi1 = _max_byte_excesses, maxi2 = _max_byte_excesses>>32; 00562 for (uint8_t i=1, m; i<4; ++i) { 00563 if ((m = (_max_byte_excesses >>= 8)) > maxi1) maxi1 = m; 00564 } 00565 _max_byte_excesses >>= 8; 00566 for (uint8_t i=5, m; i<8; ++i) { 00567 if ((m = (_max_byte_excesses >>= 8)) > maxi2) maxi2 = m; 00568 } 00569 return (maxi1>maxi2)?maxi1&0x7F:maxi2&0x7F; 00570 00571 /* 00572 uint8_t maxi[8] = {sum, sum>>8, sum>>16, sum>>24, sum>>32, sum>>40, sum>>48,sum>>56}; 00573 if(maxi[0]<maxi[1])maxi[0]=maxi[1]; 00574 if(maxi[2]<maxi[3])maxi[2]=maxi[3]; 00575 if(maxi[4]<maxi[5])maxi[4]=maxi[5]; 00576 if(maxi[6]<maxi[7])maxi[6]=maxi[7]; 00577 if(maxi[0]<maxi[2])maxi[0]=maxi[2]; 00578 if(maxi[4]<maxi[6])maxi[4]=maxi[6]; 00579 return (maxi[0]<maxi[4])?maxi[4]&0x7F:maxi[0]&0x7F; 00580 */ 00581 00582 /* max = (sum&0x00FF00FF00FF00FFULL)|0x0100010001000100ULL; 00583 mask = max-(max2 = ((sum>>4)&0x00FF00FF00FF00FFULL)); 00584 mask = (mask | ((mask>>5)^0x0001000100010001ULL))-0x0001000100010001ULL; 00585 max = max2 ^ (max2^max) & mask; 00586 uint8_t maxi1 = max, maxi2 = max>>32; 00587 if( ((uint8_t)(max>>16)) > maxi1 ) maxi1 = max>>16; 00588 if( ((uint8_t)(max>>48)) > maxi2 ) maxi2 = max>>48; 00589 return maxi2 > maxi1 ? maxi2 : maxi1; 00590 */ 00591 00592 /* for(uint8_t i=1, m;i<4;++i){ 00593 if( (m = (max >>= 16)) > maxi ) maxi = m; 00594 } 00595 return maxi&0x7F; 00596 */ 00597 } 00598 00599 inline void bit_magic::max_byte_excesses(uint64_t max2, uint64_t& max_byte_excesses, uint64_t& byte_prefix_sums_x_2) 00600 { 00601 byte_prefix_sums_x_2 = max2-((max2>>1)&0x5555555555555555ULL); 00602 uint64_t max = byte_prefix_sums_x_2&max2; 00603 // Maxima fuer 4bit Bloecke 00604 max2 = ((max>>2)&0x3333333333333333ULL) + 0x6666666666666666ULL + ((byte_prefix_sums_x_2&0x3333333333333333ULL)<<1); 00605 uint64_t mask = (max2 - (max = max&0x3333333333333333ULL))&0x8888888888888888ULL; 00606 mask = (mask | ((mask>>3)^0x1111111111111111ULL))-0x1111111111111111ULL; 00607 max2 = max ^((max^max2) & mask); 00608 // Summen fuer 4bit Bloecke 00609 byte_prefix_sums_x_2 = ((byte_prefix_sums_x_2 & 0x3333333333333333ULL) + ((byte_prefix_sums_x_2>>2) & 0x3333333333333333ULL))<<1; 00610 // Maxima fuer 8bit Bloecke 00611 max = ((max2>>4)&0x0707070707070707ULL) + 0x0C0C0C0C0C0C0C0CULL + ((byte_prefix_sums_x_2&0x0F0F0F0F0F0F0F0FULL)); 00612 mask = (max - (max2 = max2&0x0707070707070707ULL)) & 0x1010101010101010ULL; 00613 mask = (mask | ((mask>>4)^0x0101010101010101ULL))-0x0101010101010101ULL; 00614 max = max2 ^((max2^max) & mask); 00615 // Summe fuer 8 bit Bloecke 00616 byte_prefix_sums_x_2 = ((byte_prefix_sums_x_2 + (byte_prefix_sums_x_2>>4)) & 0x1E1E1E1E1E1E1E1EULL); 00617 byte_prefix_sums_x_2 = 0x0101010101010101ULL*byte_prefix_sums_x_2;// partialsummen der 2*summe der einsen im praefix 00618 max_byte_excesses = (((byte_prefix_sums_x_2<<8) | 0x8080808080808080ULL) - 0x3830282018100800ULL) + max; 00619 } 00620 00621 inline void bit_magic::max_byte_excesses2(uint64_t max2, uint64_t& max_byte_excesses, uint64_t& byte_prefix_sums_x_2) 00622 { 00623 byte_prefix_sums_x_2 = max2-((max2>>1)&0x5555555555555555ULL); 00624 // uint64_t max = (byte_prefix_sums_x_2&max2)+((max2|(max2>>1))&0x5555555555555555ULL);// add one to each max if max != 0 00625 uint64_t max = ((max2&0x5555555555555555ULL)<<1)|((max2&0x5555555555555555ULL)>>1); 00626 // Maxima fuer 4bit Bloecke 00627 max2 = ((max>>2)&0x3333333333333333ULL) + 0x6666666666666666ULL + ((byte_prefix_sums_x_2&0x3333333333333333ULL)<<1); 00628 uint64_t mask = (max2 - (max = max&0x3333333333333333ULL))&0x8888888888888888ULL; 00629 mask = (mask | ((mask>>3)^0x1111111111111111ULL))-0x1111111111111111ULL; 00630 max2 = max ^((max^max2) & mask); 00631 // Summen fuer 4bit Bloecke 00632 byte_prefix_sums_x_2 = ((byte_prefix_sums_x_2 & 0x3333333333333333ULL) + ((byte_prefix_sums_x_2>>2) & 0x3333333333333333ULL))<<1; 00633 // Maxima fuer 8bit Bloecke 00634 max = ((max2>>4)&0x0707070707070707ULL) + 0x0C0C0C0C0C0C0C0CULL + ((byte_prefix_sums_x_2&0x0F0F0F0F0F0F0F0FULL)); 00635 mask = (max - (max2 = max2&0x0707070707070707ULL)) & 0x1010101010101010ULL; 00636 mask = (mask | ((mask>>4)^0x0101010101010101ULL))-0x0101010101010101ULL; 00637 max = max2 ^((max2^max) & mask); 00638 // Summe fuer 8 bit Bloecke 00639 byte_prefix_sums_x_2 = ((byte_prefix_sums_x_2 + (byte_prefix_sums_x_2>>4)) & 0x1E1E1E1E1E1E1E1EULL); 00640 byte_prefix_sums_x_2 = 0x0101010101010101ULL*byte_prefix_sums_x_2;// partialsummen der 2*summe der einsen im praefix 00641 max_byte_excesses = (byte_prefix_sums_x_2<<8) + 0x474f575f676F777FULL + max; 00642 } 00643 00644 inline void bit_magic::min_max_byte_excesses(uint64_t max2, uint64_t& min_byte_excesses, uint64_t& max_byte_excesses, uint64_t& byte_prefix_sums_x_2) 00645 { 00646 byte_prefix_sums_x_2 = max2-((max2>>1)&0x5555555555555555ULL); 00647 // uint64_t max = (byte_prefix_sums_x_2&max2)+((max2|(max2>>1))&0x5555555555555555ULL);// add one to each max if max != 0 00648 uint64_t max = ((max2&0x5555555555555555ULL)<<1)|((max2>>1)&0x5555555555555555ULL); 00649 uint64_t min = ~max, min2,mask,mask2; 00650 //std::cerr<<"min = "<<min<<std::endl; 00651 // Maxima and minima fuer 4bit Bloecke 00652 max2 = ((max>>2)&0x3333333333333333ULL) + 0x6666666666666666ULL + ((byte_prefix_sums_x_2&0x3333333333333333ULL)<<1); 00653 min2 = ((min>>2)&0x3333333333333333ULL) + 0x6666666666666666ULL + (0x4444444444444444ULL-(((byte_prefix_sums_x_2)&0x3333333333333333ULL)<<1)); 00654 // min2 = ((min>>2)&0x3333333333333333ULL) + mask; 00655 mask = (max2 - (max = max&0x3333333333333333ULL))&0x8888888888888888ULL; 00656 mask2 = (min2 - (min = min&0x3333333333333333ULL))&0x8888888888888888ULL; 00657 mask = (mask | ((mask>>3)^0x1111111111111111ULL))-0x1111111111111111ULL; 00658 mask2 = (mask2 | ((mask2>>3)^0x1111111111111111ULL))-0x1111111111111111ULL; 00659 max2 = max ^((max^max2) & mask); 00660 min2 = min ^((min^min2) & mask2); 00661 //std::cerr<<"min2 = "<<min2<<std::endl; 00662 // Minima fuer 4bit Bloecke 00663 // Summen fuer 4bit Bloecke 00664 byte_prefix_sums_x_2 = ((byte_prefix_sums_x_2 & 0x3333333333333333ULL) + ((byte_prefix_sums_x_2>>2) & 0x3333333333333333ULL))<<1; 00665 // Maxima and minima fuer 8bit Bloecke 00666 max = ((max2>>4)&0x0707070707070707ULL) + 0x0C0C0C0C0C0C0C0CULL + ((byte_prefix_sums_x_2&0x0F0F0F0F0F0F0F0FULL)); 00667 min = ((min2>>4)&0x0707070707070707ULL) + 0x0C0C0C0C0C0C0C0CULL + (0x0808080808080808ULL-((byte_prefix_sums_x_2)&0x0F0F0F0F0F0F0F0FULL)); 00668 // min = ((min2>>4)&0x0707070707070707ULL) + mask; 00669 mask = (max - (max2 = max2&0x0707070707070707ULL)) & 0x1010101010101010ULL; 00670 mask2 = (min - (min2 = min2&0x0707070707070707ULL)) & 0x1010101010101010ULL; 00671 mask = (mask | ((mask>>4)^0x0101010101010101ULL))-0x0101010101010101ULL; 00672 mask2 = (mask2 | ((mask2>>4)^0x0101010101010101ULL))-0x0101010101010101ULL; 00673 max = max2 ^((max2^max) & mask); 00674 min = min2 ^((min2^min) & mask2); 00675 //std::cerr<<"min = "<<min<<std::endl; 00676 // Summe fuer 8 bit Bloecke 00677 byte_prefix_sums_x_2 = ((byte_prefix_sums_x_2 + (byte_prefix_sums_x_2>>4)) & 0x1E1E1E1E1E1E1E1EULL); 00678 byte_prefix_sums_x_2 = 0x0101010101010101ULL*byte_prefix_sums_x_2;// partialsummen der 2*summe der einsen im praefix 00679 //std::cerr<<"max = "<<max<<std::endl; 00680 //std::cerr<<"min = "<<min<<std::endl; 00681 //std::cerr<<"byte_prefix_sums_x = "<<(byte_prefix_sums_x_2) << std::endl; 00682 max_byte_excesses = (byte_prefix_sums_x_2<<8) + 0x474f575f676F777FULL + max; 00683 min_byte_excesses = (byte_prefix_sums_x_2<<8) + 0x4951596169717981ULL - min; 00684 } 00685 00686 00687 inline uint8_t bit_magic::first_excess_position(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2) 00688 { 00689 uint64_t max_byte_excesses; 00690 bit_magic::max_byte_excesses(w, max_byte_excesses, byte_prefix_sums_x_2); 00691 assert(excess_val <= 64); 00692 max_byte_excesses = (max_byte_excesses-_8_x_the_byte[excess_val])&0x8080808080808080ULL; 00693 if (max_byte_excesses) { 00694 uint8_t block_of_occurence_x_8 = DeBruijn64ToIndex[((max_byte_excesses&-max_byte_excesses)*DeBruijn64)>>58]&0xF8; 00695 assert(block_of_occurence_x_8<64); 00696 uint64_t sums = byte_prefix_sums_x_2<<8; 00697 excess_val = (excess_val + (block_of_occurence_x_8)) - ((sums >> (block_of_occurence_x_8))&0xFF); 00698 assert(excess_val <= 8); 00699 return first_pos_of_excess_val[((w>>(block_of_occurence_x_8))&0xFF)+(excess_val<<8)]+block_of_occurence_x_8; 00700 } else { 00701 return 64; 00702 } 00703 } 00704 00705 inline uint8_t bit_magic::first_excess_position_naive(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2) 00706 { 00707 byte_prefix_sums_x_2 = w-((w>>1) & 0x5555555555555555ull); 00708 byte_prefix_sums_x_2 = (byte_prefix_sums_x_2 & 0x3333333333333333ull) + ((byte_prefix_sums_x_2 >> 2) & 0x3333333333333333ull); 00709 byte_prefix_sums_x_2 = (byte_prefix_sums_x_2 + (byte_prefix_sums_x_2 >> 4)) & 0x0f0f0f0f0f0f0f0full; 00710 byte_prefix_sums_x_2 *= 0x0101010101010101ull; 00711 byte_prefix_sums_x_2 <<= 1; 00712 for (uint8_t i=0; i<64; ++i, w>>=1) { 00713 excess_val += (1-((w&1)<<1)); 00714 if (excess_val==0) 00715 return i; 00716 } 00717 return 64; 00718 } 00719 00720 inline uint8_t bit_magic::find_open(uint64_t w, uint8_t close_parenthesis_index) 00721 { 00722 if (!close_parenthesis_index) 00723 return 64; 00724 assert(close_parenthesis_index <= 64); 00725 w <<= (64-close_parenthesis_index); 00726 uint8_t near_match = very_near_find_open[(w>>56)&0xFF];// find near matches 00727 // std::cerr<<"near_match="<<(int)near_match<<std::endl; 00728 if (near_match) { // the matching opening parenthesis is at most 7 positions before the closing paranthesis 00729 return close_parenthesis_index-near_match; 00730 } else { // the matching opening parenthesis is not among the first 8 preceding bits/parentheses 00731 // calculate 00732 uint64_t min_byte_excesses, max_byte_excesses, byte_prefix_sums_x_2; 00733 //std::cerr<<"w="<<w<<" excess_val="<<(int)excess_val<<std::endl; 00734 bit_magic::min_max_byte_excesses(w, min_byte_excesses, max_byte_excesses, byte_prefix_sums_x_2); 00735 int8_t desired_excess = (byte_prefix_sums_x_2 >> 56) - 65; 00736 // std::cerr<<"desired_excess="<<(int)desired_excess<<" "<<(int)(64-close_parenthesis_index)<<std::endl; 00737 if (desired_excess >= 0) { 00738 max_byte_excesses = (max_byte_excesses-_8_x_the_byte[desired_excess])&0x8080808080808080ULL; 00739 max_byte_excesses &= ~(min_byte_excesses-_8_x_the_byte[desired_excess]-_8_x_the_byte[1]); 00740 } else { // desire_excess <0 00741 // std::cerr<<"max_byte_excesses="<<max_byte_excesses<<std::endl; 00742 // std::cerr<<"min_byte_excesses="<<min_byte_excesses<<std::endl; 00743 max_byte_excesses = (max_byte_excesses+_8_x_the_byte[-desired_excess])&0x8080808080808080ULL; 00744 max_byte_excesses &= ~(min_byte_excesses+_8_x_the_byte[-desired_excess]-_8_x_the_byte[1]); 00745 } 00746 // std::cerr<<"max_byte_excesses="<<max_byte_excesses<<std::endl; 00747 if (max_byte_excesses) { 00748 max_byte_excesses |= (max_byte_excesses>>8); 00749 max_byte_excesses |= (max_byte_excesses>>16); 00750 max_byte_excesses |= (max_byte_excesses>>32); 00751 max_byte_excesses -= (max_byte_excesses>>8); 00752 assert(max_byte_excesses = (max_byte_excesses&-max_byte_excesses)); 00753 uint8_t block_of_occurence_x_8 = DeBruijn64ToIndex[(max_byte_excesses*DeBruijn64)>>58]-7; 00754 //std::cerr<<"block_of_occurence_x_8="<<(int)block_of_occurence_x_8<<std::endl; 00755 //std::cerr<<"result+(64-close_parenthesis_index)="<<result+(64-close_parenthesis_index)<<std::endl; 00756 assert(block_of_occurence_x_8 <= 56); 00757 byte_prefix_sums_x_2 <<= 8; 00758 int excess_val = (int)(desired_excess + block_of_occurence_x_8 + 8) - ((byte_prefix_sums_x_2 >> block_of_occurence_x_8)&0xFF); 00759 //std::cerr<<"desired_excess+block_of_occurence_x_8="<< (int)(desired_excess + block_of_occurence_x_8) <<std::endl; 00760 //std::cerr<<"excess_so_far ="<< ((int)((byte_prefix_sums_x_2 >> block_of_occurence_x_8)&0xFF))-(block_of_occurence_x_8) <<std::endl; 00761 //std::cerr<<"byte_prefix_sums ="<< byte_prefix_sums_x_2 <<std::endl; 00762 //std::cerr<<"excess_val="<<(int)excess_val<<std::endl; 00763 assert(excess_val<=16); 00764 return last_pos_of_excess_val[((w>>(block_of_occurence_x_8))&0xFF)+(excess_val<<8)]+block_of_occurence_x_8-63+close_parenthesis_index; 00765 } else // no near parenthesis found 00766 return 64; 00767 } 00768 } 00769 00770 inline uint8_t bit_magic::find_open_naive(uint64_t w, uint8_t close_parenthesis_index) 00771 { 00772 for (int i=close_parenthesis_index-1, excess_val=-1; i>=0; --i) { 00773 if ((w>>i)&1) 00774 ++excess_val; 00775 else 00776 --excess_val; 00777 if (excess_val==0) 00778 return i; 00779 } 00780 return 64; 00781 } 00782 00783 inline uint8_t bit_magic::find_enclose_naive(uint64_t w, uint8_t open_parenthesis_index) 00784 { 00785 for (int i=open_parenthesis_index-1, excess_val=0; i>=0; --i) { 00786 if ((w>>i)&1) 00787 ++excess_val; 00788 else 00789 --excess_val; 00790 if (excess_val==1) 00791 return i; 00792 } 00793 return 64; 00794 } 00795 00796 inline uint8_t bit_magic::find_enclose(uint64_t w, uint8_t open_parenthesis_index) 00797 { 00798 return find_open(w, open_parenthesis_index); 00799 } 00800 00801 inline uint8_t bit_magic::last_excess_position(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2) 00802 { 00803 uint64_t min_byte_excesses, max_byte_excesses; 00804 //std::cerr<<"w="<<w<<" excess_val="<<(int)excess_val<<std::endl; 00805 bit_magic::min_max_byte_excesses(w, min_byte_excesses, max_byte_excesses, byte_prefix_sums_x_2); 00806 //std::cerr<<"min_byte_accesses="<<min_byte_excesses<<std::endl; 00807 //std::cerr<<"max_byte_accesses="<<max_byte_excesses<<std::endl; 00808 assert(excess_val <= 64); 00809 max_byte_excesses = (max_byte_excesses-_8_x_the_byte[excess_val])&0x8080808080808080ULL; 00810 max_byte_excesses &= ~(min_byte_excesses-_8_x_the_byte[excess_val]-_8_x_the_byte[1]); 00811 //std::cerr<<"max_byte_excesses="<<max_byte_excesses<<std::endl; 00812 if (max_byte_excesses) { 00813 // std::cerr<<"max_byte_accesses="<<max_byte_excesses<<std::endl; 00814 uint8_t block_of_occurence_x_8; 00815 if (max_byte_excesses&0x8080808000000000ULL) { // 4..7 00816 if (max_byte_excesses&0x8080000000000000ULL) { // 6..7 00817 block_of_occurence_x_8 = 48 + ((max_byte_excesses>>60)&0x8); 00818 } else { // 4..5 00819 block_of_occurence_x_8 = 32 + ((max_byte_excesses>>44)&0x8); 00820 } 00821 } else { // 0..3 00822 if (max_byte_excesses&0x8080808080800000ULL) { //2..3 00823 block_of_occurence_x_8 = 16 + ((max_byte_excesses>>28)&0x8); 00824 } else { //0..1 00825 block_of_occurence_x_8 = (max_byte_excesses>>12)&0x8; 00826 } 00827 } 00828 // std::cerr<<" block_of_oc...="<<(int)block_of_occurence_x_8<<std::endl; 00829 #ifndef NDEBUG 00830 #endif 00831 assert(block_of_occurence_x_8<64); 00832 uint64_t sums = byte_prefix_sums_x_2<<8; 00833 excess_val = (excess_val + (block_of_occurence_x_8)+8) - ((sums >> (block_of_occurence_x_8))&0xFF); 00834 // if(excess_val>8+8){ 00835 // std::cerr<<"block_of_occurence_x_8="<< (int)block_of_occurence_x_8 <<" excess_val="<<(int)excess_val<<std::endl; 00836 // std::cerr<<"(sums >> (block_of_occurence_x_8))&0xFF = "<< ((sums >> (block_of_occurence_x_8))&0xFF) << std::endl; 00837 // } 00838 assert(excess_val <= 16); 00839 // std::cerr<<"excess_val="<<(int)((int)excess_val)-8<<std::endl; 00840 // std::cerr<<"pos_inside_8_bit_block "<<(int)last_pos_of_excess_val[((w>>(block_of_occurence_x_8))&0xFF)+(excess_val<<8)]<<std::endl; 00841 // std::cerr<<"pos_inside_8_bit_block+block_of... "<<(int)last_pos_of_excess_val[((w>>(block_of_occurence_x_8))&0xFF)+(excess_val<<8)]+block_of_occurence_x_8<<std::endl; 00842 return last_pos_of_excess_val[((w>>(block_of_occurence_x_8))&0xFF)+(excess_val<<8)]+block_of_occurence_x_8; 00843 } else { 00844 return 64; 00845 } 00846 } 00847 00848 00849 inline uint8_t bit_magic::last_excess_position_naive(uint64_t w, uint8_t excess_val, uint64_t& byte_prefix_sums_x_2) 00850 { 00851 byte_prefix_sums_x_2 = w-((w>>1) & 0x5555555555555555ull); 00852 byte_prefix_sums_x_2 = (byte_prefix_sums_x_2 & 0x3333333333333333ull) + ((byte_prefix_sums_x_2 >> 2) & 0x3333333333333333ull); 00853 byte_prefix_sums_x_2 = (byte_prefix_sums_x_2 + (byte_prefix_sums_x_2 >> 4)) & 0x0f0f0f0f0f0f0f0full; 00854 byte_prefix_sums_x_2 *= 0x0101010101010101ull; 00855 byte_prefix_sums_x_2 <<= 1; 00856 uint8_t res = 64; 00857 uint8_t my_excess_val=0; 00858 for (uint8_t i=0; i<64; ++i, w>>=1) { 00859 if (w&1) 00860 ++my_excess_val; 00861 else 00862 --my_excess_val; 00863 //if(i%8==0) std::cerr<<std::endl; 00864 //std::cerr<<"("<<(int)i<<","<<(int)my_excess_val<<")"; 00865 if (excess_val==my_excess_val) 00866 res = i; 00867 } 00868 //std::cerr<<std::endl; 00869 return res; 00870 } 00871 00872 00873 inline uint16_t bit_magic::max_excess3(uint64_t x, uint16_t& b1Cnt) 00874 { 00875 uint64_t sum = x-((x>>1) & 0x5555555555555555ull); 00876 sum = (sum & 0x3333333333333333ull) + ((sum >> 2) & 0x3333333333333333ull); 00877 sum = (sum + (sum >> 4)) & 0x0f0f0f0f0f0f0f0full; 00878 sum *= 0x0101010101010101ull; 00879 b1Cnt = sum>>56; 00880 sum = (((sum<<1) | 0x8080808080808080ULL) - 0x4038302820181008ULL); 00881 /* uint8_t maxi = max_excess_8bit[(uint8_t)x] | 0x80; 00882 x>>=8; 00883 for(uint8_t i=1,m;i<8;++i, sum>>=8, x>>=8){ 00884 if( (m = (((uint8_t)sum)+max_excess_8bit[(uint8_t)x])) > maxi ) maxi = m; 00885 // std::cerr<<"m="<<(int)(m&0x7F)<<" "<<(int)max_excess_8bit[(uint8_t)x]<<" sum="<<(int)((uint8_t)sum)<<std::endl; 00886 } 00887 return maxi&0x7F; 00888 */ uint8_t maxi[8] = { static_cast<uint8_t>(max_excess_8bit[(uint8_t)x]|0x80), 00889 static_cast<uint8_t>((sum)+ max_excess_8bit[(uint8_t)(x>>8)]), 00890 static_cast<uint8_t>((sum>>8)+ max_excess_8bit[(uint8_t)(x>>16)]), 00891 static_cast<uint8_t>((sum>>16)+ max_excess_8bit[(uint8_t)(x>>24)]), 00892 static_cast<uint8_t>((sum>>24)+ max_excess_8bit[(uint8_t)(x>>32)]), 00893 static_cast<uint8_t>((sum>>32)+ max_excess_8bit[(uint8_t)(x>>40)]), 00894 static_cast<uint8_t>((sum>>40)+ max_excess_8bit[(uint8_t)(x>>48)]), 00895 static_cast<uint8_t>((sum>>48)+ max_excess_8bit[(uint8_t)(x>>56)]) 00896 }; 00897 if (maxi[0]<maxi[1])maxi[0]=maxi[1]; 00898 if (maxi[2]<maxi[3])maxi[2]=maxi[3]; 00899 if (maxi[4]<maxi[5])maxi[4]=maxi[5]; 00900 if (maxi[6]<maxi[7])maxi[6]=maxi[7]; 00901 if (maxi[0]<maxi[2])maxi[0]=maxi[2]; 00902 if (maxi[4]<maxi[6])maxi[4]=maxi[6]; 00903 return (maxi[0]<maxi[4])?maxi[4]&0x7F:maxi[0]&0x7F; 00904 } 00905 00906 // see page 11, Knuth TAOCP Vol 4 F1A 00907 inline uint64_t bit_magic::b1Cnt(uint64_t x) 00908 { 00909 #ifdef __SSE4_2__ 00910 return __builtin_popcountll(x); 00911 #else 00912 #ifdef POPCOUNT_TL 00913 return B1CntBytes[x&0xFFULL] + B1CntBytes[(x>>8)&0xFFULL] + 00914 B1CntBytes[(x>>16)&0xFFULL] + B1CntBytes[(x>>24)&0xFFULL] + 00915 B1CntBytes[(x>>32)&0xFFULL] + B1CntBytes[(x>>40)&0xFFULL] + 00916 B1CntBytes[(x>>48)&0xFFULL] + B1CntBytes[(x>>56)&0xFFULL]; 00917 #else 00918 x = x-((x>>1) & 0x5555555555555555ull); 00919 x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull); 00920 x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0full; 00921 return (0x0101010101010101ull*x >> 56); 00922 #endif 00923 #endif 00924 } 00925 00926 inline uint32_t bit_magic::b1Cnt32(uint32_t x) 00927 { 00928 x = x-((x>>1) & 0x55555555); 00929 x = (x & 0x33333333) + ((x>>2) & 0x33333333); 00930 return (0x10101010*x >>28)+(0x01010101*x >>28); 00931 } 00932 00933 inline uint32_t bit_magic::b1CntNaive(uint64_t x) 00934 { 00935 uint32_t res = 0; 00936 for (int i=0; i<64; ++i) { 00937 res += x&1; 00938 x>>=1; 00939 } 00940 return res; 00941 } 00942 00943 /* 00944 inline uint32_t bit_magic::b11Cnt(uint64_t x){ 00945 // extract "10" 2bit blocks 00946 uint64_t ex10 = ((x^(x<<1))&0xAAAAAAAAAAAAAAAAULL)&x; 00947 // extract "10" 2bit blocks 00948 uint64_t ex01 = ((x^(x>>1))&0x5555555555555555ULL)&x; 00949 // extract "11" 2bit blocks 00950 uint64_t ex11 = (x&(x<<1))&0xAAAAAAAAAAAAAAAAULL; 00951 ex11 |= (ex11>>1); 00952 x = (((ex11)+(ex10<<1))&(ex01))|(ex11&0x5555555555555555ULL); 00953 x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); 00954 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 00955 return (0x0101010101010101ULL*x >> 56); 00956 }*/ 00957 00958 inline uint32_t bit_magic::b11Cnt(uint64_t x, uint64_t& c) 00959 { 00960 // extract "11" 2bit blocks 00961 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL, t; 00962 // extract "10" 2bit blocks 00963 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 00964 00965 x = ex11 | ((t=(ex11|(ex11<<1))+(((ex10or01<<1)&0x5555555555555555ULL)|c))&(ex10or01&0x5555555555555555ULL)); 00966 c = (ex10or01>>63) or(t < (ex11|(ex11<<1))); 00967 00968 x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); 00969 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 00970 return (0x0101010101010101ULL*x >> 56); 00971 } 00972 00973 inline uint32_t bit_magic::b11Cnt(uint64_t x) 00974 { 00975 // extract "11" 2bit blocks 00976 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL; 00977 // extract "10" 2bit blocks 00978 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 00979 00980 x = ex11 | (((ex11|(ex11<<1))+((ex10or01<<1)&0x5555555555555555ULL))&(ex10or01&0x5555555555555555ULL)); 00981 00982 x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); 00983 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 00984 return (0x0101010101010101ULL*x >> 56); 00985 } 00986 00987 inline uint32_t bit_magic::b11CntS(uint64_t x, uint64_t& c) 00988 { 00989 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL; 00990 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 00991 ex11 += (((ex11|(ex11<<1))+(((ex10or01<<1)&0x5555555555555555ULL)|c)) & ((ex10or01&0x5555555555555555ULL)|ex11)); 00992 c = (x>>63) and(ex11>>62); 00993 x = ex11-((ex11>>1) & 0x5555555555555555ULL); 00994 x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); 00995 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 00996 return (0x0101010101010101ULL*x >> 56); 00997 } 00998 00999 inline uint32_t bit_magic::b11CntS(uint64_t x) 01000 { 01001 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL; 01002 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 01003 ex11 += (((ex11|(ex11<<1))+((ex10or01<<1)&0x5555555555555555ULL)) & ((ex10or01&0x5555555555555555ULL)|ex11)); 01004 x = ex11-((ex11>>1) & 0x5555555555555555ULL); 01005 x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); 01006 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 01007 return (0x0101010101010101ULL*x >> 56); 01008 } 01009 01010 inline uint32_t bit_magic::b11CntNaive(uint64_t x) 01011 { 01012 uint32_t res = 0; 01013 for (int i=0; i<64; ++i) { 01014 if ((x&3) == 3) { 01015 ++res; 01016 ++i; 01017 x>>=1; 01018 } 01019 x>>=1; 01020 } 01021 return res; 01022 } 01023 01024 inline uint32_t bit_magic::eB11Cnt(uint64_t x) 01025 { 01026 x = (x&(x>>1))&0x5555555555555555ULL; 01027 x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); 01028 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 01029 return (0x0101010101010101ULL*x >> 56); 01030 } 01031 01032 inline uint32_t bit_magic::b10Cnt(uint64_t x, uint64_t& c) 01033 { 01034 uint32_t res = b1Cnt((x ^((x<<1) | c)) & (~x)); 01035 c = (x >> 63); 01036 return res; 01037 } 01038 01039 inline uint64_t bit_magic::b10Map(uint64_t x, uint64_t c) 01040 { 01041 return ((x ^((x << 1) | c)) & (~x)); 01042 } 01043 01044 inline uint32_t bit_magic::b01Cnt(uint64_t x, uint64_t& c) 01045 { 01046 uint32_t res = b1Cnt((x ^((x<<1) | c)) & x); 01047 c = (x >> 63); 01048 return res; 01049 } 01050 inline uint64_t bit_magic::b01Map(uint64_t x, uint64_t c) 01051 { 01052 return ((x ^((x << 1) | c)) & x); 01053 } 01054 01055 inline uint32_t bit_magic::b10CntNaive(uint64_t x, uint64_t& c) 01056 { 01057 uint32_t sum = 0, lastbit = c; 01058 for (uint32_t i=0; i<64; ++i) { 01059 if ((x&1) == 0 and lastbit == 1) { 01060 ++sum; 01061 } 01062 lastbit = (x&1); 01063 x >>= 1; 01064 } 01065 c = lastbit; 01066 return sum; 01067 } 01068 01069 inline uint32_t bit_magic::k1BP(uint64_t x, uint32_t i) 01070 { 01071 // if ( i == 1 ){ 01072 // return __builtin_ctzll(x); 01073 // } 01074 #ifdef __SSE4_2__ 01075 uint64_t s = x, b; 01076 s = s-((s>>1) & 0x5555555555555555ULL); 01077 s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); 01078 s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 01079 s = 0x0101010101010101ULL*s; 01080 // now s contains 8 bytes s[7],...,s[0], s[i] contains the cumulative sum 01081 // off (i+1)*8 least significant bits of s 01082 b = (s+PsOverflow[i]) & 0x8080808080808080ULL; 01083 // PsOverflow contains a bit mask x consisting of 8 bytes 01084 // x[7],...,x[0] and x[i] is set to 128-i 01085 // => a byte b[i] in b is >= 128 if cum sum >= i 01086 01087 // __builtin_ctzll returns the number of trailing zeros, if b!=0 01088 int byte_nr = __builtin_ctzll(b) >> 3; // byte nr in [0..7] 01089 // b = _mm_movemask_pi8( (__m64)~b ); 01090 // std::cout << "byte_nr = " << byte_nr << " "; 01094 s <<= 8; 01095 i -= ((uint8_t*)&s)[byte_nr]; 01096 // std::cout << "i = " << i << std::endl; 01097 return (byte_nr << 3) + Select256[((i-1) << 8) + ((uint8_t*)&x)[byte_nr] ]; 01098 #endif 01099 return i1BP(x, i); 01100 } 01101 01102 01103 inline uint32_t bit_magic::i1BP(uint64_t x, uint32_t i) 01104 { 01105 #ifdef __SSE4_2__ 01106 //__m64 v; 01107 //v = (__m64)x; 01108 //const __m64 mask_ 01109 uint64_t s = x, b; 01110 s = s-((s>>1) & 0x5555555555555555ULL); 01111 s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); 01112 s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 01113 s = 0x0101010101010101ULL*s; 01114 // now s contains 8 bytes s[7],...,s[0], s[i] contains the cumulative sum 01115 // off (i+1)*8 least significant bits of s 01116 b = (s+PsOverflow[i]) & 0x8080808080808080ULL; 01117 // PsOverflow contains a bit mask x consisting of 8 bytes 01118 // x[7],...,x[0] and x[i] is set to 128-i 01119 // => a byte b[i] in b is >= 128 if cum sum >= i 01120 01121 // __builtin_ctzll returns the number of trailing zeros, if b!=0 01122 int byte_nr = __builtin_ctzll(b) >> 3; // byte nr in [0..7] 01123 // b = _mm_movemask_pi8( (__m64)~b ); 01124 // std::cout << "byte_nr = " << byte_nr << " "; 01128 s <<= 8; 01129 i -= (s >> (byte_nr<<3)) & 0xFFULL; 01130 //i -= ((uint8_t*)&s)[byte_nr]; 01131 // std::cout << "i = " << i << std::endl; 01132 return (byte_nr << 3) + Select256[((i-1) << 8) + ((x>>(byte_nr<<3))&0xFFULL) ]; 01133 #endif 01134 return i1BP2(x, i); 01135 } 01136 01137 01138 inline uint32_t bit_magic::i1BP2(uint64_t x, uint32_t i) 01139 { 01140 // TODO: Maybe case i<16 as a special case 01141 // assert(i>0 && i<=b1Cnt(x)); 01142 /*register*/ uint64_t s = x, b; // s = sum 01143 s = s-((s>>1) & 0x5555555555555555ULL); 01144 s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); 01145 s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 01146 s = 0x0101010101010101ULL*s; 01147 b = (s+PsOverflow[i]);//&0x8080808080808080ULL;// add something to the partial sums to cause overflow 01148 // b =DeBruijn64ToIndex[((b&-b)*DeBruijn64)>>58]; 01149 i = (i-1)<<8; 01150 if (b&0x0000000080000000ULL) // byte <=3 01151 if (b&0x0000000000008000ULL) //byte <= 1 01152 if (b&0x0000000000000080ULL) 01153 return Select256[(x&0xFFULL) + i]; 01154 else 01155 return 8 +Select256[(((x>>8)&0xFFULL) + i - ((s&0xFFULL)<<8))&0x7FFULL];//byte 1; 01156 else//byte >1 01157 if (b&0x0000000000800000ULL) //byte <=2 01158 return 16+Select256[(((x>>16)&0xFFULL) + i - (s&0xFF00ULL))&0x7FFULL];//byte 2; 01159 else 01160 return 24+Select256[(((x>>24)&0xFFULL) + i - ((s>>8)&0xFF00ULL))&0x7FFULL];//byte 3; 01161 else// byte > 3 01162 if (b&0x0000800000000000ULL) // byte <=5 01163 if (b&0x0000008000000000ULL) //byte <=4 01164 return 32+Select256[(((x>>32)&0xFFULL) + i - ((s>>16)&0xFF00ULL))&0x7FFULL];//byte 4; 01165 else 01166 return 40+Select256[(((x>>40)&0xFFULL) + i - ((s>>24)&0xFF00ULL))&0x7FFULL];//byte 5; 01167 else// byte >5 01168 if (b&0x0080000000000000ULL) //byte<=6 01169 return 48+Select256[(((x>>48)&0xFFULL) + i - ((s>>32)&0xFF00ULL))&0x7FFULL];//byte 6; 01170 else 01171 return 56+Select256[(((x>>56)&0xFFULL) + i - ((s>>40)&0xFF00ULL))&0x7FFULL];//byte 7; 01172 return 0; 01173 } 01174 01175 inline uint32_t bit_magic::j1BP(uint64_t x, uint32_t j) 01176 { 01177 register uint64_t s = x, b, l; // s = sum b = byte [1..8] multiplied by 8 in which the j 1 is located 01178 s = s-((s>>1) & 0x5555555555555555ULL); 01179 s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); 01180 s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; 01181 s = 0x0101010101010101ULL*s; 01182 b = ((((((((j-1)*0x0101010101010101ULL)|0x8080808080808080ULL)-s)&0x8080808080808080ULL)>>7)*0x0101010101010101ULL)>>53)&0xFFFFFFFFFFFFFFF8ULL; // s <=_8 (j*L_8) 01183 // b>0 if there are at least j 1s in x 01184 l = j - (((s<<8)>>b)&0xFF); 01185 s = (((((((((x>>b)&0xFF)*0x0101010101010101ULL)&0x8040201008040201ULL)|0x8080808080808080ULL)-0x0101010101010101ULL)|(((x>>b)&0x80ULL)<<56))&0x8080808080808080ULL)>>7)*0x0101010101010101ULL; 01186 // std::cerr<<"j="<<j<<" in byte "<<b<<" l="<<l<<" s="<<s<<std::endl; 01187 return b + ((((((((l-1)*0x0101010101010101ULL)|0x8080808080808080ULL)-s)&0x8080808080808080ULL)>>7)*0x0101010101010101ULL)>>56); 01188 } 01189 01190 inline uint32_t bit_magic::i1BPNaive(uint64_t x, uint32_t i) 01191 { 01192 uint32_t pos = 0; 01193 while (x) { 01194 i -= x&1; 01195 x>>=1; 01196 if (!i) break; 01197 ++pos; 01198 } 01199 return pos; 01200 } 01201 01202 // builtin version of sse version or 01203 // 64 bit version of 32 bit proposial of 01204 // http://www-graphics.stanford.edu/~seander/bithacks.html 01205 inline uint32_t bit_magic::l1BP(uint64_t x) 01206 { 01207 #ifdef __SSE4_2__ 01208 if (x == 0) 01209 return 0; 01210 return 63 - __builtin_clzll(x); 01211 #else 01212 register uint64_t t,tt; // temporaries 01213 if ((tt = x >> 32)) { // l1BP >= 32 01214 if ((t = tt >> 16)) { // l1BP >= 48 01215 return (tt = t >> 8) ? 56 + L1BP[tt] : 48 + L1BP[t]; 01216 } else { // l1BP < 48 01217 return (t = tt >> 8) ? 40 + L1BP[t] : 32 + L1BP[tt]; 01218 } 01219 } else { // l1BP < 32 01220 if ((t = x >> 16)) { // l1BP >= 16 01221 return (tt = t >> 8) ? 24 + L1BP[tt] : 16 + L1BP[t]; 01222 } else { // l1BP < 16 01223 return (tt = x >> 8) ? 8 + L1BP[tt] : L1BP[x]; 01224 } 01225 } 01226 #endif 01227 } 01228 01229 inline uint32_t bit_magic::l1BPNaive(uint64_t x) 01230 { 01231 if (x&0x8000000000000000ULL) 01232 return 63; 01233 x<<=1; 01234 for (int i=62; i>=0; ++i) 01235 if (x&0x8000000000000000ULL) 01236 return i; 01237 else 01238 x<<=1; 01239 return 0; 01240 } 01241 01242 // details see: http://citeseer.ist.psu.edu/leiserson98using.html 01243 // or page 10, Knuth TAOCP Vol 4 F1A 01244 inline uint32_t bit_magic::r1BP(uint64_t x) 01245 { 01246 #ifdef __SSE4_2__ 01247 if (x==0) 01248 return 0; 01249 return __builtin_ctzll(x); 01250 #else 01251 if (x&1) return 0; 01252 if (x&3) return 1; 01253 if (x&7) return 2; 01254 if (x&0x7F) { // in average every second random number x can be answered this way 01255 // return 0; 01256 return lookupr1BP[(x&0x7F)>>3]+3; 01257 } 01258 // x&-x equals x with only the lsb set 01259 //default: 01260 return DeBruijn64ToIndex[((x&-x)*DeBruijn64)>>58]; 01261 #endif 01262 } 01263 01264 inline uint32_t bit_magic::r1BPNaive(uint64_t x) 01265 { 01266 if (x&1) 01267 return 0; 01268 x>>=1; 01269 for (int i=1; i<64; ++i) 01270 if (x&1) 01271 return i; 01272 else 01273 x>>=1; 01274 return 63; 01275 } 01276 01277 inline uint32_t bit_magic::l11BP(uint64_t x) 01278 { 01279 // extract "11" 2bit blocks 01280 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL; 01281 // std::cerr<<"ex11="<<ex11<<std::endl; 01282 // extract "10" 2bit blocks 01283 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 01284 // uint64_t ex10or01 = (x^(x<<1))&0xAAAAAAAAAAAAAAAAULL; 01285 // ex10or01 = (ex10or01|(ex10or01>>1))&x; 01286 // extract "10" 2bit blocks 01287 ex11 += (((ex11|(ex11<<1))+((ex10or01<<1)&0x5555555555555555ULL)) & ((ex10or01&0x5555555555555555ULL)|ex11)); 01288 // 01289 // std::cerr<<"ex11="<<ex11<<std::endl; 01290 // uint32_t p = l1BP(ex11);// p % 2 == 0 and (x&(1<<p))==1 01291 // std::cerr<<"p="<<p<<std::endl; 01292 return l1BP(ex11);/* 01293 if( p == 0 and (x&1) == 0 ) 01294 return 0; 01295 return p + (x>>(p+1)&1); 01296 */ 01297 } 01298 /* 01299 inline uint64_t bit_magic::all11BPs(uint64_t x, uint32_t c){ 01300 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL; 01301 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 01302 ex11 += ( ((ex11|(ex11<<1))+(((ex10or01<<1)&0x5555555555555555ULL)|c)) & ((ex10or01&0x5555555555555555ULL)|ex11) ); 01303 return ex11; 01304 } 01305 */ 01306 01307 inline uint64_t bit_magic::all11BPs(uint64_t x, bool& c) 01308 { 01309 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL; 01310 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 01311 ex11 += (((ex11|(ex11<<1))+(((ex10or01<<1)&0x5555555555555555ULL)|c)) & ((ex10or01&0x5555555555555555ULL)|ex11)); 01312 c = (x&(~ex11))>>63; 01313 return ex11; 01314 } 01315 01316 inline uint32_t bit_magic::i11BP(uint64_t x, uint32_t i, uint32_t c) 01317 { 01318 uint64_t ex11 = (x&(x>>1))&0x5555555555555555ULL; 01319 uint64_t ex10or01 = (ex11|(ex11<<1))^x; 01320 ex11 += (((ex11|(ex11<<1))+(((ex10or01<<1)&0x5555555555555555ULL)|c)) & ((ex10or01&0x5555555555555555ULL)|ex11)); 01321 return i1BP(ex11,i); 01322 } 01323 01324 inline uint32_t bit_magic::eI11BP(uint64_t x, uint32_t i) 01325 { 01326 return i1BP((x&(x<<1))&0xAAAAAAAAAAAAAAAAULL, i); 01327 } 01328 01329 inline void bit_magic::write_int(uint64_t* word, uint64_t x, uint8_t offset, const uint8_t len) 01330 { 01331 x &= bit_magic::Li1Mask[len]; 01332 if (offset + len < 64) { 01333 *word &= 01334 ((bit_magic::All1Mask << (offset+len)) | bit_magic::Li1Mask[offset]); // mask 1..10..01..1 01335 *word |= (x << offset); 01336 // *word ^= ((*word ^ x) & (bit_magic::Li1Mask[len] << offset) ); 01337 // surprisingly the above line is slower than the lines above 01338 } else { 01339 *word &= 01340 ((bit_magic::Li1Mask[offset])); // mask 0....01..1 01341 *word |= (x << offset); 01342 if ((offset = (offset+len)&0x3F)) { // offset+len > 64 01343 *(word+1) &= (~bit_magic::Li1Mask[offset]); // mask 1...10..0 01344 // *(word+1) &= bit_magic::Li0Mask[offset]; // mask 1...10..0 01345 // surprisingly the above line is slower than the line above 01346 *(word+1) |= (x >> (len-offset)); 01347 } 01348 } 01349 } 01350 01351 //inline void bit_magic::writeInt2(uint64_t *word, uint64_t x, uint8_t offset, const uint8_t len){ 01352 // *word ^= ((x)& 111ULL << offset); 01353 // *word ^= ((*word ^ x)& bit_magic::Li1Mask[len] << offset); 01354 // if( offset + len > 64 ){ 01355 // offset = offset + len - 64; 01356 // *(word+1) ^= (( *(word+1) ^ (x >> (len-offset)) ) & bit_magic::Li1Mask[offset]); 01357 // } 01358 //} 01359 01360 inline void bit_magic::write_int_and_move(uint64_t*& word, uint64_t x, uint8_t& offset, const uint8_t len) 01361 { 01362 x &= bit_magic::Li1Mask[len]; 01363 if (offset + len < 64) { 01364 *word &= 01365 ((bit_magic::All1Mask << (offset+len)) | bit_magic::Li1Mask[offset]); // mask 1..10..01..1 01366 *word |= (x << offset); 01367 offset += len; 01368 } else { 01369 *word &= 01370 ((bit_magic::Li1Mask[offset])); // mask 0....01..1 01371 *word |= (x << offset); 01372 if ((offset= (offset+len))>64) {// offset+len >= 64 01373 offset &= 0x3F; 01374 *(++word) &= (~bit_magic::Li1Mask[offset]); // mask 1...10..0 01375 *word |= (x >> (len-offset)); 01376 } else { 01377 offset = 0; 01378 ++word; 01379 } 01380 } 01381 } 01382 01383 inline uint64_t bit_magic::read_int(const uint64_t* word, uint8_t offset, const uint8_t len) 01384 { 01385 uint64_t w1 = (*word)>>offset; 01386 if ((offset+len) > 64) { // if offset+len > 64 01387 return w1 | // w1 or w2 adepted: 01388 ((*(word+1) & bit_magic::Li1Mask[(offset+len)&0x3F]) // set higher bits zero 01389 << (64-offset)); // move bits to the left 01390 } else { 01391 return w1 & bit_magic::Li1Mask[len]; 01392 } 01393 } 01394 01395 inline uint64_t bit_magic::read_int_and_move(const uint64_t*& word, uint8_t& offset, const uint8_t len) 01396 { 01397 uint64_t w1 = (*word)>>offset; 01398 if ((offset = (offset+len))>=64) { // if offset+len > 64 01399 if (offset==64) { 01400 offset &= 0x3F; 01401 ++word; 01402 return w1; 01403 } else { 01404 offset &= 0x3F; 01405 return w1 | 01406 (((*(++word)) & bit_magic::Li1Mask[offset]) << (len-offset)); 01407 } 01408 } else { 01409 return w1 & bit_magic::Li1Mask[len]; 01410 } 01411 } 01412 01413 inline uint64_t bit_magic::readUnaryInt(const uint64_t* word, uint8_t offset) 01414 { 01415 register uint64_t w = *word >> offset; 01416 if (w) { 01417 return bit_magic::r1BP(w)/*+1*/; 01418 } else { 01419 if (0!=(w=*(++word))) 01420 return bit_magic::r1BP(w)+64-offset/*+1*/; 01421 uint64_t cnt=2; 01422 while (0==(w=*(++word))) 01423 ++cnt; 01424 return bit_magic::r1BP(w)+(cnt<<6)/*cnt*64*/-offset/*+1*/;//+(64-offset) 01425 } 01426 return 0; 01427 } 01428 01429 inline uint64_t bit_magic::readUnaryIntAndMove(const uint64_t*& word, uint8_t& offset) 01430 { 01431 register uint64_t w = (*word) >> offset; // temporary variable is good for the performance 01432 if (w) { 01433 uint8_t r = bit_magic::r1BP(w); 01434 offset = (offset + r+1)&0x3F; 01435 // we know that offset + r +1 <= 64, so if the new offset equals 0 increase word 01436 word += (offset==0); 01437 return r; 01438 } else { 01439 uint8_t rr=0; 01440 if (0!=(w=*(++word))) { 01441 rr = bit_magic::r1BP(w)+64-offset; 01442 offset = (offset+rr+1)&0x3F; 01443 word += (offset==0); 01444 return rr; 01445 } else { 01446 uint64_t cnt_1=1; 01447 while (0==(w=*(++word))) 01448 ++cnt_1; 01449 rr = bit_magic::r1BP(w)+64-offset; 01450 offset = (offset+rr+1)&0x3F; 01451 word += (offset==0); 01452 return ((cnt_1)<<6) + rr; 01453 } 01454 } 01455 return 0; 01456 } 01457 01458 inline void bit_magic::move_right(const uint64_t*& word, uint8_t& offset, const uint8_t len) 01459 { 01460 if ((offset+=len)&0xC0) { // if offset >= 65 01461 offset&=0x3F; 01462 ++word; 01463 } 01464 } 01465 01466 inline void bit_magic::move_left(const uint64_t*& word, uint8_t& offset, const uint8_t len) 01467 { 01468 if ((offset-=len)&0xC0) { // if offset-len<0 01469 offset&=0x3F; 01470 --word; 01471 } 01472 } 01473 01474 inline uint64_t bit_magic::next(const uint64_t* word, uint64_t idx) 01475 { 01476 word += (idx>>6); 01477 if (*word & ~Li1Mask[idx&0x3F]) { 01478 return (idx & ~((size_t)0x3F)) + r1BP(*word & ~Li1Mask[idx&0x3F]); 01479 } 01480 idx = (idx & ~((size_t)0x3F)) + 64; 01481 ++word; 01482 while (*word==0) { 01483 idx += 64; 01484 ++word; 01485 } 01486 return idx + r1BP(*word); 01487 } 01488 01489 inline uint64_t bit_magic::prev(const uint64_t* word, uint64_t idx) 01490 { 01491 word += (idx>>6); 01492 if (*word & Li1Mask[(idx&0x3F)+1]) { 01493 return (idx & ~((size_t)0x3F)) + l1BP(*word & Li1Mask[(idx&0x3F)+1]); 01494 } 01495 idx = (idx & ~((size_t)0x3F)) - 64; 01496 --word; 01497 while (*word==0) { 01498 idx -= 64; 01499 --word; 01500 } 01501 return idx + l1BP(*word); 01502 } 01503 01504 } // end namespace sdsl 01505 01506 #endif