Difference between revisions of "SISSY Bitboards"

From Chessprogramming wiki
Jump to: navigation, search
Line 10: Line 10:
  
 
=Queen Attacks=
 
=Queen Attacks=
The general SISSY attack getter requires a 3-dimensional 1MByte lookup table, with pre-initialized attack bitboards indexed by 6-bit square, 8-bit rank occupancy and 3-bit rank index.
+
The general SISSY attack getter requires a 3-dimensional 1MByte lookup table, with pre-initialized attack bitboards indexed by 3-bit rank index, 6-bit square, 8-bit rank occupancy.
 
<pre>
 
<pre>
U64 qss[64][256][8];  
+
U64 qss[8][64][256];  
  
 
U64 queenAttacks(U64 occ, enumSquare sq) {   
 
U64 queenAttacks(U64 occ, enumSquare sq) {   
 
   return                             
 
   return                             
     qss[sq][ occ        & 255][0] &   
+
     qss[0][sq][ occ        & 255] &   
     qss[sq][(occ >>  8) & 255][1] &   
+
     qss[1][sq][(occ >>  8) & 255] &   
     qss[sq][(occ >> 16) & 255][2] &   
+
     qss[2][sq][(occ >> 16) & 255] &   
     qss[sq][(occ >> 24) & 255][3] &   
+
     qss[3][sq][(occ >> 24) & 255] &   
     qss[sq][(occ >> 32) & 255][4] &   
+
     qss[4][sq][(occ >> 32) & 255] &   
     qss[sq][(occ >> 40) & 255][5] &   
+
     qss[5][sq][(occ >> 40) & 255] &   
     qss[sq][(occ >> 48) & 255][6] &   
+
     qss[6][sq][(occ >> 48) & 255] &   
     qss[sq][(occ >> 56) & 255][7] ;   
+
     qss[7][sq][(occ >> 56) & 255] ;   
}    
+
}
 
</pre>
 
</pre>
 
The following sample demonstrates how to determine queen attacks. The intersection of the eight split index addressed attack bitboards yields the desired queen attack bitboard (red squares attacked by queen, blue reset due to blocker):   
 
The following sample demonstrates how to determine queen attacks. The intersection of the eight split index addressed attack bitboards yields the desired queen attack bitboard (red squares attacked by queen, blue reset due to blocker):   
Line 33: Line 33:
 
  occ = 0x8041A20428054A00
 
  occ = 0x8041A20428054A00
 
<pre>
 
<pre>
     [d4][0x80][7]     [d4][0x41][6]     [d4][0xA2][5]     [d4][0x04][4]   
+
     [7][d4][0x80]     [6][d4][0x41]     [5][d4][0xA2]     [4][d4][0x04]   
 
   . . . 1 . . . 1*  . . . 1 . . . 0  . . . 1 . . . 0  . . . 1 . . . 1
 
   . . . 1 . . . 1*  . . . 1 . . . 0  . . . 1 . . . 0  . . . 1 . . . 1
 
   1 . . 1 . . 1 .  1 . . 1 . . 1 .*  0 . . 1 . . 0 .  0 . . 1 . . 1 .
 
   1 . . 1 . . 1 .  1 . . 1 . . 1 .*  0 . . 1 . . 0 .  0 . . 1 . . 1 .
Line 43: Line 43:
 
   1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .
 
   1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .
  
     [d4][0x28][3]     [d4][0x05][2]     [d4][0x4a][1]     [d4][0x00][0]  
+
     [3][d4][0x28]     [2][d4][0x05]     [1][d4][0x4a]     [0][d4][0x00]  
 
   . . . 1 . . . 1  . . . 1 . . . 1  . . . 1 . . . 1  . . . 1 . . . 1
 
   . . . 1 . . . 1  . . . 1 . . . 1  . . . 1 . . . 1  . . . 1 . . . 1
 
   1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .
 
   1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .  1 . . 1 . . 1 .
Line 73: Line 73:
 
union bbu {U64 b64; bb8 b08;};
 
union bbu {U64 b64; bb8 b08;};
  
U64 qss[64][256][8]; // 1M
+
U64 qss[8][64][256]; // 1M
  
 
U64 queenAttacks(U64 occ, enumSquare sq) {
 
U64 queenAttacks(U64 occ, enumSquare sq) {
 
   bbu o; o.b64 = occ;
 
   bbu o; o.b64 = occ;
 
   return  
 
   return  
     qss[sq][o.b08.r1][0] &
+
     qss[0][sq][o.b08.r1] &
     qss[sq][o.b08.r2][1] &
+
     qss[1][sq][o.b08.r2] &
     qss[sq][o.b08.r3][2] &
+
     qss[2][sq][o.b08.r3] &
     qss[sq][o.b08.r4][3] &
+
     qss[3][sq][o.b08.r4] &
     qss[sq][o.b08.r5][4] &
+
     qss[4][sq][o.b08.r5] &
     qss[sq][o.b08.r6][5] &
+
     qss[5][sq][o.b08.r6] &
     qss[sq][o.b08.r7][6] &
+
     qss[6][sq][o.b08.r7] &
     qss[sq][o.b08.r8][7];
+
     qss[7][sq][o.b08.r8];
};
+
}
 
</pre>
 
</pre>
 
Targeting [[X86-64]], the generated [[Assembly|assembly]] is likely similar to the above shift/and255 code <ref>[https://godbolt.org/ Compiler Explorer] with x86-64 clang (experimental - Wlifetime), -O3</ref>.
 
Targeting [[X86-64]], the generated [[Assembly|assembly]] is likely similar to the above shift/and255 code <ref>[https://godbolt.org/ Compiler Explorer] with x86-64 clang (experimental - Wlifetime), -O3</ref>.
 
<pre>
 
<pre>
queenAttacks(unsigned long, int)
+
queenAttacks(unsigned long long, int):                    # @queenAttacks(unsigned long long, int)
 +
        mov    rdx, rdi
 
         mov    rcx, rdi
 
         mov    rcx, rdi
         shr    rcx, 50
+
         shr    rcx, 56
        and    ecx, -64
+
         movsxd  rsi, esi
         movsxd  rdx, esi
+
         movzx  edi, dl
         movzx  esi, dil
+
         shl    rsi, 11
         shl    rsi, 6
+
         movzx  eax, dh
         shl    rdx, 14
+
         mov    rax, qword ptr [rsi + 8*rax + qss+131072]
        mov    rax, rdi
+
         and    rax, qword ptr [rsi + 8*rdi + qss]
        shr    rax, 2
+
         mov    rdi, rdx
        and    eax, 16320
+
         shr    rdi, 13
         mov    rax, qword ptr [rdx + rax + qss+8]
+
         and    edi, 2040
         and    rax, qword ptr [rdx + rsi + qss]
+
         and    rax, qword ptr [rsi + rdi + qss+262144]
         mov    rsi, rdi
+
         mov    rdi, rdx
         shr    rsi, 10
+
         shr    rdi, 21
         and    esi, 16320
+
         and    edi, 2040
         and    rax, qword ptr [rdx + rsi + qss+16]
+
         and    rax, qword ptr [rsi + rdi + qss+393216]
         mov    rsi, rdi
+
         mov    rdi, rdx
         shr    rsi, 18
+
         shr    rdi, 29
         and    esi, 16320
+
         and    edi, 2040
         and    rax, qword ptr [rdx + rsi + qss+24]
+
         and    rax, qword ptr [rsi + rdi + qss+524288]
         mov    rsi, rdi
+
         mov    rdi, rdx
         shr    rsi, 26
+
         shr    rdi, 37
         and    esi, 16320
+
         and    edi, 2040
         and    rax, qword ptr [rdx + rsi + qss+32]
+
         and    rax, qword ptr [rsi + rdi + qss+655360]
         mov    rsi, rdi
+
         shr    rdx, 45
         shr    rsi, 34
+
         and    edx, 2040
         and    esi, 16320
+
         and    rax, qword ptr [rsi + rdx + qss+786432]
         and    rax, qword ptr [rdx + rsi + qss+40]
+
         and    rax, qword ptr [rsi + 8*rcx + qss+917504]
         shr    rdi, 42
 
         and    edi, 16320
 
         and    rax, qword ptr [rdx + rdi + qss+48]
 
         and    rax, qword ptr [rdx + rcx + qss+56]
 
 
         ret
 
         ret
 
</pre>
 
</pre>
Line 129: Line 126:
 
Taking advantage of the [[First Rank Attacks#TheOuterSquares|inner six bits]] optimization for SISSY bishop attacks, but still much memory and computation compared to other techniques:
 
Taking advantage of the [[First Rank Attacks#TheOuterSquares|inner six bits]] optimization for SISSY bishop attacks, but still much memory and computation compared to other techniques:
 
<pre>
 
<pre>
U64 bss[64][64][6]; // 192 K
+
U64 bss[6][64][64]; // 192 K
  
 
U64 bishopAttacks(U64 occ, enumSquare sq) {
 
U64 bishopAttacks(U64 occ, enumSquare sq) {
 
   return
 
   return
bss[sq][(occ >>  9) & 63][0] &  // 2nd rank
+
    bss[0][sq][(occ >>  9) & 63] &  // 2nd rank
bss[sq][(occ >> 17) & 63][1] &   
+
    bss[1][sq][(occ >> 17) & 63] &   
bss[sq][(occ >> 25) & 63][2] &   
+
    bss[2][sq][(occ >> 25) & 63] &   
bss[sq][(occ >> 33) & 63][3] &   
+
    bss[3][sq][(occ >> 33) & 63] &   
bss[sq][(occ >> 41) & 63][4] &   
+
    bss[4][sq][(occ >> 41) & 63] &   
bss[sq][(occ >> 49) & 63][5] ;  // 7th rank
+
    bss[5][sq][(occ >> 49) & 63] ;  // 7th rank
}    
+
}  
 
</pre>
 
</pre>
 
One may half the table size by combining occupancy indices of two consecutive ranks, since their relevant bits on [[Diagonals|diagonal]] and [[Anti-Diagonals|anti-diagonal]] are always disjoint.
 
One may half the table size by combining occupancy indices of two consecutive ranks, since their relevant bits on [[Diagonals|diagonal]] and [[Anti-Diagonals|anti-diagonal]] are always disjoint.
Line 147: Line 144:
 
SISSY file attack generation looks not that competitive compared to other techniques as well, see for instance [[Kindergarten Bitboards#File-Attacks|Kindergarten file attacks]] or [[Hyperbola Quintessence]].
 
SISSY file attack generation looks not that competitive compared to other techniques as well, see for instance [[Kindergarten Bitboards#File-Attacks|Kindergarten file attacks]] or [[Hyperbola Quintessence]].
 
<pre>
 
<pre>
U64 rss[64][2][6];  // 6K
+
U64 rss[6][2][64];  // 6K
  
 
U64 fileAttacks(U64 occ, enumSquare sq) {  
 
U64 fileAttacks(U64 occ, enumSquare sq) {  
 
   occ >>= (sq & 7); // shift occupancy to A file
 
   occ >>= (sq & 7); // shift occupancy to A file
 
   return
 
   return
rss[sq][(occ >>  8) & 1][0] &  // 2nd rank
+
    rss[0][(occ >>  8) & 1][sq] &  // 2nd rank
rss[sq][(occ >> 16) & 1][1] &   
+
    rss[1][(occ >> 16) & 1][sq] &   
rss[sq][(occ >> 24) & 1][2] &   
+
    rss[2][(occ >> 24) & 1][sq] &   
rss[sq][(occ >> 32) & 1][3] &   
+
    rss[3][(occ >> 32) & 1][sq] &   
rss[sq][(occ >> 40) & 1][4] &   
+
    rss[4][(occ >> 40) & 1][sq] &   
rss[sq][(occ >> 48) & 1][5] ;  // 7th rank
+
    rss[5][(occ >> 48) & 1][sq] ;  // 7th rank
}    
+
}    
 
</pre>
 
</pre>
  

Revision as of 19:33, 16 February 2020

Home * Board Representation * Bitboards * Sliding Piece Attacks * SISSY Bitboards

Paul Klee - Hauptweg und Nebenwege, 1929 [1]

SISSY Bitboards (Split Index Super Set Yielding),
a new and interesting approach to determine sliding piece attacks devised by Michael Sherwin, introduced and discussed in CCC in February 2020 [2] as an improvement of his Sherwin Bitboards. SISSY bitboards apply the occupancy lookup rank by rank to intersect supersets of the attack bitboards from the same square considering blocking pieces on that particular rank only. While using eight lookups to intersect sounds expensive on the first glance, SISSY Bitboards yields up to eight ray-directions of a queen in one run with cheap instructions and high IPC potential, and thus may be an alternative for magic bitboards on architectures with slow multiplication, in particular for the queen.

Cpwmappinghint.JPG
Code samples and bitboard diagrams rely on Little endian file and rank mapping.

Queen Attacks

The general SISSY attack getter requires a 3-dimensional 1MByte lookup table, with pre-initialized attack bitboards indexed by 3-bit rank index, 6-bit square, 8-bit rank occupancy.

U64 qss[8][64][256]; 

U64 queenAttacks(U64 occ, enumSquare sq) {  
  return                             
    qss[0][sq][ occ        & 255] &  
    qss[1][sq][(occ >>  8) & 255] &  
    qss[2][sq][(occ >> 16) & 255] &  
    qss[3][sq][(occ >> 24) & 255] &  
    qss[4][sq][(occ >> 32) & 255] &  
    qss[5][sq][(occ >> 40) & 255] &  
    qss[6][sq][(occ >> 48) & 255] &  
    qss[7][sq][(occ >> 56) & 255] ;  
}

The following sample demonstrates how to determine queen attacks. The intersection of the eight split index addressed attack bitboards yields the desired queen attack bitboard (red squares attacked by queen, blue reset due to blocker):

Carina Jørgensen, Queen's Star [3]
 abcdefgh 
8
7
6
5
4
3
2
1
    
    
    
    
    
    
    
    
       
      
     
       
      
      
     
        
       ♚
♛     ♟ 
 ♟   ♟ ♟
  ♞     
   ♕ ♘  
♙ ♙     
 ♙ ♔  ♙ 
        
8
7
6
5
4
3
2
1
 abcdefgh 
7k/q5p1/1p3p1p/2n5/3Q1N2/P1P5/1P1K2P1/8 w - - 
occ = 0x8041A20428054A00
    [7][d4][0x80]     [6][d4][0x41]     [5][d4][0xA2]     [4][d4][0x04]  
   . . . 1 . . . 1*  . . . 1 . . . 0   . . . 1 . . . 0   . . . 1 . . . 1
   1 . . 1 . . 1 .   1 . . 1 . . 1 .*  0 . . 1 . . 0 .   0 . . 1 . . 1 .
   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .*  . 0 . 1 . 1 . .
   . . 1 1 1 . . . & . . 1 1 1 . . . & . . 1 1 1 . . . & . . 1 1 1 . . .* 
   1 1 1 Q 1 1 1 1   1 1 1 Q 1 1 1 1   1 1 1 Q 1 1 1 1   1 1 1 Q 1 1 1 1
   . . 1 1 1 . . .   . . 1 1 1 . . .   . . 1 1 1 . . .   . . 1 1 1 . . .
   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .
   1 . . 1 . . 1 .   1 . . 1 . . 1 .   1 . . 1 . . 1 .   1 . . 1 . . 1 .

    [3][d4][0x28]     [2][d4][0x05]     [1][d4][0x4a]     [0][d4][0x00] 
   . . . 1 . . . 1   . . . 1 . . . 1   . . . 1 . . . 1   . . . 1 . . . 1
   1 . . 1 . . 1 .   1 . . 1 . . 1 .   1 . . 1 . . 1 .   1 . . 1 . . 1 .
   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .   . 1 . 1 . 1 . .
 & . . 1 1 1 . . . & . . 1 1 1 . . . & . . 1 1 1 . . . & . . 1 1 1 . . .
   1 1 1 Q 1 1 0 0*  1 1 1 Q 1 1 1 1   1 1 1 Q 1 1 1 1   1 1 1 Q 1 1 1 1
   . . 1 1 1 . . .   . . 1 1 1 . . .*  . . 1 1 1 . . .   . . 1 1 1 . . .
   . 1 . 1 . 1 . .   . 0 . 1 . 1 . .   . 1 . 1 . 1 . .*  . 1 . 1 . 1 . .
   1 . . 1 . . 1 .   0 . . 1 . . 1 .   0 . . 0 . . 1 .   1 . . 1 . . 1 .*
  
   . . . 1 . . . 0 
   0 . . 1 . . 0 . 
   . 0 . 1 . 1 . . 
 = . . 1 1 1 . . . 
   1 1 1 Q 1 1 0 0 
   . . 1 1 1 . . . 
   . 0 . 1 . 1 . . 
   0 . . 0 . . 1 . 

Since queens may appear on file A and H with attacks on that outer files, a reduction of the occupancy index range due to the inner six bits is not possible for queens. We will elaborate on reducing occupancy index range possibilities for bishops and rooks later. However, for queens, one may skip the outer ranks if combined with a general rank lookup.

Union Bitboard

Rather then using shift/and instructions to get the rank-wise occupancies, and similar to Sherwin Bitboards, one may use a union of a bitboard and a struct or array of eight bytes (Portability concerning endianness is an issue), to appear the code looking simpler.

typedef unsigned char U08;
struct bb8 {U08 r1; U08 r2; U08 r3; U08 r4; U08 r5; U08 r6; U08 r7; U08 r8;};
union bbu {U64 b64; bb8 b08;};

U64 qss[8][64][256]; // 1M

U64 queenAttacks(U64 occ, enumSquare sq) {
  bbu o; o.b64 = occ;
  return 
    qss[0][sq][o.b08.r1] &
    qss[1][sq][o.b08.r2] &
    qss[2][sq][o.b08.r3] &
    qss[3][sq][o.b08.r4] &
    qss[4][sq][o.b08.r5] &
    qss[5][sq][o.b08.r6] &
    qss[6][sq][o.b08.r7] &
    qss[7][sq][o.b08.r8];
}

Targeting X86-64, the generated assembly is likely similar to the above shift/and255 code [4].

queenAttacks(unsigned long long, int):                     # @queenAttacks(unsigned long long, int)
        mov     rdx, rdi
        mov     rcx, rdi
        shr     rcx, 56
        movsxd  rsi, esi
        movzx   edi, dl
        shl     rsi, 11
        movzx   eax, dh
        mov     rax, qword ptr [rsi + 8*rax + qss+131072]
        and     rax, qword ptr [rsi + 8*rdi + qss]
        mov     rdi, rdx
        shr     rdi, 13
        and     edi, 2040
        and     rax, qword ptr [rsi + rdi + qss+262144]
        mov     rdi, rdx
        shr     rdi, 21
        and     edi, 2040
        and     rax, qword ptr [rsi + rdi + qss+393216]
        mov     rdi, rdx
        shr     rdi, 29
        and     edi, 2040
        and     rax, qword ptr [rsi + rdi + qss+524288]
        mov     rdi, rdx
        shr     rdi, 37
        and     edi, 2040
        and     rax, qword ptr [rsi + rdi + qss+655360]
        shr     rdx, 45
        and     edx, 2040
        and     rax, qword ptr [rsi + rdx + qss+786432]
        and     rax, qword ptr [rsi + 8*rcx + qss+917504]
        ret

Bishop Attacks

Taking advantage of the inner six bits optimization for SISSY bishop attacks, but still much memory and computation compared to other techniques:

U64 bss[6][64][64]; // 192 K

U64 bishopAttacks(U64 occ, enumSquare sq) {
  return
    bss[0][sq][(occ >>  9) & 63] &  // 2nd rank
    bss[1][sq][(occ >> 17) & 63] &  
    bss[2][sq][(occ >> 25) & 63] &  
    bss[3][sq][(occ >> 33) & 63] &  
    bss[4][sq][(occ >> 41) & 63] &  
    bss[5][sq][(occ >> 49) & 63] ;  // 7th rank
}    

One may half the table size by combining occupancy indices of two consecutive ranks, since their relevant bits on diagonal and anti-diagonal are always disjoint.

Rook Attacks

If combined with the rank lookup to dense the occupy index range to 2, considering only blockers on the same file, SISSY file attack generation looks not that competitive compared to other techniques as well, see for instance Kindergarten file attacks or Hyperbola Quintessence.

U64 rss[6][2][64];  // 6K

U64 fileAttacks(U64 occ, enumSquare sq) { 
  occ >>= (sq & 7); // shift occupancy to A file
  return
    rss[0][(occ >>  8) & 1][sq] &  // 2nd rank
    rss[1][(occ >> 16) & 1][sq] &  
    rss[2][(occ >> 24) & 1][sq] &  
    rss[3][(occ >> 32) & 1][sq] &  
    rss[4][(occ >> 40) & 1][sq] &  
    rss[5][(occ >> 48) & 1][sq] ;  // 7th rank
}      

Forum Posts

References

Up one Level