Discussion:
[A51] Query on Kraken
Ajay Fuloria
2016-04-07 12:05:47 UTC
Permalink
Hi!!

I went through the kraken code and I must commend you for your work. I was
trying to understand the code so that I came make use of it.

I am facing problem in understanding the CalcTable() function in A5cpu.cpp
... You create a mask lookup in this function. Can you take some time out
and let me know how you are using these masks for keysearching and also why
are you looping 25 + 8 + 8 times in Process() function.

Thanks and regards,

Ajay


For your quick reference the code of these two functions is as below :


void A5Cpu::CalcTables(void)
{
/* Calculate clocking table */
for(int i=0; i< 16 ; i++) {
for(int j=0; j< 16 ; j++) {
for(int k=0; k< 16 ; k++) {
/* Copy input */
int m1 = i;
int m2 = j;
int m3 = k;
/* Generate masks */
int cm1 = 0;
int cm2 = 0;
int cm3 = 0;
/* Counter R2 */
int r2count = 0;
for (int l = 0; l < 4 ; l++ ) {
cm1 = cm1 << 1;
cm2 = cm2 << 1;
cm3 = cm3 << 1;
int maj = ((m1>>3)+(m2>>3)+(m3>>3))>>1;
if ((m1>>3)==maj) {
m1 = (m1<<1)&0x0f;
cm1 |= 0x01;
}
if ((m2>>3)==maj) {
m2 = (m2<<1)&0x0f;
cm2 |= 0x01;
r2count++;
}
if ((m3>>3)==maj) {
m3 = (m3<<1)&0x0f;
cm3 |= 0x01;
}
}
// printf( "%x %x %x -> %x:%x:%x\n", i,j,k, cm1, cm2, cm3);
int index = i*16*16+j*16+k;
mClockMask[index] = (r2count<<12) | (cm1<<8) | (cm2<<4) |
cm3;
}
}
}

/* Calculate 111000 + clock mask table */
for (int i=0; i < 64 ; i++ ) {
for(int j=0; j<16; j++) {
int count = PopcountNibble(j);
int feedback = 0;
int data = i;
for (int k=0; k<count; k++) {
feedback = feedback << 1;
int v = (data>>5) ^ (data>>4) ^ (data>>3);
data = data << 1;
feedback ^= (v&0x01);
}
data = i;
int mask = j;
int output = 0;
for (int k=0; k<4; k++) {
output = (output<<1) ^ ((data>>5)&0x01);
if (mask&0x08) {
data = data << 1;
}
mask = mask << 1;
}
int index = i * 16 + j;
mTable6bit[index] = (feedback<<4) | output;
// printf("%02x:%x -> %x %x\n", i,j,feedback, output);
}
}

/* Calculate 11000 + clock mask table */
for (int i=0; i < 32 ; i++ ) {
for(int j=0; j<16; j++) {
int count = PopcountNibble(j);
int feedback = 0;
int data = i;
for (int k=0; k<count; k++) {
feedback = feedback << 1;
int v = (data>>4) ^ (data>>3);
data = data << 1;
feedback ^= (v&0x01);
}
data = i;
int mask = j;
int output = 0;
for (int k=0; k<4; k++) {
output = (output<<1) ^ ((data>>4)&0x01);
if (mask&0x08) {
data = data << 1;
}
mask = mask << 1;
}
int index = i * 16 + j;
mTable5bit[index] = (feedback<<4) | output;
// printf("%02x:%x -> %x %x\n", i,j,feedback, output);
}
}

/* Calculate 1000 + clock mask table */
for (int i=0; i < 16 ; i++ ) {
for(int j=0; j<16; j++) {
int count = PopcountNibble(j);
int feedback = 0;
int data = i;
for (int k=0; k<count; k++) {
feedback = feedback << 1;
int v = (data>>3);
data = data << 1;
feedback ^= (v&0x01);
}
int index = i * 16 + j;
mTable4bit[index] = (count<<4)|feedback;
// printf("%02x:%x -> %x\n", i,j,feedback );
}
}
}



---------------------------------------------------------


void A5Cpu::Process(void)
{
bool active = false;
struct timeval tStart;
struct timeval tEnd;

uint64_t start_point;
uint64_t target;
uint64_t start_point_r;
int32_t start_round;
int32_t stop_round;
uint32_t advance;
const uint32_t* RFtable;
void* context;

for(;;) {
if (!mRunning) break;

/* Get input */
sem_wait(&mMutex);
if (mInputStart.size()) {
start_point = mInputStart.front();
mInputStart.pop_front();
target = mInputTarget.front();
mInputTarget.pop_front();
start_point_r = ReverseBits(start_point);
start_round = mInputRound.front();
mInputRound.pop_front();
stop_round = mInputRoundStop.front();
mInputRoundStop.pop_front();
advance = mInputAdvance.front();
mInputAdvance.pop_front();
context = mInputContext.front();
mInputContext.pop_front();
map< uint32_t, class Advance* >::iterator it =
mAdvances.find(advance);
if (it==mAdvances.end()) {
class Advance* adv = new Advance(advance, mMaxRound);
mAdvances[advance] = adv;
RFtable = adv->getRFtable();
} else {
RFtable = (*it).second->getRFtable();
}
active = true;
// printf("Insert\n");
}
sem_post(&mMutex);

if (!active) {
/* Don't use CPU while idle */
usleep(250);
continue;
}

gettimeofday( &tStart, NULL );
/* Do something */
unsigned int out_hi = start_point_r>>32;
unsigned int out_lo = start_point_r;

unsigned int target_lo = target;
unsigned int target_hi = target >> 32;

unsigned int last_key_lo;
unsigned int last_key_hi;

bool keysearch = (target != 0ULL);

for (int round=start_round; round < stop_round; ) {
out_lo = out_lo ^ RFtable[2*round];////not convibced
out_hi = out_hi ^ RFtable[2*round+1];

if ((out_hi>>mCondition)==0) { // check for distinguished points
else new round
// uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
// res = ReverseBits(res);
// printf("New round %i %016llx %08x:%08x\n", round, res,
out_hi, out_lo);
round++;
if (round>=stop_round) break;
}

unsigned int lfsr1 = out_lo;
unsigned int lfsr2 = (out_hi << 13) | (out_lo >> 19);
unsigned int lfsr3 = out_hi >> 9;

last_key_hi = out_hi;
last_key_lo =out_lo;

for (int i=0; i<25 ; i++) {
int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) |
((lfsr3>>7)&0xf);
int masks = mClockMask[clocks];

/* lfsr1 */
unsigned int tmask = (masks>>8)&0x0f;
unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);

/* lfsr2 */
tmask = (masks>>4)&0x0f;
tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
out_hi = out_hi ^ (tval&0x0f);
lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);

/* lfsr3 */
tmask = masks & 0x0f;
tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
}
for (int i=0; i<8 ; i++) {
int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) |
((lfsr3>>7)&0xf);
int masks = mClockMask[clocks];

/* lfsr1 */
unsigned int tmask = (masks>>8)&0x0f;
unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
out_hi = (out_hi << 4) | (tval&0x0f);
unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);

/* lfsr2 */
tmask = (masks>>4)&0x0f;
tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
out_hi = out_hi ^ (tval&0x0f);
lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);

/* lfsr3 */
tmask = masks & 0x0f;
tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
out_hi = out_hi ^ (tval&0x0f);
tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
}
for (int i=0; i<8 ; i++) {
int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) |
((lfsr3>>7)&0xf);
int masks = mClockMask[clocks];

/* lfsr1 */
unsigned int tmask = (masks>>8)&0x0f;
unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
out_lo = (out_lo << 4) | (tval&0x0f);
unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);

/* lfsr2 */
tmask = (masks>>4)&0x0f;
tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
out_lo = out_lo ^ (tval&0x0f);
lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);

/* lfsr3 */
tmask = masks & 0x0f;
tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
out_lo = out_lo ^ (tval&0x0f);
tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
}
if (keysearch&&(target_hi==out_hi)&&(target_lo==out_lo)) {
/* report key as finishing state */
out_hi = last_key_hi;
out_lo = last_key_lo;
start_round = -1;
break;
}
}


/////////////////////////////
gettimeofday( &tEnd, NULL );
unsigned int uSecs = 1000000 * (tEnd.tv_sec - tStart.tv_sec);
uSecs += (tEnd.tv_usec - tStart.tv_usec);

// printf("Completed in %i ms\n", uSecs/1000);

/* Report completed chains */
sem_wait(&mMutex);

uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
res = ReverseBits(res);
mOutput.push( pair<uint64_t,uint64_t>(start_point,res) );
mOutputStartRound.push( start_round );
mOutputContext.push( context );
active = false;

sem_post(&mMutex);

}
}
--
Ajay Fuloria
merlinsignals.blogspot.in
ᐧ
Jan Hrach
2016-04-07 13:39:48 UTC
Permalink
and also why are you looping 25 + 8 + 8 times in Process() function
It looks like he is computing 4 iterations of A5/1 in each cycle, and the tables were generated with 100 dummy clockings (25*4 = 100) and then you need 64 bits of keystream ((8+8)*4).
Can you take some time out and let me know how you are using these masks for keysearching
This is some optimization how to compute 4 iterations in one cycle.
It would be interesting to test whether it makes sense on current SIMD hardware, or if bitslicing is faster.
Hi!!
I went through the kraken code and I must commend you for your work. I was trying to understand the code so that I came make use of it.
I am facing problem in understanding the CalcTable() function in A5cpu.cpp ... You create a mask lookup in this function. Can you take some time out and let me know how you are using these masks for keysearching and also why are you looping 25 + 8 + 8 times in Process() function.
Thanks and regards,
Ajay
void A5Cpu::CalcTables(void)
{
/* Calculate clocking table */
for(int i=0; i< 16 ; i++) {
for(int j=0; j< 16 ; j++) {
for(int k=0; k< 16 ; k++) {
/* Copy input */
int m1 = i;
int m2 = j;
int m3 = k;
/* Generate masks */
int cm1 = 0;
int cm2 = 0;
int cm3 = 0;
/* Counter R2 */
int r2count = 0;
for (int l = 0; l < 4 ; l++ ) {
cm1 = cm1 << 1;
cm2 = cm2 << 1;
cm3 = cm3 << 1;
int maj = ((m1>>3)+(m2>>3)+(m3>>3))>>1;
if ((m1>>3)==maj) {
m1 = (m1<<1)&0x0f;
cm1 |= 0x01;
}
if ((m2>>3)==maj) {
m2 = (m2<<1)&0x0f;
cm2 |= 0x01;
r2count++;
}
if ((m3>>3)==maj) {
m3 = (m3<<1)&0x0f;
cm3 |= 0x01;
}
}
// printf( "%x %x %x -> %x:%x:%x\n", i,j,k, cm1, cm2, cm3);
int index = i*16*16+j*16+k;
mClockMask[index] = (r2count<<12) | (cm1<<8) | (cm2<<4) | cm3;
}
}
}
/* Calculate 111000 + clock mask table */
for (int i=0; i < 64 ; i++ ) {
for(int j=0; j<16; j++) {
int count = PopcountNibble(j);
int feedback = 0;
int data = i;
for (int k=0; k<count; k++) {
feedback = feedback << 1;
int v = (data>>5) ^ (data>>4) ^ (data>>3);
data = data << 1;
feedback ^= (v&0x01);
}
data = i;
int mask = j;
int output = 0;
for (int k=0; k<4; k++) {
output = (output<<1) ^ ((data>>5)&0x01);
if (mask&0x08) {
data = data << 1;
}
mask = mask << 1;
}
int index = i * 16 + j;
mTable6bit[index] = (feedback<<4) | output;
// printf("%02x:%x -> %x %x\n", i,j,feedback, output);
}
}
/* Calculate 11000 + clock mask table */
for (int i=0; i < 32 ; i++ ) {
for(int j=0; j<16; j++) {
int count = PopcountNibble(j);
int feedback = 0;
int data = i;
for (int k=0; k<count; k++) {
feedback = feedback << 1;
int v = (data>>4) ^ (data>>3);
data = data << 1;
feedback ^= (v&0x01);
}
data = i;
int mask = j;
int output = 0;
for (int k=0; k<4; k++) {
output = (output<<1) ^ ((data>>4)&0x01);
if (mask&0x08) {
data = data << 1;
}
mask = mask << 1;
}
int index = i * 16 + j;
mTable5bit[index] = (feedback<<4) | output;
// printf("%02x:%x -> %x %x\n", i,j,feedback, output);
}
}
/* Calculate 1000 + clock mask table */
for (int i=0; i < 16 ; i++ ) {
for(int j=0; j<16; j++) {
int count = PopcountNibble(j);
int feedback = 0;
int data = i;
for (int k=0; k<count; k++) {
feedback = feedback << 1;
int v = (data>>3);
data = data << 1;
feedback ^= (v&0x01);
}
int index = i * 16 + j;
mTable4bit[index] = (count<<4)|feedback;
// printf("%02x:%x -> %x\n", i,j,feedback );
}
}
}
---------------------------------------------------------
void A5Cpu::Process(void)
{
bool active = false;
struct timeval tStart;
struct timeval tEnd;
uint64_t start_point;
uint64_t target;
uint64_t start_point_r;
int32_t start_round;
int32_t stop_round;
uint32_t advance;
const uint32_t* RFtable;
void* context;
for(;;) {
if (!mRunning) break;
/* Get input */
sem_wait(&mMutex);
if (mInputStart.size()) {
start_point = mInputStart.front();
mInputStart.pop_front();
target = mInputTarget.front();
mInputTarget.pop_front();
start_point_r = ReverseBits(start_point);
start_round = mInputRound.front();
mInputRound.pop_front();
stop_round = mInputRoundStop.front();
mInputRoundStop.pop_front();
advance = mInputAdvance.front();
mInputAdvance.pop_front();
context = mInputContext.front();
mInputContext.pop_front();
map< uint32_t, class Advance* >::iterator it = mAdvances.find(advance);
if (it==mAdvances.end()) {
class Advance* adv = new Advance(advance, mMaxRound);
mAdvances[advance] = adv;
RFtable = adv->getRFtable();
} else {
RFtable = (*it).second->getRFtable();
}
active = true;
// printf("Insert\n");
}
sem_post(&mMutex);
if (!active) {
/* Don't use CPU while idle */
usleep(250);
continue;
}
gettimeofday( &tStart, NULL );
/* Do something */
unsigned int out_hi = start_point_r>>32;
unsigned int out_lo = start_point_r;
unsigned int target_lo = target;
unsigned int target_hi = target >> 32;
unsigned int last_key_lo;
unsigned int last_key_hi;
bool keysearch = (target != 0ULL);
for (int round=start_round; round < stop_round; ) {
out_lo = out_lo ^ RFtable[2*round];////not convibced
out_hi = out_hi ^ RFtable[2*round+1];
if ((out_hi>>mCondition)==0) {// check for distinguished points else new round
// uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
// res = ReverseBits(res);
// printf("New round %i %016llx %08x:%08x\n", round, res, out_hi, out_lo);
round++;
if (round>=stop_round) break;
}
unsigned int lfsr1 = out_lo;
unsigned int lfsr2 = (out_hi << 13) | (out_lo >> 19);
unsigned int lfsr3 = out_hi >> 9;
last_key_hi = out_hi;
last_key_lo =out_lo;
for (int i=0; i<25 ; i++) {
int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) | ((lfsr3>>7)&0xf);
int masks = mClockMask[clocks];
/* lfsr1 */
unsigned int tmask = (masks>>8)&0x0f;
unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
/* lfsr2 */
tmask = (masks>>4)&0x0f;
tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
out_hi = out_hi ^ (tval&0x0f);
lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);
/* lfsr3 */
tmask = masks & 0x0f;
tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
}
for (int i=0; i<8 ; i++) {
int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) | ((lfsr3>>7)&0xf);
int masks = mClockMask[clocks];
/* lfsr1 */
unsigned int tmask = (masks>>8)&0x0f;
unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
out_hi = (out_hi << 4) | (tval&0x0f);
unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
/* lfsr2 */
tmask = (masks>>4)&0x0f;
tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
out_hi = out_hi ^ (tval&0x0f);
lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);
/* lfsr3 */
tmask = masks & 0x0f;
tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
out_hi = out_hi ^ (tval&0x0f);
tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
}
for (int i=0; i<8 ; i++) {
int clocks = ((lfsr1<<3)&0xf00) | ((lfsr2>>3)&0xf0) | ((lfsr3>>7)&0xf);
int masks = mClockMask[clocks];
/* lfsr1 */
unsigned int tmask = (masks>>8)&0x0f;
unsigned int tval = mTable6bit[((lfsr1>>9)&0x3f0)|tmask];
out_lo = (out_lo << 4) | (tval&0x0f);
unsigned int tval2 = mTable4bit[((lfsr1>>6)&0xf0)|tmask];
lfsr1 = (lfsr1<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
/* lfsr2 */
tmask = (masks>>4)&0x0f;
tval = mTable5bit[((lfsr2>>13)&0x1f0)|tmask];
out_lo = out_lo ^ (tval&0x0f);
lfsr2 = (lfsr2<<(masks>>12))^(tval>>4);
/* lfsr3 */
tmask = masks & 0x0f;
tval = mTable6bit[((lfsr3>>13)&0x3f0)|tmask];
out_lo = out_lo ^ (tval&0x0f);
tval2 = mTable4bit[(lfsr3&0xf0)|tmask];
lfsr3 = (lfsr3<<(tval2>>4))^(tval>>4)^(tval2&0x0f);
}
if (keysearch&&(target_hi==out_hi)&&(target_lo==out_lo)) {
/* report key as finishing state */
out_hi = last_key_hi;
out_lo = last_key_lo;
start_round = -1;
break;
}
}
/////////////////////////////
gettimeofday( &tEnd, NULL );
unsigned int uSecs = 1000000 * (tEnd.tv_sec - tStart.tv_sec);
uSecs += (tEnd.tv_usec - tStart.tv_usec);
// printf("Completed in %i ms\n", uSecs/1000);
/* Report completed chains */
sem_wait(&mMutex);
uint64_t res = (((uint64_t)out_hi)<<32)|out_lo;
res = ReverseBits(res);
mOutput.push( pair<uint64_t,uint64_t>(start_point,res) );
mOutputStartRound.push( start_round );
mOutputContext.push( context );
active = false;
sem_post(&mMutex);
}
}
--
Ajay Fuloria
merlinsignals.blogspot.in <http://merlinsignals.blogspot.in>

_______________________________________________
A51 mailing list
https://lists.srlabs.de/cgi-bin/mailman/listinfo/a51
--
Jan Hrach | http://jenda.hrach.eu/
GPG CD98 5440 4372 0C6D 164D A24D F019 2F8E 6527 282E
Loading...