Пример #1
0
/*
 * Piccolo-80 key schedule, written as GCC basic inline assembly (Intel syntax).
 *
 * masterKey is read through rdi (format_input is given rdi, rdi+4, rdi+6,
 * rdi+10, rdi+14 and rdi+16) and roundKeys is written through rsi, using rcx
 * as the byte offset of the next 16-byte store.
 *
 * Layout written here: the pre-whitening keys go to [rsi+0]; the five
 * KEYSCHED80() expansions follow; the post-whitening keys are stored at
 * whatever offset rcx holds afterwards.
 *
 * movdqa is used for the stores, so roundKeys must be 16-byte aligned.
 * NOTE(review): the rdi / rdi+10 pairing suggests two 80-bit keys laid out
 * back to back and processed together -- confirm against format_input.
 */
__attribute__((noinline)) void Piccolo80vperm_key_schedule(const u8* masterKey, u8* roundKeys)
{
        /*      Note : master key in rdi and round keys in rsi  */
        /*      __cdecl calling convention                      */
	asm (".intel_syntax noprefix");
	Push_All_Regs();
	
	/* Address of the round-constant table, kept in a register for KEYSCHED80 */
	asm("lea "tostr(PiccoloTcon80_)", [rip + PiccoloTcon80]");

	/* Isolate interleaved k2 and k3 */
	format_input(rdi+4, rdi+14, xmm0, xmm13, xmm14, xmm15);
	/* Shuffle the result */
	asm("pshufb xmm0, [rip + PiccoloKSShufa]");

	/* Isolate interleaved k0 and k1 */
	format_input(rdi, rdi+10, xmm1, xmm13, xmm14, xmm15);
	/* Keep an unshuffled copy: xmm1 and xmm3 get different shuffles below */
	asm("movdqa xmm3, xmm1");
	/* Shuffle the result */
	asm("pshufb xmm1, [rip + PiccoloKSShufa]");
	/* Get the pre-whitening keys */
	asm("pshufb xmm3, [rip + PiccoloKSShufwa]");

	/* Isolate interleaved k4 */
	format_input(rdi+6, rdi+16, xmm2, xmm13, xmm14, xmm15);
	/* Keep an unshuffled copy: xmm2 and xmm4 get different shuffles below */
	asm("movdqa xmm4, xmm2");
	/* Shuffle the result */
	asm("pshufb xmm2, [rip + PiccoloKSShufb]");
	/* Get the post-whitening keys */
	asm("pshufb xmm4, [rip + PiccoloKSShufwb]");

	/* Go for the transformed key schedule */
	/* rcx = byte offset into roundKeys, starting at 0 */
	asm("xor rcx, rcx");
	/* Store the pre-whitening keys */
	asm("movdqa [rsi+rcx], xmm3");
	asm("add rcx, 16");
	/* KEYSCHED80 is defined elsewhere; presumably each expansion stores
	   round keys at [rsi+rcx] and advances rcx -- confirm in its definition */
	KEYSCHED80();
	KEYSCHED80();
	KEYSCHED80();
	KEYSCHED80();
	KEYSCHED80();
	/* Store the post-whitening keys */
	asm("movdqa [rsi+rcx], xmm4");


	Pop_All_Regs();
	asm (".att_syntax noprefix");

	return;
}
Пример #2
0
/*
 * Compute the CCM authentication value for @req.
 *
 * @req:      AEAD request; supplies the associated data and its length.
 * @plain:    scatterlist holding the plaintext to authenticate.
 * @cryptlen: number of plaintext bytes in @plain.
 *
 * The running MAC lives in the request's private context (odata).  Returns 0
 * on success or the error reported by format_input().
 */
static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
			   unsigned int cryptlen)
{
	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	struct crypto_ccm_ctx *ccm = crypto_aead_ctx(tfm);
	struct crypto_ccm_req_priv_ctx *priv = crypto_ccm_reqctx(req);
	struct crypto_cipher *blk = ccm->cipher;
	u8 *mac = priv->odata;
	int rc;

	/* Build the first (control) block; bail out if it cannot be formed. */
	rc = format_input(mac, req, cryptlen);
	if (rc)
		return rc;

	/* The encrypted control block seeds the MAC computation. */
	crypto_cipher_encrypt_one(blk, mac, mac);

	/* Fold in the associated data, prefixed by its encoded length. */
	if (req->assoclen) {
		priv->ilen = format_adata(priv->idata, req->assoclen);
		get_data_to_compute(blk, priv, req->assoc, req->assoclen);
	} else {
		priv->ilen = 0;
	}

	/* Fold in the plaintext, if there is any. */
	if (cryptlen)
		get_data_to_compute(blk, priv, plain, cryptlen);

	return 0;
}
Пример #3
0
void PRESENT80_InitKEY(const unsigned char* userkey){
	ALIGN16(unsigned char tk[16]);
	memset(tk, 0, sizeof(tk));
	memcpy(tk, userkey, 2);
	memcpy(tk+8, userkey, 2);
	dqword lowkey = LOAD(tk);
	memcpy(tk, userkey+2, 8);
	memcpy(tk+8, userkey+2, 8);
	dqword highkey = LOAD(tk);
	rks[0] = highkey;
	format_input(&rks[0]);
	for(int i=0; i<31; i++){
        key_schedule(&highkey, &lowkey, i);
        rks[i+1] = highkey;
        format_input(&rks[i+1]);
	}
}
Пример #4
0
//////////////////////////////// MAIN LAND //////////////////////////////
/*
 * Program entry point: clears errno, lets format_input() derive a second
 * argument vector from argv, then hands that vector and a 261-byte name
 * buffer to cla_opt_handle().  Always returns 0.
 */
int main(int argc, char **argv)
{
	char **formatted_args;
	char archive_name[261];

	errno = 0;

	format_input(argc, argv, &formatted_args);
	cla_opt_handle(argc, formatted_args, archive_name);

	return 0;
}
Пример #5
0
/*
 * Encrypt the state at `input` in place with the round keys held in the
 * global table rks[].
 *
 * input:   state to encrypt; converted by format_input() on entry and by
 *          format_output() on exit.
 * userkey: not referenced by this function (the round keys in rks[] are used
 *          as they stand).
 *
 * 31 rounds of addRoundKey / sBoxLayer / pLayer are applied, followed by one
 * last addRoundKey with rks[31].
 */
void PRESENT80_enc(dqword *input, const unsigned char* userkey){
	int round = 0;

	format_input(input);

	while (round < 31) {
		addRoundKey(input, &rks[round]);
		sBoxLayer(input);
		pLayer(input);
		round++;
	}

	/* Final key addition: no substitution or permutation follows it. */
	addRoundKey(input, &rks[31]);

	format_output(input);
}
Пример #6
0
/*
** Builds a re-formatted copy of `command` in a fresh buffer.
**
** The buffer holds ft_strlen(command) + 2 * nb_tokens(command) + 1 bytes and
** is filled by format_input(command, buffer).
**
** Returns the new buffer after freeing `command`.  If the allocation fails,
** returns NULL and leaves `command` untouched (it is not freed).
*/
char	*add_some_space(char *command)
{
	size_t	capacity;
	char	*spaced;

	capacity = ft_strlen(command) + 2 * nb_tokens(command) + 1;
	spaced = malloc(capacity);
	if (!spaced)
		return (NULL);
	format_input(command, spaced);
	free(command);
	return (spaced);
}
Пример #7
0
// Create a tokenizer object holding private copies of the separator set and
// of the string to tokenize.
//
//   separators - NUL-terminated string of delimiter characters
//   ts         - NUL-terminated string to be tokenized
//
// Both copies are passed through format_input() after being made.
//
// Returns the new object, or NULL if either argument is NULL or any
// allocation fails; on failure nothing is leaked.  The caller owns the
// returned object together with its delimiter and inputString buffers.
TokenizerT *TKCreate( char *separators, char *ts ) {

	TokenizerT *tokenStructP;

	// strlen() on a NULL pointer is undefined, so reject it up front
	if( separators == NULL || ts == NULL ) {
		return NULL;
	}

	// Allocate memory for the TokenizerT object
	tokenStructP = malloc( sizeof( *tokenStructP ) );
	if( tokenStructP == NULL ) {
		return NULL;
	}

	tokenStructP->delimiter = malloc( sizeof(char) * (strlen( separators ) + 1) );
	tokenStructP->inputString = malloc( sizeof(char) * (strlen( ts ) + 1 ) );

	// Either buffer may have failed; free(NULL) is a no-op, so release both
	if( tokenStructP->delimiter == NULL || tokenStructP->inputString == NULL ) {
		free( tokenStructP->delimiter );
		free( tokenStructP->inputString );
		free( tokenStructP );
		return NULL;
	}

	// Initialize Variables
	copy_string(tokenStructP->delimiter, separators);
	copy_string(tokenStructP->inputString, ts);

	// Format escape commands into single byte characters
	format_input(tokenStructP->delimiter);
	format_input(tokenStructP->inputString);

	return tokenStructP;
}
Пример #8
0
/*
 * Entry point of the sequence formatter.
 *
 * Creates three output files named after output_basename with the suffixes
 * ".ind", ".seq" and ".sbin", writes the matching magic number as the first
 * uint of each, and passes them with the input sequence file to
 * format_input().  The count returned by format_input() is logged.
 *
 * Always returns 0.
 */
int main(int argc, char *argv[]) {
  uchar *seq_path;
  uchar *out_name;
  uint seq_count, magic;
  int base_len;
  FILE *ind_fp, *str_fp, *bin_fp;

  /* Log verbosely until the command line has chosen the real level. */
  configure_logmsg(MSG_DEBUG1);
  parse_arguments(&seq_path, argc, argv);
  configure_logmsg(verbosity_level);

  logmsg(MSG_INFO,"Output basename set to %s\n",output_basename);

  /* Longest suffix is ".sbin": five characters plus the terminating NUL. */
  base_len = strlen(output_basename);
  MA(out_name, (base_len+6)*sizeof(char));

  /* Sequence index file */
  strcpy(out_name, output_basename);
  strcat(out_name, ".ind");
  ind_fp = openfile(out_name, "w", "sequence index file");
  magic = INDFILE_MAGIC;
  fwrite(&magic, sizeof(uint), 1, ind_fp);

  /* Sequence string file */
  strcpy(out_name, output_basename);
  strcat(out_name, ".seq");
  str_fp = openfile(out_name, "w", "sequence string file");
  magic = STRFILE_MAGIC;
  fwrite(&magic, sizeof(uint), 1, str_fp);

  /* Sequence binary file */
  strcpy(out_name, output_basename);
  strcat(out_name, ".sbin");
  bin_fp = openfile(out_name, "w", "sequence binary file");
  magic = BINFILE_MAGIC;
  fwrite(&magic, sizeof(uint), 1, bin_fp);

  seq_count = format_input(seq_path, ind_fp, str_fp, bin_fp);

  fclose(ind_fp);
  fclose(str_fp);
  fclose(bin_fp);

  logmsg(MSG_INFO,"%d sequences formatted\n",seq_count);

  return 0;
}
Пример #9
0
/*
 * Interactive shell entry point.
 *
 * Each iteration prints a prompt, reads one line and then either
 *   - handles a built-in ("exit" ends the loop, "history" lists saved lines), or
 *   - forks a child that applies ">" / "<" redirection, sets up any pipeline
 *     and execs the command, while the parent waits for it.
 * A non-empty line is stored in the circular history buffer when the status
 * recorded for that iteration is 0.
 *
 * Returns 0 when the loop ends ("exit" was typed, or input reached
 * end-of-file / a read error).
 */
int main() {

    bool    exit_flag = false;
    int     oldest_history = HISTORY_SIZE - 1,
            status;
    char    input[MAX_INPUT_SIZE],
            history[HISTORY_SIZE][MAX_INPUT_SIZE] = {'\0'};

    signal(SIGINT, handle_sigint);
    //shell loop
    while(!exit_flag) {

        //start from a known status: built-ins never call wait(), so without
        //this the history check below would read an indeterminate or stale value
        status = 0;

        //print the prompt and get the input
        printf("%s> " , get_username());
        //on end-of-file or a read error the buffer holds no new line; stop
        //instead of re-processing old contents forever
        if(fgets(input, MAX_INPUT_SIZE, stdin) == NULL) break;
        format_input(input);

        //built-in exit function
        if(!strcmp(input, "exit")) exit_flag = true;

        //built-in history function
        else if(!strcmp(input, "history")) {
            int i;
            //wrap the start index: oldest_history+1 equals HISTORY_SIZE when
            //oldest_history is the last slot, which is outside the array
            for(i = (oldest_history+1)%HISTORY_SIZE; i!=oldest_history; i = (i+1)%HISTORY_SIZE) {
                if(history[i][0] != '\0') printf("%s\n", history[i]);
            }
        }

        //non-built-in function
        else if(strcmp(input, "")) {
            int     token_index,
                    num_tokens;
            pid_t   pid;
            char   *tokens[CMD_MAX];

            //get the token list and size
            num_tokens = make_tokenlist(input, tokens);

            //fork a child process
            pid = fork();

            //fork failed: report it and keep the line out of the history
            if(pid < 0) {
                perror("fork");
                status = -1;
            }

            //parent: wait for child to finish
            else if(pid > 0) {
                pid = wait(&status);
            }

            //child: execute command
            else {

                //redirect output
                token_index = contains_token(tokens, num_tokens, ">");
                if(token_index >= 0 && token_index+1 < num_tokens) {
                    int fd_o = open(tokens[token_index+1], O_CREAT|O_TRUNC|O_WRONLY, 0644);
                    if(dup2(fd_o, STDOUT_FILENO)<0) {
                        perror("dup2 error");
                        exit(EXIT_FAILURE);
                    }
                    close(fd_o);

                    //remove ">" and the output file from the list of arguments
                    tokens[token_index  ] = NULL;
                    tokens[token_index+1] = NULL;
                }

                //redirect input
                token_index = contains_token(tokens, num_tokens, "<");
                if(token_index >= 0 && token_index+1 < num_tokens) {
                    int fd_i = open(tokens[token_index+1], O_RDONLY);
                    //do not run the command on the shell's own stdin when
                    //the requested input file cannot be used
                    if(fd_i < 0 || dup2(fd_i, STDIN_FILENO) < 0) {
                        perror("input redirection");
                        exit(EXIT_FAILURE);
                    }
                    close(fd_i);

                    //remove "<" and the input file from the list of arguments
                    tokens[token_index  ] = NULL;
                    tokens[token_index+1] = NULL;
                }

                //handle pipe
                int num_pipes = count_pipes(tokens, num_tokens);
                if(num_pipes > 0) {

                    //get the indices of the pipes to extract the commands
                    int pipe_indices[CMD_MAX];
                    get_pipe_indices(tokens, num_tokens, pipe_indices);

                    //extract all the commands
                    char *commands[CMD_MAX][CMD_MAX];
                    for(int i=0; i<=num_pipes; i++) {
                        extract_tokens(tokens, commands[i], pipe_indices[i]+1, pipe_indices[i+1]);
                    }

                    //connect and run all the pipe commands
                    int fds[2],
                        in = STDIN_FILENO;
                    for(int i=0; i<num_pipes; i++) {
                        pipe(fds);
                        execute_piped_process(in, fds[1], commands[i]);
                        close(fds[1]);
                        in = fds[0];
                    }

                    //execute the last command
                    if(in != STDIN_FILENO) dup2(in, STDIN_FILENO);
                    execvp(commands[num_pipes][0], commands[num_pipes]);
                }

                //execute command (only reached when no exec above succeeded)
                execvp(tokens[0], tokens);
                printf("%s: invalid command\n", tokens[0]);
                exit(EXIT_FAILURE);
            }
        }

        //update history
        if(strcmp(input, "") && status==0) {
            strcpy( history[oldest_history], input );
            oldest_history--;
            if(oldest_history < 0) oldest_history = HISTORY_SIZE - 1;
        }

    }

    return 0;
}
Пример #10
0
/*
 * MEX gateway: runs one initialize / work / stop cycle of the module.
 *
 * nrhs must be 1 or 2, otherwise help() is called and nothing else happens.
 * With two right-hand arguments the parameters are parsed from CTRL first.
 * Input data (if the module has input interfaces) is taken from IN; the
 * result is stored in OUT, as a cell matrix with one cell per interface when
 * there is more than one output interface, and directly otherwise.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {

	int itf;
	int produced;
	mxArray *cell_item;

	if (!(nrhs == 1 || nrhs == 2)) {
		help();
		return;
	}

	if (nrhs == 2) {
		parse_parameters(CTRL);
	}

	if (initialize() < 0) {
		mexErrMsgTxt("Error in module initialize() function\n");
	}

	allocate_memory();

	/* Both entries of array_dims carry the number of output interfaces. */
	array_dims[0] = nof_output_itf;
	array_dims[1] = nof_output_itf;

	if (nof_input_itf > 0) {
		fill_input_lengths(IN);
		format_input(IN);
	}

	/* Each interface owns a fixed-size slice of the shared buffers. */
	for (itf = 0; itf < nof_input_itf; itf++) {
		input_ptr[itf] = &input_buffer[itf*input_max_samples*input_sample_sz];
	}
	for (itf = 0; itf < nof_output_itf; itf++) {
		output_ptr[itf] = &output_buffer[itf*output_max_samples*output_sample_sz];
	}

	produced = work(input_ptr, output_ptr);
	if (produced < 0) {
		mexErrMsgTxt("Error in module work() function\n");
	}

	if (stop() < 0) {
		mexErrMsgTxt("Error in module stop() function\n");
	}

	/* Interfaces whose length is still zero take the value returned by work(). */
	for (itf = 0; itf < nof_output_itf; itf++) {
		if (output_len[itf] == 0) {
			output_len[itf] = produced;
		}
	}

	if (nof_output_itf <= 1) {
		save_output(&OUT,&output_buffer[0],output_len[0]);
	} else {
		OUT=mxCreateCellMatrix(nof_output_itf,1);

		for (itf = 0; itf < nof_output_itf; itf++) {
			save_output(&cell_item,&output_buffer[itf*output_max_samples*output_sample_sz],output_len[itf]);
			mxSetCell(OUT,itf,cell_item);
		}
	}

	return;
}
Пример #11
0
/*
 * LED-128 main encryption block, written as GCC basic inline assembly
 * (Intel syntax).  It supposes that the scheduled keys are in memory pointed
 * to by the second argument.
 *
 * message    : input block, loaded with movdqa from [Plaintext_].
 * subkeys    : two 16-byte key words at [Keys_] and [Keys_+16].
 * ciphertext : output block, stored with movdqa to [Ciphertext_].
 *
 * movdqa is used for every load and store, so all three buffers must be
 * 16-byte aligned.
 *
 * Structure: 13 key additions alternating between [Keys_] and [Keys_+16],
 * with four LEDROUND() expansions between consecutive additions (48 in total).
 *
 * NOTE(review): the key words are converted and interleaved in place (see
 * the stores to [Keys_] below) although `subkeys` is declared const; running
 * this function again on the same buffer would apply the conversion a second
 * time.  Confirm that callers provide freshly scheduled keys for each call.
 */
__attribute__((noinline)) void LED128vperm_core(const u8* message,  const u8* subkeys, u8* ciphertext)
{
    /*      Note : message is in rdi, subkeys in rsi and ciphertext in rdx  */
    /*      __cdecl calling convention                                      */
    asm (".intel_syntax noprefix");
    Push_All_Regs();
    /* Key Index */
    asm("xor "tostr(RoundCounter_)", "tostr(RoundCounter_)"");
    asm("lea "tostr(RCBase_)", [rip + RC128_LED]");
    /* Load constants (TBoxes) */
    asm("movdqa  "tostr(T1a_LED_)", [rip + T1a_LED]");
    asm("movdqa  "tostr(T1b_LED_)", [rip + T1b_LED]");
    asm("movdqa  "tostr(T2a_LED_)", [rip + T2a_LED]");
    asm("movdqa  "tostr(T2b_LED_)", [rip + T2b_LED]");
    asm("movdqa  "tostr(T3a_LED_)", [rip + T3a_LED]");
    asm("movdqa  "tostr(T3b_LED_)", [rip + T3b_LED]");
    asm("movdqa  "tostr(T4a_LED_)", [rip + T4a_LED]");
    asm("movdqa  "tostr(T4b_LED_)", [rip + T4b_LED]");

    /* Load the And and Or masks */
    asm("movdqa  "tostr(AndMask_LED_)", [rip + AndMask_LED]");
    asm("movdqa  "tostr(OrMask_LED_)", [rip + OrMask_LED]");
    /* Load the message */
    asm("movdqa  "tostr(State_)", ["tostr(Plaintext_)"]");

    /* Transform the message from line wise to column wise */
    format_input(State_);
    /* Transform the keys from line wise to column wise */
    asm("movdqa "tostr(Tmp4_)", ["tostr(Keys_)"]");
    format_input(Tmp4_);
    asm("movdqa ["tostr(Keys_)"], "tostr(Tmp4_)"");
    asm("movdqa "tostr(Tmp4_)", ["tostr(Keys_)"+16]");
    format_input(Tmp4_);
    asm("movdqa ["tostr(Keys_)"+16], "tostr(Tmp4_)"");
    /* Interleave the half keys: swap the 8-byte words at offsets 8 and 16,
       using rax and rbx as scratch registers */
    asm("mov rax, ["tostr(Keys_)"+8]");
    asm("mov rbx, ["tostr(Keys_)"+16]");
    asm("mov ["tostr(Keys_)"+16], rax");
    asm("mov ["tostr(Keys_)"+8], rbx");

    asm("pxor "tostr(State_)", ["tostr(Keys_)"]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();
    asm("pxor "tostr(State_)", ["tostr(Keys_)"+16]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();

    asm("pxor "tostr(State_)", ["tostr(Keys_)"]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();
    asm("pxor "tostr(State_)", ["tostr(Keys_)"+16]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();

    asm("pxor "tostr(State_)", ["tostr(Keys_)"]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();
    asm("pxor "tostr(State_)", ["tostr(Keys_)"+16]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();

    asm("pxor "tostr(State_)", ["tostr(Keys_)"]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();
    asm("pxor "tostr(State_)", ["tostr(Keys_)"+16]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();

    asm("pxor "tostr(State_)", ["tostr(Keys_)"]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();
    asm("pxor "tostr(State_)", ["tostr(Keys_)"+16]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();

    asm("pxor "tostr(State_)", ["tostr(Keys_)"]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();
    /* Displacement written inside the brackets, like every other access to
       the second key word */
    asm("pxor "tostr(State_)", ["tostr(Keys_)"+16]");
    LEDROUND();
    LEDROUND();
    LEDROUND();
    LEDROUND();
    /* Final key addition */
    asm("pxor "tostr(State_)", ["tostr(Keys_)"]");

    /* Transform the message back from column wise to line wise */
    format_output(State_);

    /* Store the result in the ciphertext buffer */
    asm("movdqa ["tostr(Ciphertext_)"], "tostr(State_)"");

    Pop_All_Regs();
    asm (".att_syntax noprefix");

    return;
}
Пример #12
0
/*
 * Piccolo-80 main encryption block, written as GCC basic inline assembly
 * (Intel syntax).  It supposes that the scheduled keys are in memory pointed
 * to by the second argument.
 *
 * message    : input, read by format_input from Plaintext_ and Plaintext_+8.
 * subkeys    : scheduled keys, read at [Keys_ + RoundCounter_].
 * ciphertext : output, written by format_output through Ciphertext_.
 *
 * Structure: one pre-whitening key addition, 24 PiccoloROUND() expansions,
 * one PiccoloLASTROUND(), then one post-whitening key addition.
 *
 * NOTE(review): the Plaintext_ / Plaintext_+8 pairing suggests two 8-byte
 * blocks processed together -- confirm against format_input.
 */
__attribute__((noinline)) void Piccolo80vperm_core(const u8* message, const u8* subkeys, u8* ciphertext)
{ 	
	/*	Note : message is in rdi, subkeys in rsi and ciphertext in rdx 	*/
	/*	__cdecl calling convention		                        */	
	asm (".intel_syntax noprefix");
	Push_All_Regs();
	/* Key Index */
	asm("xor "tostr(RoundCounter_)", "tostr(RoundCounter_)"");
	/* Load constants (SBoxes, multiplications ...) */
	asm("movdqa  "tostr(PiccoloSBoxL_)", [rip + PiccoloSBoxL]");
	asm("movdqa  "tostr(PiccoloSBoxH_)", [rip + PiccoloSBoxH]");
	asm("movdqa  "tostr(TwoMulPiccoloSBoxL_)", [rip + TwoMulPiccoloSBoxL]");
	asm("movdqa  "tostr(ThreeMulPiccoloSBoxL_)", [rip + ThreeMulPiccoloSBoxL]");
	asm("movdqa  "tostr(PiccoloThreeShuf_)", [rip + PiccoloThreeShuf]");
	asm("movdqa  "tostr(PiccoloOneShufa_)", [rip + PiccoloOneShufa]");
	asm("movdqa  "tostr(PiccoloOneShufb_)", [rip + PiccoloOneShufb]");

	/* Load the masks */
	asm("movdqa  "tostr(PiccoloAndMaskL_)", [rip + PiccoloAndMaskL]");

	/* Load Piccolo's Round Permutation PiccoloRP */
	asm("movdqa  "tostr(PiccoloRP_)", [rip + PiccoloRP]");

	/* Scheduled keys from [rsi] and above */
	/* Load the messages and format them */
	format_input(Plaintext_, Plaintext_+8, State_, Tmp1_, Tmp2_, Tmp3_);

	/* Pre Whitening AddRoundKey */
	asm("pxor "tostr(State_)", ["tostr(Keys_)"+"tostr(RoundCounter_)"]");
	/* RoundCounter_ is a byte offset: step past the 16-byte whitening key */
	asm("add     "tostr(RoundCounter_)", 16");

	/* 24 regular rounds followed by the last round.  The round macros are
	   defined elsewhere; presumably each one advances RoundCounter_ so the
	   post-whitening load below hits the right key -- confirm there */
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();

	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();

	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();

	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();

	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloROUND();
	PiccoloLASTROUND();

	/* Post Whitening AddRoundKey */
	asm("pxor "tostr(State_)", ["tostr(Keys_)"+"tostr(RoundCounter_)"]");

	/* Move back the result in the input message, formatted */
	format_output(Ciphertext_, State_, Tmp1_, Tmp2_, Tmp3_);

	Pop_All_Regs();
	asm (".att_syntax noprefix");

	return;
}
Пример #13
0
/*
 * Piccolo-128 key schedule, written as GCC basic inline assembly (Intel syntax).
 *
 * masterKey is read through rdi (format_input is given rdi, rdi+16, rdi+8 and
 * rdi+24).  roundKeys is written through rsi in 16-byte slots:
 *   slot 0  (rsi)       : pre-whitening key
 *   slot r, r = 1..31   : XOR of two isolated key words and the 16-byte
 *                         constant at PiccoloTcon128_ + 16*(r-1)
 *   slot 32 (rsi+512)   : post-whitening key
 * For slots 2..31 rcx is loaded with 2*r, so [rsi+8*rcx] is slot r and
 * [PiccoloTcon128_+8*rcx-16] is its constant.
 *
 * Every store uses movdqa, so roundKeys must be 16-byte aligned and at least
 * 528 bytes long.
 *
 * The AVX and non-AVX branches write the same slots from the same key-word
 * pairs; the AVX branch uses three-operand vpshufb/vpxor where the other
 * branch copies with movdqa before pshufb/pxor.
 *
 * NOTE(review): the rdi / rdi+16 pairing suggests two 16-byte keys laid out
 * back to back and processed together -- confirm against format_input.
 */
__attribute__((noinline)) void Piccolo128vperm_key_schedule(const u8* masterKey, u8* roundKeys)
{
	/*	Note : master key in rdi and round keys in rsi 	*/
	/*	__cdecl calling convention			*/	
	asm (".intel_syntax noprefix");
	Push_All_Regs();
	
	/* Address of the round-constant table */
	asm("lea "tostr(PiccoloTcon128_)", [rip + PiccoloTcon128]");
	/* Interleave keys */
	format_input(rdi, rdi+16, xmm11, xmm13, xmm14, xmm15);
	format_input(rdi+8, rdi+24, xmm12, xmm13, xmm14, xmm15);
#ifdef AVX
	/* Pre-Whitening Key */
	asm("vpshufb xmm0, xmm11, [rip + PiccoloKSShufw0]");
	asm("movdqa [rsi], xmm0");
	/* Post-Whitening Key */
	asm("vpshufb xmm1, xmm12, [rip + PiccoloKSShufw1]");
	asm("movdqa [rsi+512], xmm1");
	/* Even keys */
	/* --------- */
	/* Isolate interleaved k0 */
	asm("vpshufb xmm0, xmm11, [rip + PiccoloKSShuf0]");
	/* Isolate interleaved k2 */
	asm("vpshufb xmm2, xmm11, [rip + PiccoloKSShuf2]");
	/* Isolate interleaved k4 */
	asm("vpshufb xmm4, xmm12, [rip + PiccoloKSShuf0]");
	/* Isolate interleaved k6 */
	asm("vpshufb xmm6, xmm12, [rip + PiccoloKSShuf2]");
	/* Odd keys */
	/* -------- */
	/* Isolate interleaved k1 */
	asm("vpshufb xmm1, xmm11, [rip + PiccoloKSShuf1]");
	/* Isolate interleaved k3 */
	asm("vpshufb xmm3, xmm11, [rip + PiccoloKSShuf3]");
	/* Isolate interleaved k5 */
	asm("vpshufb xmm5, xmm12, [rip + PiccoloKSShuf1]");
	/* Isolate interleaved k7 */
	asm("vpshufb xmm7, xmm12, [rip + PiccoloKSShuf3]");

	/* Compute the pairs */
	/* Each pair is XORed once into xmm13 and reused for every slot that
	   needs it; only the round constant differs between those slots */
	/* 2 - 3 */
	asm("vpxor   xmm13, xmm2, xmm3");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"]");	
	asm("movdqa [rsi+16], xmm13");
	/* 4 - 5 */
	asm("vpxor   xmm13, xmm4, xmm5");
	asm("mov    rcx, 2*2");
	asm("vpxor   xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*7");
	asm("vpxor   xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	asm("mov    rcx, 2*9");
	asm("vpxor   xmm10, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm10");
	/* 6 - 7 */
	asm("vpxor  xmm13, xmm6, xmm7");
	asm("mov    rcx, 2*3");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*5");
	asm("vpxor   xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 2 - 1 */
	asm("vpxor  xmm13, xmm2, xmm1");
	asm("mov    rcx, 2*4");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*20");
	asm("vpxor  xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 0 - 3 */
	asm("vpxor  xmm13, xmm0, xmm3");
	asm("mov    rcx, 2*6");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*11");
	asm("vpxor  xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	asm("mov    rcx, 2*13");
	asm("vpxor  xmm10, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm10");
	/* 6 - 1 */
	asm("vpxor  xmm13, xmm6, xmm1");
	asm("mov    rcx, 2*8");
	asm("vpxor   xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*24");
	asm("vpxor   xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 2 - 7 */
	asm("vpxor  xmm13, xmm2, xmm7");
	asm("mov    rcx, 2*10");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*15");
	asm("vpxor  xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	asm("mov    rcx, 2*17");
	asm("vpxor  xmm10, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm10");
	/* 4 - 1 */
	asm("vpxor  xmm13, xmm4, xmm1");
	asm("mov    rcx, 2*12");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*28");
	asm("vpxor  xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 6 - 5 */
	asm("vpxor  xmm13, xmm6, xmm5");
	asm("mov    rcx, 2*14");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*19");
	asm("vpxor  xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	asm("mov    rcx, 2*21");
	asm("vpxor  xmm10, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm10");
	/* 0 - 1 */
	asm("vpxor  xmm13, xmm0, xmm1");
	asm("mov    rcx, 2*16");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	/* 4 - 3 */
	asm("vpxor  xmm13, xmm4, xmm3");
	asm("mov    rcx, 2*18");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*23");
	asm("vpxor  xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	asm("mov    rcx, 2*25");
	asm("vpxor  xmm10, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm10");	
	/* 0 - 7 */
	asm("vpxor  xmm13, xmm0, xmm7");
	asm("mov    rcx, 2*22");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*27");
	asm("vpxor  xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	asm("mov    rcx, 2*29");
	asm("vpxor  xmm10, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm10");
	/* 2 - 5 */
	asm("vpxor  xmm13, xmm2, xmm5");
	asm("mov    rcx, 2*26");
	asm("vpxor  xmm8, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*31");
	asm("vpxor   xmm9, xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 6 - 3 */
	asm("vpxor  xmm13, xmm6, xmm3");
	asm("mov    rcx, 2*30");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
#else
	/* Pre-Whitening Key */
	asm("movdqa xmm0, xmm11");
	asm("pshufb xmm0, [rip + PiccoloKSShufw0]");
	asm("movdqa [rsi], xmm0");
	/* Post-Whitening Key */
	asm("movdqa xmm1, xmm12");
	asm("pshufb xmm1, [rip + PiccoloKSShufw1]");
	asm("movdqa [rsi+512], xmm1");
	/* Even keys */
	/* --------- */
	/* Isolate interleaved k0 */
	asm("movdqa xmm0, xmm11");
	asm("pshufb xmm0, [rip + PiccoloKSShuf0]");
	/* Isolate interleaved k2 */
	asm("movdqa xmm2, xmm11");
	asm("pshufb xmm2, [rip + PiccoloKSShuf2]");
	/* Isolate interleaved k4 */
	asm("movdqa xmm4, xmm12");
	asm("pshufb xmm4, [rip + PiccoloKSShuf0]");
	/* Isolate interleaved k6 */
	asm("movdqa xmm6, xmm12");
	asm("pshufb xmm6, [rip + PiccoloKSShuf2]");
	/* Odd keys */
	/* -------- */
	/* Isolate interleaved k1 */
	asm("movdqa xmm1, xmm11");
	asm("pshufb xmm1, [rip + PiccoloKSShuf1]");
	/* Isolate interleaved k3 */
	asm("movdqa xmm3, xmm11");
	asm("pshufb xmm3, [rip + PiccoloKSShuf3]");
	/* Isolate interleaved k5 */
	asm("movdqa xmm5, xmm12");
	asm("pshufb xmm5, [rip + PiccoloKSShuf1]");
	/* Isolate interleaved k7 */
	asm("movdqa xmm7, xmm12");
	asm("pshufb xmm7, [rip + PiccoloKSShuf3]");

	/* Compute the pairs */
	/* Each pair is XORed once into xmm13; copies in xmm8/xmm9 are taken
	   before the round constant is added so the pair can be reused */
	/* 2 - 3 */
	asm("movdqa xmm13, xmm2");
	asm("movdqa xmm14, xmm3");
	asm("pxor   xmm13, xmm14");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"]");	
	asm("movdqa [rsi+16], xmm13");
	/* 4 - 5 */
	asm("movdqa xmm13, xmm4");
	asm("movdqa xmm14, xmm5");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("movdqa xmm9, xmm13");
	asm("mov    rcx, 2*2");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*7");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*9");
	asm("pxor   xmm9, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 6 - 7 */
	asm("movdqa xmm13, xmm6");
	asm("movdqa xmm14, xmm7");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("mov    rcx, 2*3");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*5");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	/* 2 - 1 */
	asm("movdqa xmm13, xmm2");
	asm("movdqa xmm14, xmm1");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("mov    rcx, 2*4");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*20");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	/* 0 - 3 */
	asm("movdqa xmm13, xmm0");
	asm("movdqa xmm14, xmm3");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("movdqa xmm9, xmm13");
	asm("mov    rcx, 2*6");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*11");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*13");
	asm("pxor   xmm9, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 6 - 1 */
	asm("movdqa xmm13, xmm6");
	asm("movdqa xmm14, xmm1");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("mov    rcx, 2*8");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*24");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	/* 2 - 7 */
	asm("movdqa xmm13, xmm2");
	asm("movdqa xmm14, xmm7");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("movdqa xmm9, xmm13");
	asm("mov    rcx, 2*10");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*15");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*17");
	asm("pxor   xmm9, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 4 - 1 */
	asm("movdqa xmm13, xmm4");
	asm("movdqa xmm14, xmm1");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("mov    rcx, 2*12");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*28");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	/* 6 - 5 */
	asm("movdqa xmm13, xmm6");
	asm("movdqa xmm14, xmm5");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("movdqa xmm9, xmm13");
	asm("mov    rcx, 2*14");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*19");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*21");
	asm("pxor   xmm9, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 0 - 1 */
	asm("movdqa xmm13, xmm0");
	asm("movdqa xmm14, xmm1");
	asm("pxor   xmm13, xmm14");
	asm("mov    rcx, 2*16");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	/* 4 - 3 */
	asm("movdqa xmm13, xmm4");
	asm("movdqa xmm14, xmm3");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("movdqa xmm9, xmm13");
	asm("mov    rcx, 2*18");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*23");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*25");
	asm("pxor   xmm9, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");	
	/* 0 - 7 */
	asm("movdqa xmm13, xmm0");
	asm("movdqa xmm14, xmm7");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("movdqa xmm9, xmm13");
	asm("mov    rcx, 2*22");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*27");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	asm("mov    rcx, 2*29");
	asm("pxor   xmm9, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm9");
	/* 2 - 5 */
	asm("movdqa xmm13, xmm2");
	asm("movdqa xmm14, xmm5");
	asm("pxor   xmm13, xmm14");
	asm("movdqa xmm8, xmm13");
	asm("mov    rcx, 2*26");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
	asm("mov    rcx, 2*31");
	asm("pxor   xmm8, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm8");
	/* 6 - 3 */
	asm("movdqa xmm13, xmm6");
	asm("movdqa xmm14, xmm3");
	asm("pxor   xmm13, xmm14");
	asm("mov    rcx, 2*30");
	asm("pxor   xmm13, ["tostr(PiccoloTcon128_)"+8*rcx-16]");
	asm("movdqa [rsi+8*rcx], xmm13");
#endif
	Pop_All_Regs();
	asm (".att_syntax noprefix");

	return;
}