コード例 #1
0
/* Test driver: fills b with 0..N-1, copies it into a shifted up by one
   element, verifies the copy (aborting on any mismatch), then passes a to
   main1 ().  Returns 0 when all checks pass.  */
int main (void)
{
  int i;
  float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  float b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));

  check_vect ();

  /* from bzip2: */
  for (i=0; i<N; i++) b[i] = i;
  a[0] = 0;
  /* Shifted copy: a[i] receives b[i-1].
     NOTE(review): the bound is the literal 256 rather than N, so this
     requires N > 256 -- confirm against the definition of N.  */
  for (i = 1; i <= 256; i++) a[i] = b[i-1];

  /* check results:  */
  for (i = 1; i <= 256; i++)
    {
      if (a[i] != i-1)
	abort ();
    }
  /* a[0] was set explicitly and must not have been touched by the copy.  */
  if (a[0] != 0)
    abort ();

  main1 (a);

  return 0;
}
コード例 #2
0
ファイル: vect-35.c プロジェクト: 5432935/crossbridge
/* Stores 3*i into s.b, then computes s.a[i] = s.b[i] + 1, where a and b are
   members of the same union, and aborts if any element of s.a differs from
   3*i + 1.  Returns 0 otherwise.  */
int main1 ()
{  
  union {
    char a[N] __attribute__ ((__aligned__(16)));
    char b[N] __attribute__ ((__aligned__(16)));
  } s;
  int i;

  /* Initialization.  */
  for (i = 0; i < N; i++)
    {
      s.b[i] = 3*i;
    }

  /* Can't vectorize - dependence analysis fails because s.a and s.b may
     overlap.  */
  for (i = 0; i < N; i++)
    {
      s.a[i] = s.b[i] + 1;
    }

  /* check results:  */
  /* NOTE(review): the elements are plain char, so this comparison only holds
     while 3*i + 1 fits in a char -- confirm N is small enough.  */
  for (i = 0; i < N; i++)
    {
      if (s.a[i] != 3*i + 1)
	abort ();
    }

  return 0;
}
コード例 #3
0
ファイル: vect-60.c プロジェクト: 5432935/crossbridge
/* Multiplies b and c element-wise, both read at a one-element offset, into
   a for the first n/2 elements, then re-checks the products and aborts on a
   mismatch.  Returns 0 when every checked element matches.  */
int
main1 (int n)
{
  int i;
  float a[N] __attribute__ ((__aligned__(16)));
  float b[N] __attribute__ ((__aligned__(16))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
  float c[N] __attribute__ ((__aligned__(16))) = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
  /* The arrays are accessed only through these pointers below.  */
  float *pa = a;
  float *pb = b;
  float *pc = c;

  /* Inputs are read starting at index 1, the output is written from index 0.  */
  for (i = 0; i < n/2; i++)
    {
      pa[i] = pb[i+1] * pc[i+1];
    }

  /* check results:  */
  /* NOTE(review): the check runs to N/2 while the computation ran to n/2;
     they cover the same elements only when the caller passes n == N --
     confirm against the call site.  */
  for (i = 0; i < N/2; i++)
    {
      if (pa[i] != (pb[i+1] * pc[i+1]))
        abort ();
    }

  return 0;
}
コード例 #4
0
ファイル: vect-35-big-array.c プロジェクト: AlexMioMio/gcc
/* Stores i into s.b, then computes s.a[i] = s.b[i] + 1, where a and b are
   members of the same union, and aborts if any element of s.a differs from
   i + 1.  Returns 0 otherwise.  */
int main1 ()
{
  union {
    unsigned char a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
    unsigned char b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  } s;
  int i;

  /* Initialization.  */
  for (i = 0; i < N; i++)
    {
      s.b[i] = i;
    }

  /* Dependence analysis fails because s.a and s.b may overlap.
     Use runtime aliasing test with versioning.  */
  for (i = 0; i < N; i++)
    {
      s.a[i] = s.b[i] + 1;
    }

  /* check results:  */
  /* NOTE(review): the elements are unsigned char, so this comparison only
     holds while i + 1 fits in an unsigned char -- confirm the value of N.  */
  for (i = 0; i < N; i++)
    {
      if (s.a[i] != i + 1)
	abort ();
    }

  return 0;
}
コード例 #5
0
/* Runs two outer loops that each contain a strided inner reduction over C
   and differ in the dependence distance of the outer-loop update, checking
   the results after each one.  Aborts on a mismatch; returns 0 otherwise.  */
int main1 ()
{  
  float A[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  float B[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  float C[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  float D[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  /* Expected B[0..3] after the second outer loop.  E[3] is B[0] + s.
     NOTE(review): 480 equals the strided sum of C only for N == 64 --
     presumably that is the value of N; confirm.  */
  float E[4] = {0,1,2,480};
  float s;

  int i, j;

  /* All four arrays start as 0..N-1; D is never modified and serves as the
     reference copy of the initial A.  */
  for (i = 0; i < N; i++)
    {
      A[i] = i;
      B[i] = i;
      C[i] = i;
      D[i] = i;
    }

  /* Outer-loop 1: Vectorizable with respect to dependence distance. */
  for (i = 0; i < N-20; i++)
    {
      s = 0;
      for (j=0; j<N; j+=4)
        s += C[j];
      /* Reads 20 elements ahead of the element being written.  */
      A[i] = A[i+20] + s;
    }

  /* check results:  */
  for (i = 0; i < N-20; i++)
    {
      s = 0;
      for (j=0; j<N; j+=4)
        s += C[j];
      if (A[i] != D[i+20] + s)
        abort ();
    }

  /* Outer-loop 2: Not vectorizable because of dependence distance. */
  for (i = 0; i < 4; i++)
    {
      s = 0;
      for (j=0; j<N; j+=4)
	s += C[j];
      /* Writes 3 elements ahead, so iteration 3 reads the B[3] that
         iteration 0 wrote.  */
      B[i+3] = B[i] + s;
    }

  /* check results:  */
  for (i = 0; i < 4; i++)
    {
      if (B[i] != E[i])
	abort ();
    }

  return 0;
}
コード例 #6
0
/* Test driver: builds the two input arrays and calls main1 () twice, first
   with b itself and then with b advanced by one element, so the second call
   receives a pointer that is one float past the 16-byte-aligned base.  */
int main (void)
{
  float b[N+1] __attribute__ ((__aligned__(16))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60};
  float c[N] __attribute__ ((__aligned__(16))) = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};

  check_vect ();

  main1 (b,c);
  /* b has N+1 elements so that &b[1] still provides N readable floats.  */
  main1 (&b[1],c);

  return 0;
}
コード例 #7
0
ファイル: no-vfa-vect-53.c プロジェクト: 0day-ci/gcc
/* Test driver: calls main1 () twice with b advanced by one element, first
   against c itself and then against c advanced by one element as well.  */
int main (void)
{
  /* NOTE(review): a is declared but never passed anywhere in this function;
     it is kept because its presence may matter to the test -- confirm.  */
  float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  float b[N+1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57,60};
  float c[N+1] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))) = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};

  check_vect ();

  /* Both arrays have N+1 elements so an offset of one still leaves N floats.  */
  main1 (N,&b[1],c);
  main1 (N,&b[1],&c[1]);

  return 0;
}
コード例 #8
0
/// Writes 16 bytes to `op` as two 8-byte halves. Each half is produced by a table lookup (vtbl2_u8)
/// into 16 bytes loaded from `match`, using the index row selected by `offset`.
/// Afterwards `match` is advanced by masks[offset].
inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{
    /// One 16-byte row per offset. Row 0 is not a shuffle mask: its entries are the amounts
    /// by which `match` is advanced (see the last statement).
    static constexpr UInt8 __attribute__((__aligned__(16))) masks[] =
    {
        0,  1,  2,  1,  4,  1,  4,  2,  8,  7,  6,  5,  4,  3,  2,  1, /* offset = 0, not used as mask, but for shift amount instead */
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* offset = 1 */
        0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,
        0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,
        0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,
        0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,
        0,  1,  2,  3,  4,  5,  0,  1,  2,  3,  4,  5,  0,  1,  2,  3,
        0,  1,  2,  3,  4,  5,  6,  0,  1,  2,  3,  4,  5,  6,  0,  1,
        0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  0,  1,  2,  3,  4,  5,  6,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  1,  2,  3,  4,  5,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  1,  2,  3,  4,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  0,  1,  2,  3,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  0,  1,  2,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,  0,  1,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  0,
    };

    /// Low half: first 8 indices of the row.
    unalignedStore(op,
        vtbl2_u8(unalignedLoad<uint8x8x2_t>(match), unalignedLoad<uint8x8_t>(masks + 16 * offset)));

    /// High half: last 8 indices of the row. `match` is loaded again after the first store.
    /// NOTE(review): if `op` can overlap `match`, the order of these statements matters - do not hoist the load.
    unalignedStore(op + 8,
        vtbl2_u8(unalignedLoad<uint8x8x2_t>(match), unalignedLoad<uint8x8_t>(masks + 16 * offset + 8)));

    match += masks[offset];
}
コード例 #9
0
inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{
#ifdef __SSSE3__

    /// One 16-byte shuffle row per offset. Row 0 is not a shuffle mask:
    /// its entries are the amounts by which `match` is advanced below.
    static constexpr UInt8 __attribute__((__aligned__(16))) masks[] =
    {
        0,  1,  2,  1,  4,  1,  4,  2,  8,  7,  6,  5,  4,  3,  2,  1, /* offset = 0, not used as mask, but for shift amount instead */
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* offset = 1 */
        0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,
        0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,
        0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,
        0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,
        0,  1,  2,  3,  4,  5,  0,  1,  2,  3,  4,  5,  0,  1,  2,  3,
        0,  1,  2,  3,  4,  5,  6,  0,  1,  2,  3,  4,  5,  6,  0,  1,
        0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  0,  1,  2,  3,  4,  5,  6,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  1,  2,  3,  4,  5,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  1,  2,  3,  4,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  0,  1,  2,  3,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  0,  1,  2,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,  0,  1,
        0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  0,
    };

    /// The table is viewed as an array of 16-byte rows, so `+ offset` selects a whole row (aligned load).
    const __m128i * mask_rows = reinterpret_cast<const __m128i *>(masks);
    const __m128i source_bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(match));
    const __m128i shuffle_row = _mm_load_si128(mask_rows + offset);

    _mm_storeu_si128(reinterpret_cast<__m128i *>(op), _mm_shuffle_epi8(source_bytes, shuffle_row));

    match += masks[offset];

#else
    /// Without SSSE3 there is no byte shuffle; defer to the non-shuffle variant.
    copyOverlap16(op, match, offset);
#endif
}
コード例 #10
0
/** We use 'xmm' (128bit SSE) registers here to shuffle 16 bytes.
  *
  * It is possible to use 'mm' (64bit MMX) registers to shuffle just 8 bytes as we need.
  *
  * There is corresponding version of 'pshufb' instruction that operates on 'mm' registers,
  *  (it operates on MMX registers although it is available in SSSE3)
  *  and compiler library has the corresponding intrinsic: '_mm_shuffle_pi8'.
  *
  * It can be done like this:
  *
  *  unalignedStore(op, _mm_shuffle_pi8(
  *      unalignedLoad<__m64>(match),
  *      unalignedLoad<__m64>(masks + 8 * offset)));
  *
  * This is perfectly correct, and this code has the same or even better performance.
  *
  * But if we write code this way, it will lead to
  *  extremely weird and extremely non obvious
  *  effects in completely unrelated parts of code.
  *
  * Because using MMX registers alters the mode of operation of x87 FPU,
  *  and then operations with FPU become broken.
  *
  * Example 1.
  * Compile this code without optimizations:
  *
    #include <vector>
    #include <unordered_set>
    #include <iostream>
    #include <tmmintrin.h>

    int main(int, char **)
    {
        [[maybe_unused]] __m64 shuffled = _mm_shuffle_pi8(__m64{}, __m64{});

        std::vector<int> vec;
        std::unordered_set<int> set(vec.begin(), vec.end());

        std::cerr << set.size() << "\n";
        return 0;
    }

    $ g++ -g -O0 -mssse3 -std=c++17 mmx_bug1.cpp && ./a.out
    terminate called after throwing an instance of 'std::bad_alloc'
    what():  std::bad_alloc

    Also reproduced with clang. But only with libstdc++, not with libc++.

  * Example 2.

    #include <math.h>
    #include <iostream>
    #include <tmmintrin.h>

    int main(int, char **)
    {
        double max_fill = 1;

        std::cerr << (long double)max_fill << "\n";
        [[maybe_unused]] __m64 shuffled = _mm_shuffle_pi8(__m64{}, __m64{});
        std::cerr << (long double)max_fill << "\n";

        return 0;
    }

    $ g++ -g -O0 -mssse3 -std=c++17 mmx_bug2.cpp && ./a.out
    1
    -nan

  * Explanation:
  *
  * https://stackoverflow.com/questions/33692969/assembler-mmx-errors
  * https://software.intel.com/en-us/node/524274
  *
  * Actually, it's possible to use the 'emms' instruction after the decompression routine.
  * But it's easier to just use 'xmm' registers and avoid using 'mm' registers.
  */
inline void copyOverlap8Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{
#ifdef __SSSE3__

    /// One 8-byte shuffle row per offset. Row 0 is not a shuffle mask:
    /// its entries are the amounts by which `match` is advanced below.
    static constexpr UInt8 __attribute__((__aligned__(8))) masks[] =
    {
        0, 1, 2, 2, 4, 3, 2, 1, /* offset = 0, not used as mask, but for shift amount instead */
        0, 0, 0, 0, 0, 0, 0, 0, /* offset = 1 */
        0, 1, 0, 1, 0, 1, 0, 1,
        0, 1, 2, 0, 1, 2, 0, 1,
        0, 1, 2, 3, 0, 1, 2, 3,
        0, 1, 2, 3, 4, 0, 1, 2,
        0, 1, 2, 3, 4, 5, 0, 1,
        0, 1, 2, 3, 4, 5, 6, 0,
        0, 0, 0, 0, 0, 0, 0, 0, /* this row is not used: padding to allow read 16 bytes starting at previous row */
    };

    /// Rows are 8 bytes apart but a full 16 bytes are loaded (unaligned), hence the padding row above.
    const __m128i * mask_ptr = reinterpret_cast<const __m128i *>(masks + 8 * offset);
    const __m128i source_bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(match));
    const __m128i shuffle_row = _mm_loadu_si128(mask_ptr);

    /// A full 16-byte store is issued to `op`.
    _mm_storeu_si128(reinterpret_cast<__m128i *>(op), _mm_shuffle_epi8(source_bytes, shuffle_row));

    match += masks[offset];

#else
    /// Without SSSE3 there is no byte shuffle; defer to the non-shuffle variant.
    copyOverlap8(op, match, offset);
#endif
}
コード例 #11
0
ファイル: sifbios.c プロジェクト: ryo/netbsd-src
/*
 * Packs the service registration parameters into a packed, 4-byte aligned
 * argument block and issues call number 55 through CALL().
 *
 * queue        - stored in the block's receive_queue slot
 * server       - server object being registered
 * rpc_id       - RPC identifier for the service
 * service_func - service handler and its argument (service_arg)
 * cancel_func  - cancel handler and its argument (cancel_arg)
 *
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.
 */
void
sifrpc_register_service(struct sifrpc_server_system *queue,
                        struct sifrpc_server *server, sifrpc_id_t rpc_id,
                        void *(*service_func)(sifrpc_callno_t, void *, size_t), void *service_arg,
                        void *(*cancel_func)(sifrpc_callno_t, void *, size_t), void *cancel_arg)
{
    /* Field order and packing define the layout handed to CALL(). */
    struct {
        void *server;
        sifrpc_id_t rpc_id;
        sifrpc_rpcfunc_t service_func;
        void *service_arg;
        sifrpc_rpcfunc_t cancel_func;
        void *cancel_arg;
        void *receive_queue;
    } __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
        .server = server,
        .rpc_id = rpc_id,
        .service_func = service_func,
        .service_arg = service_arg,
        .cancel_func = cancel_func,
        .cancel_arg = cancel_arg,
        .receive_queue = queue,
    };

    CALL(void, 55, &sifbios_arg);
}
コード例 #12
0
ファイル: sifbios.c プロジェクト: lacombar/netbsd-alc
/*
 * Packs the RPC call parameters into a packed, 4-byte aligned argument block
 * and issues call number 52 through CALL(), returning its int result.
 *
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.
 */
int
sifrpc_call(struct sifrpc_client *_cookie, sifrpc_callno_t call_no,
    u_int32_t rpc_mode, void *sendbuf, size_t sendbuf_sz, void *recvbuf,
    size_t recvbuf_sz, void (*end_func)(void *), void *end_arg)
{
	/* Field order and packing define the layout handed to CALL(). */
	struct {
		struct sifrpc_client *_cookie;	/* bound client cookie */
		sifrpc_callno_t call_no; /* passed to service function arg. */
		u_int32_t rpc_mode;
		void *sendbuf;
		size_t sendbuf_sz;
		void *recvbuf;
		size_t recvbuf_sz;
		sifrpc_endfunc_t end_func;
		void *end_arg;
	} __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
		.shadow_placeholder_unused_never = 0,
	};

	return CALL(int, 52, &sifbios_arg);
}
コード例 #13
0
ファイル: aesni-intel_glue.c プロジェクト: EMFPGA/linux_media
/*
 * Builds the 16-byte IV for the request (4 nonce bytes from the context,
 * 8 bytes from req->iv, then a 32-bit big-endian counter of 1) and calls
 * gcmaes_decrypt().  Returns -EINVAL unless req->assoclen is 16 or 20,
 * otherwise whatever gcmaes_decrypt() returns.
 */
static int helper_rfc4106_decrypt(struct aead_request *req)
{
	__be32 counter = cpu_to_be32(1);
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct aesni_rfc4106_gcm_ctx *gcm_ctx = aesni_rfc4106_gcm_ctx_get(aead);
	void *expanded_key = &(gcm_ctx->aes_key_expanded);
	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
	unsigned int n;

	/*
	 * With rfc4106 64-bit extended sequence numbers assumed supported,
	 * the AAD length must be exactly 16 or 20 bytes.
	 */
	if (unlikely(!(req->assoclen == 16 || req->assoclen == 20)))
		return -EINVAL;

	/* bytes 0..3: nonce */
	for (n = 0; n < 4; n++)
		iv[n] = gcm_ctx->nonce[n];
	/* bytes 4..11: per-request IV */
	for (n = 0; n < 8; n++)
		iv[4 + n] = req->iv[n];
	/* bytes 12..15: counter */
	*((__be32 *)&iv[12]) = counter;

	/* The length passed on is the AAD length minus 8. */
	return gcmaes_decrypt(req, req->assoclen - 8, gcm_ctx->hash_subkey,
			      iv, expanded_key);
}
コード例 #14
0
ファイル: sd.c プロジェクト: Pikrass/pios
// Reads `len` blocks starting at `start` from the card into `dest` by issuing
// CMD_READ_MULTIPLE_BLOCK and then handing a DMA control block to
// dma_initiate(); returns whatever dma_initiate() returns.
// For now, start and len are a number of 512-byte blocks
int sd_read(struct sd_card *card, int start, int len, void *dest) {
	// NOTE(review): ctrl lives in this stack frame; that is only safe if
	// dma_initiate() is done with the control block before it returns - confirm.
	struct dma_cb ctrl __attribute__ ((__aligned__(32)));

	// Cards of type 0 take the start argument multiplied by the block size.
	if(card->type == 0)
		start *= 512;

	dmb();
	*BLKSIZECNT = BLKSIZE(512) | BLKCNT(len);

	sd_send_command(CMD_READ_MULTIPLE_BLOCK,
			TM_BLKCNT_EN | TM_AUTO_CMD_12 | TM_DAT_CARD_TO_HOST |
			TM_MULTI_BLOCK | CMD_RSPNS_48 | CMD_ISDATA, start);

	// Fixed source (the DATA register), incrementing destination.
	ctrl.ti = DMA_TI_INTEN | DMA_TI_WAIT_RESP |
		DMA_TI_DEST_INC | DMA_TI_DEST_WIDTH |
		DMA_TI_SRC_DREQ | DMA_TI_PERMAP_EMMC;
	ctrl.source_ad = IO_TO_BUS(DATA);
	ctrl.dest_ad = virt_to_phy(dest);
	ctrl.txfr_len = 512 * len;
	ctrl.stride = 0;
	ctrl.nextconbk = 0;	// no chained control block

	return dma_initiate(DMA_CHAN_EMMC, &ctrl);
}
コード例 #15
0
ファイル: sifbios.c プロジェクト: ryo/netbsd-src
/*
 * Packs a command packet description and a data buffer description into a
 * packed, 4-byte aligned argument block and issues call number 34 through
 * CALL(), returning its sifdma_id_t result.
 *
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.
 */
sifdma_id_t
sifcmd_queue(sifcmd_sw_t sw, vaddr_t cmd_pkt_addr, size_t cmd_pkt_sz,
             vaddr_t src_addr, vaddr_t dst_addr, vsize_t buf_sz)
{
    /* Field order and packing define the layout handed to CALL(). */
    struct {
        sifcmd_sw_t sw;
        vaddr_t cmd_pkt_addr;	/* command buffer */
        size_t cmd_pkt_sz;
        vaddr_t src_addr;	/* data buffer */
        vaddr_t dst_addr;
        vsize_t buf_sz;
    } __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
        .sw = sw,
        .cmd_pkt_addr = cmd_pkt_addr,
        .cmd_pkt_sz = cmd_pkt_sz,
        .src_addr = src_addr,
        .dst_addr = dst_addr,
        .buf_sz = buf_sz,
    };

    return CALL(sifdma_id_t, 34, &sifbios_arg);
}
コード例 #16
0
ファイル: sifbios.c プロジェクト: ryo/netbsd-src
/*
 * Packs the switch value together with the handler function and argument
 * taken from `holder` into a packed, 4-byte aligned argument block and
 * issues call number 36 through CALL().
 *
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.
 */
void
sifcmd_establish(sifcmd_sw_t sw, struct sifcmd_callback_holder *holder)
{
    /* Field order and packing define the layout handed to CALL(). */
    struct {
        sifcmd_sw_t sw;
        sifcmd_callback_t func;
        void *arg;
    } __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
        .sw = sw,
        .func = holder->func,
        .arg = holder->arg,
    };

    CALL(void, 36, &sifbios_arg);
}

/*
 * Issues call number 37 through CALL() with a pointer to a 32-bit copy
 * of `sw` as the only argument.
 */
void
sifcmd_disestablish(sifcmd_sw_t sw)
{
    /* CALL() takes the address of its argument, so sw is copied to a local. */
    u_int32_t sw_word = sw;

    CALL(void, 37, &sw_word);
}
コード例 #17
0
ファイル: main.c プロジェクト: ddddavidee/PHC
/*
 * Times a single PHS() call on a fixed password with a random 16-byte salt
 * and prints the elapsed CPU time and its reciprocal.
 *
 * Changes from the previous version:
 *  - the cost parameters are computed with integer shifts instead of pow(),
 *    so no floating-point round trip is involved;
 *  - the buffer sizes are compile-time constants, so res and salt are plain
 *    arrays rather than variable-length arrays;
 *  - the salt loop index is a size_t, matching the type of saltlen.
 */
int main(){

	enum { SALTLEN = 16, OUTLEN = 32 };

	size_t saltlen = SALTLEN;
	size_t outlen  = OUTLEN;
	unsigned int t_cost = 1u << 13;
	unsigned int m_cost = 1u << 15;
	size_t i;

	char *passwd="password";
	uint8_t res[OUTLEN]
	__attribute__((__aligned__(__alignof__(uint32_t))));

	srand(time(NULL));
	rand();	/* discard the first value, as before */
	uint32_t salt[SALTLEN >> 2];
	for (i = 0; i < (saltlen >> 2); i++)
		salt[i] = rand();

	clock_t begin = clock();

	PHS((void *)res,outlen,(void *)passwd,strlen(passwd),(void *)salt,saltlen,t_cost,m_cost);

	clock_t elapsed = clock() - begin;

	float sec = (float)elapsed/CLOCKS_PER_SEC;
	/* NOTE(review): if the measured time rounds to 0 the second value is a
	   division by zero -- confirm whether that case needs handling. */
	printf("%.3f secs,%.3f passwords\n",sec,(float)(1/sec));

	return 0;
}
コード例 #18
0
ファイル: sifbios.c プロジェクト: ryo/netbsd-src
/*
 * Packs the receive parameters (source and destination addresses, size,
 * mode and completion callback) into a packed, 4-byte aligned argument block
 * and issues call number 50 through CALL(), returning its int result.
 *
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.
 */
int
sifrpc_receive_buffer(struct sifrpc_receive *_cookie, void *src_iop,
                      void *dst_ee, size_t sz, u_int32_t rpc_mode, void (*end_func)(void *),
                      void *end_arg)
{
    /* Field order and packing define the layout handed to CALL(). */
    struct {
        void *_cookie;
        void *src_iop;
        void *dst_ee;
        size_t sz;
        u_int32_t rpc_mode;
        sifrpc_endfunc_t end_func;
        void *end_arg;
    } __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
        ._cookie = _cookie,
        .src_iop = src_iop,
        .dst_ee = dst_ee,
        .sz = sz,
        .rpc_mode = rpc_mode,
        .end_func = end_func,
        .end_arg = end_arg,
    };

    return CALL(int, 50, &sifbios_arg);
}
コード例 #19
0
/**
 * Dot product of the first (1+NZEROS) floats of oldie and coef.
 *
 * The bulk is done four floats at a time in SSE; the remaining
 * (1+NZEROS)&3 elements are handled by the scalar loop afterwards.
 * Both pointers are read with movaps and must be 16-byte aligned.
 *
 * Fixes over the previous version: the asm advances both pointers and
 * counts mod16 down, so those operands are now declared read-write ("+r")
 * instead of input-only. The scalar tail loop depends on the advanced
 * pointers, which the compiler was previously free to ignore. The
 * clobbered xmm registers, the flags and the store to sum16 are declared too.
 *
 * NOTE(review): the asm loop is entered unconditionally, so this assumes
 * (1+NZEROS) >= 4 - confirm against the definition of NZEROS.
 * NOTE(review): sum16 is static, so concurrent calls share it.
 */
float ADMDolbyContext::DolbyShift_convolutionAlignSSE(float *oldie, float *coef)
{
    float *src1 = oldie;        // Aligned also
    float *src2 = coef;         // that one is always aligned to a 16 bytes boundary
    int mod16 = (1+NZEROS)>>2;  // number of 4-float groups
    int left = (1+NZEROS)&3;    // leftover elements for the scalar loop
    static float __attribute__ ((__aligned__ (16))) sum16[4];

    float sum = 0;
    __asm__ __volatile__(
                        "xorps          %%xmm2,%%xmm2     \n" // carry
                        "1: \n"
                        "movaps         (%0),%%xmm0  \n" // src1
                        "movaps         (%1),%%xmm1  \n" // src2
                        "mulps          %%xmm1,%%xmm0 \n" // src1*src2
                        "addps          %%xmm0,%%xmm2 \n" // sum+=src1*src2
                        "add           $16,%0      \n"
                        "add           $16,%1      \n"
                        "sub           $1,%2      \n"
                        "jnz             1b        \n"
                        "movaps        %%xmm2,(%3)        \n"

                : "+r" (src1), "+r" (src2), "+r" (mod16)
                : "r" (sum16)
                : "xmm0", "xmm1", "xmm2", "memory", "cc"
                );

    // src1/src2 now point just past the last full group of four.
    for (int i = 0; i < left; i++)
        sum += (*src1++)*(*src2++);
    // Fold the four partial sums produced by the SSE loop.
    for (int i = 0; i < 4; i++)
        sum += sum16[i];
    return sum;
}
コード例 #20
0
int main(int argc, char* argv[]) {
    const int mb = 1024 * 1024;
    pid_t child_pid, wpid;
    int status = 0;

    for (int num_of_processes = 1; num_of_processes <= 16; num_of_processes *= 2) {
        for (int num_of_tries = 0; num_of_tries < 10; num_of_tries++) {
            printf("========START: num_of_processes : %d try: %d========\n", num_of_processes, num_of_tries);
            int curr_child_process = -1;
            for (int i = 0; i < num_of_processes; i++) {
                curr_child_process++;

                if ((child_pid = fork()) == 0) {
                    int fd;
                    int total_bytes_read = 0;
                    int bytes_read = 0;
                    static char block[4096] __attribute__ ((__aligned__ (4096)));
                    const int block_size = 4096;
                    const int size = 8 * mb;
                    struct timespec start, end, time_diff;

                    char filename[80];
                    snprintf(filename, sizeof (filename), "./random8M_%d", curr_child_process);
                    printf("start reading child pid: %d, filename: %s\n", child_pid, filename);

                    if ((fd = open(filename, O_RDONLY | O_DIRECT)) == -1) {
                        perror("Error: read error");
                        exit(1);
                    }

                    if (lseek(fd, 0, SEEK_SET) == -1) {
                        perror("Error: lseek()");
                        exit(1);
                    }

                    clock_gettime(CLOCK_MONOTONIC, &start);

                    while (total_bytes_read < size) {
                        if ((bytes_read = read(fd, block, block_size)) == -1) {
                            perror("Error: read()");
                            exit(1);
                        }
                        total_bytes_read += bytes_read;
                    }

                    clock_gettime(CLOCK_MONOTONIC, &end);
                    timespec_subtract(&start, &end, &time_diff);
                    printf("INSTANT diff: %ld sec %lld ns, start time: %ld sec %lld ns, end time: %ld sec %lld ns\n", time_diff.tv_sec, (uint64_t)time_diff.tv_nsec, start.tv_sec, (uint64_t)start.tv_nsec, end.tv_sec, (uint64_t)end.tv_nsec);
                    close(fd);

                    exit(1);
                }
            }

            while ((wpid = wait(&status)) > 0) {
                printf("end reading child_pid: %d, status: %d\n", (int)wpid, status);
            }
            printf("========END: num_of_processes : %d try: %d========\n\n", num_of_processes, num_of_tries);
        }
    }
コード例 #21
0
ファイル: sifbios.c プロジェクト: ryo/netbsd-src
/*
 * Packs the bind parameters into a packed, 4-byte aligned argument block
 * and issues call number 51 through CALL(), returning its int result.
 *
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.
 */
int
sifrpc_bind(struct sifrpc_client *_cookie, sifrpc_id_t rpc_id,
            u_int32_t rpc_mode, void (*end_func)(void *), void *end_arg)
{
    /* Field order and packing define the layout handed to CALL(). */
    struct {
        void *_cookie;		/* filled by this call */
        sifrpc_id_t rpc_id;	/* specify server RPC id */
        u_int32_t rpc_mode;
        sifrpc_endfunc_t end_func;
        void *end_arg;
    } __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
        ._cookie = _cookie,
        .rpc_id = rpc_id,
        .rpc_mode = rpc_mode,
        .end_func = end_func,
        .end_arg = end_arg,
    };

    return CALL(int, 51, &sifbios_arg);
}
コード例 #22
0
// Sequential read benchmark: for each of nine files, measures the tick count
// of reading up to READLIMIT bytes in FOUR_KB chunks with O_DIRECT, averages
// lessInner runs per sample, collects lessIter samples, and writes the raw
// samples, the converted times and the mean/variance to per-file text files.
//
// Fixes over the previous version:
//  - a failed open() is detected with fd < 0 (0 is a valid descriptor) and
//    the run is skipped instead of reading from an invalid descriptor;
//  - the read loop stops on a read error and on end of file; previously both
//    cases looped forever because the byte count never reached READLIMIT.
int main()
{
	warmUp();
	uint64_t start,end;
	// O_DIRECT reads are done into this buffer, aligned to its own size.
	static char buffer[FOUR_KB] __attribute__ ((__aligned__ (FOUR_KB)));
	string prefix = "/mnt/nfs/import/";
	string files[] = {"file1", "file2", "file3", "file4", "file5", "file6", "file7", "file8", "file9"};
	double results[lessIter], sum;
	for (int k=0;k<9;k++)
	{
		for(int i=0;i<lessIter;++i)
		{
			sum = 0;
			for (int j=0;j<lessInner;j++)
			{
				int fd = open((prefix + files[k]).c_str(), O_RDONLY | O_DIRECT);
				if (fd < 0)
				{
					cout << "open failed\n";
					continue;	// nothing to time for this run
				}
				int n;
				getStartTick(start);
				int tot = 0;
				while ( true )
				{
					n=read(fd, &buffer, FOUR_KB);
					if (n<0)
					{
						cout << "Read error\n";
						break;
					}
					if (n == 0)
						break;	// end of file before READLIMIT
					tot += n;
					if (tot >= READLIMIT)
						break;
				}
				getEndTick(end);
				close (fd);
				sum += end - start;	
			}
			sum /= lessInner;
			results[i] = sum;
		}
		string fileName = files[k] + "SequentialRemoteCycles.txt";
		string fileTimeName = files[k] + "SequentialRemoteTime.txt";
		// Raw tick samples first, then the same array after conversion by getTimeFromTicks().
		writeToFile(results, fileName);
		getTimeFromTicks(results, lessIter);
		writeToFile(results, fileTimeName);
		pair<double, double> meanAndVariance = getMeanAndVariance(results, lessIter);
		cout << "File: " << files[k] << "\n";
		cout << "File read mean= " << (meanAndVariance.first * FOUR_KB / READLIMIT) << "\n";
		cout << "File read variance= " << (meanAndVariance.second * FOUR_KB / READLIMIT) << "\n";
		
		// The same summary is also written to a per-file results file.
		ofstream myfile;
		myfile.open ( (files[k] + "SequentialRemoteResults.txt").c_str());
  		myfile << "File: " << files[k] << "\n";
		myfile << "File read mean= " << (meanAndVariance.first * FOUR_KB / READLIMIT) << "\n";
		myfile << "File read variance= " << (meanAndVariance.second * FOUR_KB / READLIMIT) << "\n";
		myfile.close();
		
	}
	return 0;
}
コード例 #23
0
ファイル: vect-43.c プロジェクト: 5432935/crossbridge
/* Writes the element-wise product of the local arrays pb and pc into the
   caller-supplied pa (N elements), then passes all three arrays to bar ().
   Always returns 0.  */
int
main1 (float *pa)
{
  int i;
  float pb[N] __attribute__ ((__aligned__(16))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
  float pc[N] __attribute__ ((__aligned__(16))) = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};

 /* Not vectorizable: pa may alias pb and/or pc, since their addresses escape.  */
  for (i = 0; i < N; i++)
    {
      pa[i] = pb[i] * pc[i];
    }

  /* The results are handed to bar (), which is defined outside this view.  */
  bar (pa,pb,pc);

  return 0;
}
コード例 #24
0
ファイル: vect-61.c プロジェクト: 5432935/crossbridge
/* Writes b[i+1] * c[i+1] into the caller-supplied pa for the first n/2
   elements, then passes pa and the two local arrays to bar ().  Always
   returns 0.  */
int
main1 (int n , float *pa)
{
  int i;
  float b[N] __attribute__ ((__aligned__(16))) = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,48,51,54,57};
  float c[N] __attribute__ ((__aligned__(16))) = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19};
  /* The local arrays are accessed only through these pointers below.  */
  float *pb = b;
  float *pc = c;

  /* Inputs are read starting at index 1, the output is written from index 0.  */
  for (i = 0; i < n/2; i++)
    {
      pa[i] = pb[i+1] * pc[i+1];
    }

  /* The results are handed to bar (), which is defined outside this view.  */
  bar (pa,pb,pc);

  return 0;
}
コード例 #25
0
ファイル: sifbios.c プロジェクト: ryo/netbsd-src
/*
 * Issues call `callno` through CALL() with `arg`, then waits for the
 * completion callback to set a local flag.
 *
 * The call itself is retried up to 100 times with a 20000-unit delay()
 * between attempts; the completion flag is polled up to 10000 times with a
 * 100-unit delay().  On success the result field of the argument block is
 * stored in *result and 0 is returned; -1 is returned if either wait runs
 * out of retries.
 *
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.  The result field is
 * not named in the initialiser and therefore starts as 0, as before.
 */
int
sifbios_rpc_call(int callno, void *arg, int *result)
{
    volatile int done = 0;	/* set to 1 by sifbios_rpc_callback */
    int retry;
    struct {
        int result;
        void *arg;
        void (*callback)(void *, int);
        volatile void *callback_arg;
    } __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
        .arg = arg,
        .callback = sifbios_rpc_callback,
        .callback_arg = (volatile void *)&done,
    };

    /* call SIF BIOS */
    retry = 100;
    while (CALL(int, callno, &sifbios_arg) != 0 && --retry > 0)
        delay(20000);	/* .02 sec. for slow IOP */

    /* retry only reaches 0 when every attempt failed */
    if (retry == 0) {
        printf("SIF BIOS call %d failed\n", callno);
        goto error;
    }

    /* wait IOP response (1 sec.) */
    _sif_call_start();
    retry = 10000;
    while (!done && --retry > 0)
        delay(100);
    _sif_call_end();

    if (retry == 0) {
        printf("IOP not respond (callno = %d)\n", callno);
        goto error;
    }

    *result = sifbios_arg.result;

    return (0);

error:
    return (-1);
}

/*
 * Completion callback: sets the int pointed to by `arg` to 1.
 * `result` is not used.
 *
 * The flag is written through a volatile-qualified pointer because the
 * caller in this file (sifbios_rpc_call) passes the address of a
 * `volatile int`; storing to a volatile object through a non-volatile
 * lvalue is undefined behaviour.
 */
void
sifbios_rpc_callback(void *arg, int result)
{
    volatile int *done = (volatile int *)arg;

    (void)result;
    *done = 1;
}
コード例 #26
0
/* Test driver: runs check_vect (), then calls main1 () with a local,
   maximally aligned array of N floats.  */
int main (void)
{
  /* Output buffer handed to main1 (); it is not initialised here.  */
  float a[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));

  check_vect ();
  main1 (a);

  return 0;
}
コード例 #27
0
ファイル: ex2part1.c プロジェクト: guysalama/OS
/*
 * Random-offset write benchmark.
 *
 * Arguments: argv[1] = target path (validated by typeCheck), argv[2] = "0"
 * or "1" selecting O_DIRECT, argv[3] = write size ws (non-zero integer).
 * Fills a 1 MB page-aligned buffer with random lowercase letters, performs
 * (128 * MB) / (ws * KB) seek+write pairs at random offsets and prints the
 * elapsed time in milliseconds.  Returns 0 on success and -1 on any error.
 *
 * Fixes over the previous version:
 *  - the buffer fill loop indexed buf with the uninitialised `i` instead of
 *    the loop variable `j` (undefined behaviour, buffer never filled);
 *  - the open() error message reported argv[2] (the O_DIRECT flag) instead
 *    of argv[1], the path that failed to open, unlike the seek/write errors.
 */
int main(int argc, char** argv){
	int i, j, ws, fd, o_direct;
	if (argc != 4){
		printf(ARGS_ERROR);
		return -1;
	}
	if (typeCheck(argv[1]) == -1){
		return -1;
	}
	if (!(strcmp(argv[2], "1") == 0 || strcmp(argv[2], "0") == 0)){
		printf(ARG_ERROR, argv[2]);
		return -1;
	}
	ws = atoi(argv[3]);
	if (!ws){ // ws == 0
		printf(ARG_ERROR, argv[3]);
		return -1;
	}
	/* Page-aligned so the same buffer can be used with O_DIRECT. */
	static char buf[MB] __attribute__((__aligned__(4096)));
	for (j = 0; j < MB; j++) buf[j] = 'a' + (random() % 26);


	struct timeval t1, t2; // reference: http://stackoverflow.com/questions/2150291/how-do-i-measure-a-time-interval-in-c, first answer
	double elapsedTime;
	gettimeofday(&t1, NULL); // start timer

	o_direct = atoi(argv[2]);
	if (o_direct) fd = open(argv[1], O_WRONLY | O_DIRECT, S_IRWXU | S_IRWXG | S_IRWXO);
	else  fd = open(argv[1], O_WRONLY, S_IRWXU | S_IRWXG | S_IRWXO);
	if (fd == -1){
		printf(OPEN_ERROR, argv[1], strerror(errno));
		return -1;
	}
	/* NOTE(review): repeats treats ws as a size in KB, while the offset and
	   the write length below use ws as a byte count -- confirm the intended
	   unit before changing either. */
	int repeats = (128 * MB) / (ws * KB);
	for (i = 0; i < repeats; i++){
		int offset = (random() % repeats) * ws;
		if (lseek(fd, offset, SEEK_SET) == (off_t)-1){
			printf(SEEK_ERROR, argv[1], strerror(errno));
			close(fd);
			return -1;
		}
		if (write(fd, buf, ws) == -1){
			printf(WRITE_ERROR, argv[1], strerror(errno));
			close(fd);
			return -1;
		}
	}
	close(fd);

	gettimeofday(&t2, NULL);
	elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;      // sec to ms
	elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0;   // us to ms
	printf(THROUGHPUT, elapsedTime);
	return 0;
}
コード例 #28
0
/* Passes the two local arrays to foo (), writes b[i+1] * c[i+1] into the
   caller-supplied pa for the first N/2 elements, then passes all three
   arrays to bar ().  Always returns 0.  Marked noinline.  */
__attribute__ ((noinline)) int
main1 (float *pa)
{
  int i;
  float b[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  float c[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
  /* The local arrays are accessed only through these pointers below.  */
  float *pb = b;
  float *pc = c;

  /* b and c are not initialised here.
     NOTE(review): presumably foo () fills them -- confirm against its
     definition, which is outside this view.  */
  foo (pb, pc);

  /* Inputs are read starting at index 1, the output is written from index 0.  */
  for (i = 0; i < N/2; i++)
    {
      pa[i] = pb[i+1] * pc[i+1];
    }

  bar (pa, pb, pc);

  return 0;
}
コード例 #29
0
ファイル: vect-61.c プロジェクト: 5432935/crossbridge
/* Test driver: runs check_vect (), then calls main1 () with the element
   count N (passed through a variable) and a local 16-byte aligned array.  */
int main (void)
{
  /* N is passed via a variable rather than as a literal argument.  */
  int n=N;
  float a[N] __attribute__ ((__aligned__(16)));

  check_vect ();
  main1 (n,a);

  return 0;
}
コード例 #30
0
ファイル: sifbios.c プロジェクト: lacombar/netbsd-alc
/*
 * Queue DMA request to SIFBIOS.  Returns queue identifier.
 *
 * Packs the transfer array pointer and its element count into a packed,
 * 4-byte aligned argument block and issues call number 18 through CALL().
 * The block is initialised with standard C99 designated initialisers; the
 * former "field:" form is an obsolete GNU extension.
 */
sifdma_id_t
sifdma_queue(struct sifdma_transfer *arg, int n)
{
	struct {
		void *arg;	/* pointer to sifdma_transfer array */
		int n;		/* # of elements */
	} __attribute__((__packed__, __aligned__(4))) sifbios_arg = {
		.arg = arg,
		.n = n
	};

	return CALL(sifdma_id_t, 18, &sifbios_arg);
}