/// Read key presses and report on each one until told to stop.
///
/// Every character event is timed, appended to the pending bind sequence and described on output.
/// A non-character event flushes the pending bind command; outside continuous mode it also ends
/// the function once at least one character has been handled.
///
/// \param continuous_mode keep reading after a non-character event instead of returning.
static void process_input(bool continuous_mode) {
    std::vector<wchar_t> pending_seq;
    double last_stamp = 0.0;
    bool got_char = false;

    std::fwprintf(stderr, L"Press a key\n\n");
    while (keep_running) {
        // A pending interrupt is reported as the terminal's VINTR character rather than read
        // from the input queue.
        char_event_t evt{0};
        if (!reader_test_and_clear_interrupted()) {
            evt = input_common_readch_timed(true);
        } else {
            evt = char_event_t{shell_modes.c_cc[VINTR]};
        }

        if (!evt.is_char()) {
            output_bind_command(pending_seq);
            if (!got_char || continuous_mode) {
                continue;
            }
            return;
        }

        wchar_t ch = evt.get_char();
        last_stamp = output_elapsed_time(last_stamp, got_char);
        add_char_to_bind_command(ch, pending_seq);
        output_info_about_char(ch);

        const bool matched_key_name = output_matching_key_name(ch);
        if (matched_key_name) {
            output_bind_command(pending_seq);
        }

        if (should_exit(ch)) {
            std::fwprintf(stderr, L"\nExiting at your request.\n");
            return;
        }

        got_char = true;
    }
}
/// Process the characters we receive as the user presses keys.
///
/// For every byte read this prints the time elapsed since the previous byte, its decimal and hex
/// value and a printable description, followed by the bind key name when key_name() returns one.
///
/// \param continuous_mode when false, the argument passed to input_common_readch() becomes true
///        once the first character has been seen; when true, a reminder of how to quit is printed
///        whenever at least 200000 usec passed since the previous character.
///
/// The loop ends when input_common_readch() returns WEOF or a value above 255, or when
/// should_exit() returns true for the character.
///
/// Note: prev_tstamp is not declared in this function; it is expected to be defined elsewhere in
/// the file and carries the timestamp of the previous character between iterations.
void process_input(bool continuous_mode) {
    bool first_char_seen = false;
    while (true) {
        wchar_t wc = input_common_readch(first_char_seen && !continuous_mode);
        if (wc == WEOF) {
            return;
        }
        if (wc > 255) {
            printf("\nUnexpected wide character from input_common_readch(): %lld / 0x%llx\n",
                   (long long)wc, (long long)wc);
            return;
        }

        long long int curr_tstamp, delta_tstamp;
        timeval char_tstamp;
        gettimeofday(&char_tstamp, NULL);
        // Widen tv_sec before multiplying: with a 32-bit time_t the product tv_sec * 1000000
        // would overflow before being stored in the 64-bit timestamp.
        curr_tstamp = static_cast<long long>(char_tstamp.tv_sec) * 1000000 + char_tstamp.tv_usec;
        delta_tstamp = curr_tstamp - prev_tstamp;
        // Clamp so the elapsed time always fits the six-digit "%6lld" column.
        if (delta_tstamp >= 1000000) delta_tstamp = 999999;
        if (delta_tstamp >= 200000 && continuous_mode) {
            printf("\n");
            printf("Type 'exit' or 'quit' to terminate this program.\n");
            printf("\n");
        }
        prev_tstamp = curr_tstamp;

        // wc is known to be <= 255 here, so it is handled as a single byte from now on.
        unsigned char c = wc;
        if (c < 32) {
            // Control characters. Adding 64 maps the code to the letter used in \cX notation.
            if (ctrl_equivalents[c]) {
                printf("%6lld usec  dec: %3u  hex: %2x  char: %s (aka \\c%c)\n", delta_tstamp, c, c,
                       ctrl_equivalents[c], c + 64);
            } else {
                printf("%6lld usec  dec: %3u  hex: %2x  char: \\c%c\n", delta_tstamp, c, c, c + 64);
            }
        } else if (c == 32) {
            // The space character.
            printf("%6lld usec  dec: %3u  hex: %2x  char: <space>\n", delta_tstamp, c, c);
        } else if (c == 127) {
            // The "del" character.
            printf("%6lld usec  dec: %3u  hex: %2x  char: \\x7f (aka del)\n", delta_tstamp, c, c);
        } else if (c >= 128) {
            // Non-ASCII characters (i.e., those with bit 7 set).
            printf("%6lld usec  dec: %3u  hex: %2x  char: non-ASCII\n", delta_tstamp, c, c);
        } else {
            // ASCII characters that are not control characters.
            printf("%6lld usec  dec: %3u  hex: %2x  char: %c\n", delta_tstamp, c, c, c);
        }

        // key_name() hands back a buffer that this function releases with free().
        char *const name = key_name(c);
        if (name) {
            printf("FYI: Saw sequence for bind key name \"%s\"\n", name);
            free(name);
        }

        if (should_exit(c)) {
            printf("\nExiting at your request.\n");
            break;
        }

        first_char_seen = true;
    }
}
/*
 * Run one round of the O_DIRECT write / remote verify test.
 *
 * The root rank (rank 0) prepares the work file and then, num_chunks times,
 * writes one randomly chosen chunk through an O_DIRECT descriptor and sends
 * the corresponding write unit to every other rank. The other ranks record
 * the received write unit in remote_wus and verify the whole file against
 * it with O_DIRECT cleared from the open flags.
 *
 * round_no is not used by the body.
 *
 * Returns the last value stored in ret; failures are handed to
 * should_exit() / abort_printf() as they occur.
 */
static int one_round_run(int round_no)
{
	int ret = 0, fd = -1, j;
	unsigned long i, chunk_no = 0;
	struct write_unit wu;

	MPI_Request request;
	MPI_Status  status;

	/*
	 * Root rank creates working file in chunks.
	 */
	if (!rank) {
		rank_printf("Prepare file of %lu bytes\n", file_size);

		open_rw_flags |= O_DIRECT;
		open_ro_flags |= O_DIRECT;

		ret = prep_orig_file_in_chunks(workfile, file_size);
		should_exit(ret);
	}

	MPI_Barrier_Sync();

	if (!rank) {
		fd = open_file(workfile, open_rw_flags);
		should_exit(fd);
	} else {

		/*
		 * Verification at the very beginning doesn't do anything more
		 * than reading the file into pagecache on none-root nodes.
		 */
		open_rw_flags &= ~O_DIRECT;
		open_ro_flags &= ~O_DIRECT;

		ret = verify_file(1, NULL, remote_wus, workfile, file_size);
		/*
		 * Check the verification result. fd is never assigned on
		 * non-root ranks (it still holds its initial -1), so it must
		 * not be the value tested here.
		 */
		should_exit(ret);
	}

	MPI_Barrier_Sync();

	/*
	 * Root ranks write chunks at random serially.
	 */
	for (i = 0; i < num_chunks; i++) {

		MPI_Barrier_Sync();
		/*
		 * Root rank generates random write unit, then sends it to
		 * rest of ranks in-memory after O_DIRECT write into file.
		 */
		if (!rank) {

			chunk_no = get_rand_ul(0, num_chunks - 1);
			prep_rand_dest_write_unit(&wu, chunk_no);
			rank_printf("Write #%lu chunk with char(%c)\n",
				    chunk_no, wu.wu_char);
			ret = do_write_chunk(fd, wu);
			should_exit(ret);

			memcpy(&remote_wus[wu.wu_chunk_no], &wu, sizeof(wu));

			for (j = 1; j < size; j++) {
				if (verbose)
					rank_printf("Send write unit #%lu chunk "
						    "char(%c) to rank %d\n",
						     wu.wu_chunk_no,
						     wu.wu_char, j);
				ret = MPI_Isend(&wu, sizeof(wu), MPI_BYTE, j,
						1, MPI_COMM_WORLD, &request);
				if (ret != MPI_SUCCESS)
					abort_printf("MPI_Isend failed: %d\n",
						     ret);
				MPI_Wait(&request, &status);

			}
		} else {

			ret = MPI_Irecv(&wu, sizeof(wu), MPI_BYTE, 0, 1,
					MPI_COMM_WORLD, &request);
			if (ret != MPI_SUCCESS)
				abort_printf("MPI_Irecv failed: %d\n", ret);
			MPI_Wait(&request, &status);

			if (verbose)
				rank_printf("Receive write unit #%lu chunk "
					    "char(%c)\n", wu.wu_chunk_no, wu.wu_char);

			/*
			 * Only replace the recorded unit when the received one
			 * is at least as recent as what is already stored.
			 */
			if (wu.wu_timestamp >=
				remote_wus[wu.wu_chunk_no].wu_timestamp)
				memcpy(&remote_wus[wu.wu_chunk_no],
				       &wu, sizeof(wu));
		}

		MPI_Barrier_Sync();

		if (rank) {

			/*
			 * All none-root ranks need to verify if O_DIRECT writes
			 * from remote root node can be seen locally.
			 */
			rank_printf("Try to verify whole file in chunks.\n");

			ret = verify_file(1, NULL, remote_wus, workfile, file_size);
			should_exit(ret);
		}
	}

	MPI_Barrier_Sync();

	/* 0 is a valid descriptor too, so only skip the unopened (-1) case. */
	if (!rank)
		if (fd >= 0)
			close(fd);

	return ret;
}
/*
 * Multi-node comprehensive reflink test.
 *
 * Rank 0 prepares the original file, its reflinks and one extra target.
 * After the barrier each rank performs a workload chosen from its rank
 * number while rank 0 repeatedly verifies the original file. Rank 0 then
 * verifies once more and removes everything it created.
 *
 * Always returns 0; every failure is passed to should_exit().
 */
static int comp_test(void)
{
	int ret;
	unsigned long pass;
	char target[PATH_MAX];

	root_printf("Test %d: Multi-nodes comprehensive test.\n", testno++);

	snprintf(orig_path, PATH_MAX, "%s/multi_original_comp_refile",
		 workplace);
	snprintf(target, PATH_MAX, "%s_target", orig_path);

	/* Set-up is done by the root rank only. */
	if (rank == 0) {
		ret = prep_orig_file(orig_path, file_size, 1);
		should_exit(ret);

		ret = do_reflinks(orig_path, orig_path, ref_counts, 0);
		should_exit(ret);

		ret = reflink(orig_path, target, 1);
		should_exit(ret);
	}

	MPI_Barrier_Sync();

	/* Rank 1 works on the extra target: reflink, cow, then unlink. */
	if (rank == 1) {
		printf("  *Test Rank %d: Doing reflinks,cows and unlink.\n",
		       rank);

		ret = do_reflinks(target, target, ref_counts, 0);
		should_exit(ret);

		ret = do_cows_on_write(target, ref_counts, file_size,
				       HUNK_SIZE);
		should_exit(ret);

		ret = do_unlinks(target, ref_counts);
		should_exit(ret);
	}

	/* The remaining workloads are selected by rank modulo 6. */
	switch (rank % 6) {
	case 2:
		/* Write to the former reflinks to cause cow. */
		printf("  *Test Rank %d: Doing cows.\n", rank);
		ret = do_cows_on_write(orig_path, ref_counts, file_size,
				       HUNK_SIZE);
		should_exit(ret);
		break;
	case 3:
		/* Read the former reflinks. */
		printf("  *Test Rank %d: Doing reads.\n", rank);
		ret = do_reads_on_reflinks(orig_path, ref_counts, file_size,
					   HUNK_SIZE);
		should_exit(ret);
		break;
	case 4:
		/* Append to the former reflinks. */
		printf("  *Test Rank %d: Doing appends.\n", rank);
		ret = do_appends(orig_path, ref_counts);
		should_exit(ret);
		break;
	case 5:
		/* Truncate the former reflinks. */
		printf("  *Test Rank %d: Doing truncates.\n", rank);
		ret = do_cows_on_ftruncate(orig_path, ref_counts, file_size);
		should_exit(ret);
		break;
	default:
		break;
	}

	/* Root verifies the original once per rank, one second apart. */
	if (rank == 0) {
		printf("  *Test Rank %d: Doing verifications.\n", rank);

		for (pass = 0; pass < size; pass++) {
			ret = verify_orig_file(orig_path);
			should_exit(ret);
			sleep(1);
		}
	}

	MPI_Barrier_Sync();

	/* Final verification and clean-up, again on the root rank. */
	if (rank == 0) {
		printf("  *Test Rank %d: Doing unlinks.\n", rank);

		ret = verify_orig_file(orig_path);
		should_exit(ret);

		ret = do_unlinks(orig_path, ref_counts);
		should_exit(ret);

		ret = do_unlink(orig_path);
		should_exit(ret);

		ret = do_unlink(target);
		should_exit(ret);
	}

	MPI_Barrier_Sync();

	return 0;
}
static int directio_test(void)
{
	int ret, fd;
	char dest[PATH_MAX];
	int sub_testno = 1;
	int o_flags_rw, o_flags_ro;

	unsigned long write_size = 0, read_size = 0;
	unsigned long append_size = 0, truncate_size = 0;
	unsigned long interval, offset = 0;

	unsigned long align_slice = 512;
	unsigned long align_filesz = align_slice;

	o_flags_rw = open_rw_flags;
	o_flags_ro = open_ro_flags;

	open_rw_flags |= O_DIRECT;
	open_ro_flags |= O_DIRECT;

	while (align_filesz < file_size)
		align_filesz += align_slice;

	root_printf("Test %d: Multi-nodes O_DIRECT test.\n", testno++);

	snprintf(orig_path, PATH_MAX, "%s/multi_original_directio_refile",
		 workplace);

	root_printf("  *SubTest %d:Prepare original inode %s.\n",
		    sub_testno++, orig_path);

	if (!rank) {

		ret = prep_orig_file_dio(orig_path, align_filesz);
		should_exit(ret);
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to reflink the original to increment the
	* refcount concurrently.
	*/
	root_printf("  *SubTest %d:Reflinking inode %s among nodes.\n",
		    sub_testno++, orig_path);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		ret = reflink(orig_path, dest, 1);
		should_exit(ret);
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to do cow to decrement the
	* refcount concurrently.
	*/

	root_printf("  *SubTest %d:Cowing reflinks by O_DIRECT writes among"
		    " nodes.\n", sub_testno++);
	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		interval = DIRECTIO_SLICE;
		offset = 0;

		while (offset < align_filesz) {

			write_size = DIRECTIO_SLICE;

			if (offset + write_size > align_filesz)
				write_size = align_filesz - offset;

			get_rand_buf(dio_buf, write_size);

			ret = write_at_file(dest, dio_buf, write_size, offset);

			should_exit(ret);

			offset += write_size + interval;
		}
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to read reflinks concurrently
	*/
	root_printf("  *SubTest %d:O_DIRECT reading reflinks among nodes.\n",
		    sub_testno++);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		interval = DIRECTIO_SLICE;
		offset = 0;

		while (offset < align_filesz) {

			read_size = DIRECTIO_SLICE;

			if (offset + read_size > align_filesz)
				read_size = align_filesz - offset;

			ret = read_at_file(dest, dio_buf, read_size, offset);

			should_exit(ret);

			offset = offset + read_size + interval;
		}

	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to append reflinks concurrently
	*/

       root_printf("  *SubTest %d:Appending reflinks among nodes.\n",
		   sub_testno++);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		fd = open64(dest, open_rw_flags | O_APPEND);
		if (fd < 0) {
			fd = errno;
			abort_printf("open file %s failed:%d:%s\n",
				     dest, fd, strerror(fd));
		}

		append_size = DIRECTIO_SLICE;

		get_rand_buf(dio_buf, append_size);

		ret = write(fd, dio_buf, append_size);
		if (ret < 0) {
			ret = errno;
			abort_printf("write file %s failed:%d:%s\n",
				     dest, ret, strerror(ret));
		}

		close(fd);
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to truncate reflinks concurrently
	*/

	root_printf("  *SubTest %d:Truncating reflinks among nodes.\n",
		    sub_testno++);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);

		truncate_size = get_rand(0, align_filesz / DIRECTIO_SLICE) *
					DIRECTIO_SLICE;

		ret = truncate(dest, truncate_size);

		if (ret < 0) {
			ret = errno;
			abort_printf("truncate file %s failed:%d:%s\n",
				     dest, ret, strerror(ret));
		}
	}

	MPI_Barrier_Sync();

	if (rank) {

		ret = do_unlink(dest);
		should_exit(ret);

	} else {

		ret = do_unlink(orig_path);
		should_exit(ret);
	}

	open_rw_flags = o_flags_rw;
	open_ro_flags = o_flags_ro;

	MPI_Barrier_Sync();

	return 0;
}
/*
 * Multi-node basic refcount test.
 *
 * Rank 0 prepares the original file; every other rank reflinks it and then
 * writes to, reads from, appends to and truncates its own reflink. Between
 * the phases rank 0 verifies the original file. When MMAP_TEST is set in
 * test_flags the append and truncate phases are skipped. At the end each
 * rank unlinks the file it owns.
 *
 * Returns ret after the scratch buffers have been released and ret has
 * been passed to should_exit(); open/write/truncate failures are reported
 * through abort_printf().
 */
static int basic_test(void)
{
	int ret = 0, fd;
	char dest[PATH_MAX];
	int sub_testno = 1;

	char *write_buf = NULL, *read_buf = NULL;

	unsigned long write_size = 0, read_size = 0;
	unsigned long append_size = 0, truncate_size = 0;
	unsigned long interval, offset = 0;

	write_buf = (char *)malloc(HUNK_SIZE * 2);
	read_buf = (char *)malloc(HUNK_SIZE * 2);

	/*
	 * Both buffers are handed to the I/O helpers below, so fail through
	 * the common clean-up path instead of passing a NULL buffer on.
	 */
	if (!write_buf || !read_buf) {
		fprintf(stderr, "basic_test: failed to allocate I/O buffers\n");
		ret = -1;
		goto bail_free;
	}

	root_printf("Test %d: Multi-nodes basic refcount test.\n", testno++);

	snprintf(orig_path, PATH_MAX, "%s/multi_original_basic_refile",
		 workplace);

	root_printf("  *SubTest %d:Prepare original inode %s.\n",
		    sub_testno++, orig_path);

	if (!rank) {

		ret = prep_orig_file(orig_path, file_size, 1);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to reflink the original to increment the
	* refcount concurrently.
	*/
	root_printf("  *SubTest %d:Reflinking inode %s among nodes.\n",
		    sub_testno++, orig_path);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		ret = reflink(orig_path, dest, 1);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

	if (!rank) {
		ret = verify_orig_file(orig_path);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to do cow to decrement the
	* refcount concurrently.
	*/

	root_printf("  *SubTest %d:Cowing reflinks among nodes.\n",
		    sub_testno++);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		interval = file_size / 100;
		offset = 0;
		while (offset < file_size) {
			if (test_flags & RAND_TEST)
				write_size = get_rand(1, M_SIZE * 2);
			else
				write_size = 1;

			/* Never write past the end of the file. */
			if (offset + write_size > file_size)
				write_size = file_size - offset;

			get_rand_buf(write_buf, write_size);
			if (test_flags & MMAP_TEST)
				ret = mmap_write_at_file(dest, write_buf,
							 write_size, offset);
			else
				ret = write_at_file(dest, write_buf, write_size,
						    offset);

			if (ret)
				goto bail_free;

			if (test_flags & RAND_TEST)
				offset += write_size + get_rand(1, interval);
			else
				offset += write_size + interval;
		}
	}

	MPI_Barrier_Sync();

	if (!rank) {
		ret = verify_orig_file(orig_path);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to read reflinks concurrently
	*/
	root_printf("  *SubTest %d:Reading reflinks among nodes.\n",
		    sub_testno++);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		interval = file_size / 100;
		offset = 0;
		while (offset < file_size) {
			if (test_flags & RAND_TEST)
				read_size = get_rand(1, M_SIZE * 2);
			else
				read_size = 1;

			/* Never read past the end of the file. */
			if (offset + read_size > file_size)
				read_size = file_size - offset;

			if (test_flags & MMAP_TEST)
				ret = mmap_read_at_file(dest, read_buf,
							read_size, offset);
			else
				ret = read_at_file(dest, read_buf, read_size,
						   offset);

			if (ret)
				goto bail_free;

			if (test_flags & RAND_TEST)
				offset = offset + read_size +
					 get_rand(1, interval);
			else
				offset = offset + read_size + interval;
		}

	}

	MPI_Barrier_Sync();

	if (!rank) {
		ret = verify_orig_file(orig_path);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

	/* The append and truncate phases are skipped for MMAP_TEST runs. */
	if (test_flags & MMAP_TEST)
		goto bail;

	/*
	* All ranks try to append reflinks concurrently
	*/

	root_printf("  *SubTest %d:Appending reflinks among nodes.\n",
		    sub_testno++);

	if (rank) {
		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);
		fd = open64(dest, open_rw_flags | O_APPEND);
		if (fd < 0) {
			if (write_buf)
				free(write_buf);

			if (read_buf)
				free(read_buf);
			/* fd is reused to carry errno into the message. */
			fd = errno;
			abort_printf("open file %s failed:%d:%s\n",
				     dest, fd, strerror(fd));
		}

		if (test_flags & RAND_TEST)
			append_size = get_rand(1, HUNK_SIZE);
		else
			append_size = HUNK_SIZE;

		get_rand_buf(write_buf, append_size);

		ret = write(fd, write_buf, append_size);
		if (ret < 0) {
			if (write_buf)
				free(write_buf);

			if (read_buf)
				free(read_buf);
			ret = errno;
			abort_printf("write file %s failed:%d:%s\n",
				     dest, ret, strerror(ret));
		}

		close(fd);
	}

	MPI_Barrier_Sync();

	if (!rank) {
		ret = verify_orig_file(orig_path);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

	/*
	* All ranks try to truncate reflinks concurrently
	*/

	root_printf("  *SubTest %d:Truncating reflinks among nodes.\n",
		    sub_testno++);

	if (rank) {

		snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path, hostname, rank);

		if (test_flags & RAND_TEST)
			truncate_size = get_rand(0, file_size);
		else
			truncate_size = file_size / (rank + 1);

		ret = truncate(dest, truncate_size);

		if (ret < 0) {
			if (write_buf)
				free(write_buf);

			if (read_buf)
				free(read_buf);
			ret = errno;
			abort_printf("truncate file %s failed:%d:%s\n",
				     dest, ret, strerror(ret));
		}
	}

	MPI_Barrier_Sync();

	if (!rank) {
		ret = verify_orig_file(orig_path);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

bail:
	/* Each rank removes the file it owns. */
	if (rank) {

		ret = do_unlink(dest);
		if (ret)
			goto bail_free;

	} else {

		ret = do_unlink(orig_path);
		if (ret)
			goto bail_free;
	}

	MPI_Barrier_Sync();

bail_free:

	if (write_buf)
		free(write_buf);

	if (read_buf)
		free(read_buf);

	should_exit(ret);

	return ret;
}
/*
 * Multi-node destructive refcount test.
 *
 * Rank 0 prepares the original file. Every other rank then reflinks the
 * original and unlinks the reflink again in a loop that has no exit
 * condition, so that the node can be crashed (manually or automatically)
 * while a reflink is in flight. Afterwards the original file is meant to
 * be checked and restored by fsck; on the crashed node the reflinked
 * inode should not be left in the orphan directory.
 */
static int dest_test(void)
{
	int ret;
	int step = 1;
	char dest[PATH_MAX];

	root_printf("Test %d: Multi-nodes destructive refcount test.\n",
		    testno++);

	snprintf(orig_path, PATH_MAX, "%s/multi_original_basic_refile",
		 workplace);

	/*
	 * A fairly large file with widely separated extents is used so
	 * that reflinking is slow enough to be interrupted easily.
	 */
	if (rank == 0) {
		ret = prep_orig_file(orig_path, file_size, 0);
		should_exit(ret);
	}

	MPI_Barrier_Sync();

	if (rank != 0) {

		/* Endless reflink/unlink cycle; see the header comment. */
		for (;;) {
			snprintf(dest, PATH_MAX, "%s-%s-%d", orig_path,
				 hostname, rank);

			printf("  *SubTest %d:Reflinking to inode %s on %s.\n",
			       step++, dest, hostname);
			ret = reflink(orig_path, dest, 1);
			should_exit(ret);

			sleep(5);

			printf("  *SubTest %d:Unlinking inode %s from %s.\n",
			       step++, dest, hostname);
			ret = do_unlink(dest);
			should_exit(ret);
		}
	}

	MPI_Barrier_Sync();

	if (rank == 0) {
		ret = verify_orig_file(orig_path);
		should_exit(ret);

		ret = do_unlink(orig_path);
		should_exit(ret);
	}

	return 0;
}
/*
 * Multi-node stress refcount test, made of three sub-tests:
 *
 *  1. Every rank creates ref_trees small originals, reflinks each one,
 *     writes a byte into every reflink and removes all of them again.
 *  2. Rank 0 creates one original plus one reflink per other rank; every
 *     other rank reflinks its copy ref_counts times and writes to a random
 *     subset of those reflinks before everything is unlinked.
 *  3. Rank 0 creates a file of file_size bytes plus a reflink; every other
 *     rank builds a chain of reflinks, writing into each new link and
 *     checking that the file it was reflinked from is unchanged.
 *
 * Returns ret after the scratch buffers have been released and ret has
 * been passed to should_exit().
 */
static int stress_test(void)
{
	unsigned long i, j;
	int ret = 0, sub_testno = 1;
	char *write_buf = NULL, dest[PATH_MAX];
	char tmp_dest[PATH_MAX], tmp_orig[PATH_MAX];
	char *pattern_buf = NULL, *verify_buf = NULL;

	unsigned long offset = 0, write_size = 0, interval = 0;
	unsigned long verify_size = 0, verify_offset = 0;

	write_buf = (char *)malloc(HUNK_SIZE * 2);
	pattern_buf = (char *)malloc(HUNK_SIZE * 2);
	verify_buf = (char *)malloc(HUNK_SIZE * 2);

	/*
	 * All three buffers are handed to the I/O helpers below, so fail
	 * through the common clean-up path instead of using a NULL buffer.
	 */
	if (!write_buf || !pattern_buf || !verify_buf) {
		fprintf(stderr, "stress_test: failed to allocate I/O buffers\n");
		ret = -1;
		goto bail;
	}

	root_printf("Test %d: Multi-nodes stress refcount test.\n", testno++);

	root_printf("  *SubTest %d: Stress test with tremendous refcount "
		    "trees.\n", sub_testno++);

	/* Create ref_trees originals per rank, each with one reflink. */
	for (i = 0; i < ref_trees; i++) {

		snprintf(orig_path, PATH_MAX, "%s/multi_original_stress_"
			 "refile_rank%d_%lu", workplace, rank, i);
		snprintf(dest, PATH_MAX, "%s_target", orig_path);
		ret = prep_orig_file(orig_path, 32 * 1024, 1);
		if (ret)
			goto bail;
		ret = reflink(orig_path, dest, 1);
		if (ret)
			goto bail;
	}

	/* Write a single random byte into every reflink. */
	for (i = 0; i < ref_trees; i++) {

		snprintf(orig_path, PATH_MAX, "%s/multi_original_stress_refile_"
			 "rank%d_%lu", workplace, rank, i);
		snprintf(dest, PATH_MAX, "%s_target", orig_path);
		offset = get_rand(0, 32 * 1024 - 1);
		write_size = 1;
		get_rand_buf(write_buf, write_size);
		ret = write_at_file(dest, write_buf, write_size, offset);
		if (ret)
			goto bail;
	}

	/* Remove every original and its reflink again. */
	for (i = 0; i < ref_trees; i++) {
		snprintf(orig_path, PATH_MAX, "%s/multi_original_stress_refile_"
			 "rank%d_%lu", workplace, rank, i);
		snprintf(dest, PATH_MAX, "%s_target", orig_path);
		ret = do_unlink(orig_path);
		if (ret)
			goto bail;
		ret = do_unlink(dest);
		if (ret)
			goto bail;
	}

	MPI_Barrier_Sync();

	root_printf("  *SubTest %d: Stress test with tremendous shared inodes "
		    "on one refcount tree.\n", sub_testno++);

	snprintf(orig_path, PATH_MAX, "%s/multi_original_stress_refile",
		 workplace);
	snprintf(dest, PATH_MAX, "%s_target", orig_path);

	if (!rank) {

		ret = prep_orig_file(orig_path, 10 * HUNK_SIZE, 1);
		if (ret)
			goto bail;

		/* One reflink per non-root rank, named after the rank. */
		for (i = 1; i < size; i++) {
			snprintf(ref_path, PATH_MAX, "%s_%lu", dest, i);
			ret = reflink(orig_path, ref_path, 1);
			if (ret)
				goto bail;
		}
	}

	MPI_Barrier_Sync();

	if (rank) {

		snprintf(ref_path, PATH_MAX, "%s_%d", dest, rank);
		ret = do_reflinks(ref_path, ref_path, ref_counts, 1);
		if (ret)
			goto bail;

		for (i = 0; i < ref_counts; i++) {

			/* Skip a random subset of the reflinks. */
			if (get_rand(0, 1))
				continue;

			snprintf(ref_path, PATH_MAX, "%s_%dr%lu", dest, rank,
				 i);

			offset = get_rand(0, 10 * HUNK_SIZE - 1);

			write_size = get_rand(1, HUNK_SIZE);
			if (offset + write_size > 10 * HUNK_SIZE)
				write_size = 10 * HUNK_SIZE - offset;

			get_rand_buf(write_buf, write_size);
			ret = write_at_file(ref_path, write_buf, write_size,
					    offset);
			if (ret)
				goto bail;
		}

	}

	MPI_Barrier_Sync();

	if (rank) {

		for (i = 0; i < ref_counts; i++) {

			if (get_rand(0, 1))
				continue;

			/*
			 * NOTE(review): this path is built from orig_path
			 * while the write pass above builds it from dest, and
			 * the truncate() result is ignored. It looks like
			 * dest may have been intended -- confirm before
			 * changing.
			 */
			snprintf(ref_path, PATH_MAX, "%s_%dr%lu", orig_path,
				 rank, i);

			truncate(ref_path, 0);
		}
	}

	MPI_Barrier_Sync();

	if (rank) {

		snprintf(ref_path, PATH_MAX, "%s_%d", dest, rank);
		ret = do_unlinks(ref_path, ref_counts);
		if (ret)
			goto bail;

	} else {

		for (i = 1; i < size; i++) {

			snprintf(ref_path, PATH_MAX, "%s_%lu", dest, i);
			ret = do_unlink(ref_path);
			if (ret)
				goto bail;
		}

		ret = do_unlink(orig_path);
		if (ret)
			goto bail;
	}

	MPI_Barrier_Sync();

	root_printf("  *SubTest %d: Stress test with HUGEFILE reflinked.\n",
		    sub_testno++);
	snprintf(orig_path, PATH_MAX, "%s/multi_original_stress_huge_refile",
		 workplace);
	snprintf(dest, PATH_MAX, "%s_target", orig_path);
	/* Keep the base names; dest and orig_path are overwritten below. */
	strcpy(tmp_dest, dest);
	strcpy(tmp_orig, orig_path);

	if (!rank) {
		ret = prep_orig_file(orig_path, file_size, 1);
		if (ret)
			goto bail;
		ret = reflink(orig_path, dest, 1);
		if (ret)
			goto bail;
	}

	MPI_Barrier_Sync();

	if (rank) {

		offset = 0;
		interval = file_size / 1000;
		i = 0;
		while (offset < file_size) {

			snprintf(dest, PATH_MAX, "%s_%d_%lu", tmp_dest,
				 rank, i);
			ret = reflink(orig_path, dest, 1);
			if (ret)
				goto bail;

			write_size = get_rand(1, M_SIZE);
			get_rand_buf(write_buf, write_size);

			verify_size = get_rand(M_SIZE, 2 * M_SIZE);

			/*
			 * Centre the verified window on the written range,
			 * clamped to the start and end of the file.
			 */
			if (offset < (verify_size - write_size) / 2)
				verify_offset = 0;
			else
				verify_offset = offset -
						(verify_size - write_size) / 2;

			if (verify_offset + verify_size > file_size)
				verify_size = file_size - verify_offset;

			/* Snapshot the source before writing to the link. */
			ret = read_at_file(orig_path, pattern_buf, verify_size,
					   verify_offset);
			if (ret)
				goto bail;

			ret = write_at_file(dest, write_buf, write_size,
					    offset);
			if (ret)
				goto bail;

			ret = read_at_file(orig_path, verify_buf, verify_size,
					   verify_offset);
			if (ret)
				goto bail;

			/* The source must be untouched by the write. */
			if (memcmp(pattern_buf, verify_buf, verify_size)) {
				abort_printf("Verify original file date failed"
					     " after writting to snapshot!\n");
				ret = -1;
				goto bail;
			}

			offset = offset + write_size + interval;

			/* The next reflink is taken from the one just made. */
			strcpy(orig_path, dest);
			i++;
		}

		/* Remove the whole chain created above. */
		for (j = 0; j < i; j++) {

			snprintf(dest, PATH_MAX, "%s_%d_%lu", tmp_dest,
				 rank, j);

			ret = do_unlink(dest);
			if (ret)
				goto bail;
		}

		strcpy(dest, tmp_dest);
		offset = 0;
		interval = file_size / 1000;

		while (offset < file_size) {

			write_size = get_rand(1, M_SIZE * 2);
			get_rand_buf(write_buf, write_size);
			ret = write_at_file(dest, write_buf,
					    write_size, offset);
			if (ret)
				goto bail;

			offset = offset + write_size + interval;
		}

	}

	MPI_Barrier_Sync();

	if (!rank) {

		ret = do_unlink(dest);
		if (ret)
			goto bail;

		ret = do_unlink(orig_path);
		if (ret)
			goto bail;
	}

bail:
	if (write_buf)
		free(write_buf);

	if (pattern_buf)
		free(pattern_buf);

	if (verify_buf)
		free(verify_buf);

	should_exit(ret);

	return ret;
}
/*
 * Multi-node extended-attribute (EA) reflink test.
 *
 * ea_name_size and ea_value_size are stored in the globals xattr_name_sz
 * and xattr_value_sz. Rank 0 prepares the original file, generates
 * xattr_nums EA names and sends each one to every other rank, then sets
 * and validates the EAs on the original and reflinks it. After the
 * barrier each rank runs a workload chosen from rank % 6, and rank 0
 * finally removes the EAs and unlinks all files.
 *
 * Always returns 0; failures go through should_exit() or abort_printf().
 */
static int xattr_basic_test(int ea_name_size, int ea_value_size)
{
	int ret, fd = -1;
	int sub_testno = 1;
	char dest[PATH_MAX];

	MPI_Request request;
	MPI_Status  status;

	unsigned long i, j;

	xattr_name_sz = ea_name_size;
	xattr_value_sz = ea_value_size;


	snprintf(orig_path, PATH_MAX, "%s/multi_original_xattr_refile",
		 workplace);
	snprintf(dest, PATH_MAX, "%s_target", orig_path);

	root_printf("  *SubTest %d: Prep original inode.\n", sub_testno++);

	if (!rank) {

		ret = prep_orig_file(orig_path, file_size, 1);

		should_exit(ret);
	}

	MPI_Barrier_Sync();

	root_printf("  *SubTest %d: Prep %ld xattr name list among nodes.\n",
		    sub_testno++, xattr_nums);

	/*
	 * Rank 0 generates each name and sends it to every other rank, so
	 * that all ranks end up with the same xattr_name_list_set.
	 */
	for (i = 0; i < xattr_nums; i++) {

		memset(xattr_name, 0, xattr_name_sz + 1);
		memset(xattr_value, 0, xattr_value_sz);
		memset(xattr_value_get, 0, xattr_value_sz);

		if (!rank) {

			xattr_name_generator(i, USER, xattr_name_sz,
					     xattr_name_sz);
			strcpy(xattr_name_list_set[i], xattr_name);

			for (j = 1; j < size; j++) {

				ret = MPI_Isend(xattr_name, xattr_name_sz + 1,
						MPI_BYTE, j, 1, MPI_COMM_WORLD,
						&request);
				if (ret != MPI_SUCCESS)
					abort_printf("MPI_Isend failed: %d\n",
						     ret);

				MPI_Wait(&request, &status);
			}

		} else {

			ret = MPI_Irecv(xattr_name, xattr_name_sz + 1, MPI_BYTE,
					0, 1, MPI_COMM_WORLD, &request);
			if (ret != MPI_SUCCESS)
				abort_printf("MPI_Irecv failed: %d\n", ret);

			MPI_Wait(&request, &status);
			strcpy(xattr_name_list_set[i], xattr_name);

		}

	}

	MPI_Barrier_Sync();

	root_printf("  *SubTest %d: Prep original inode with %ld EAs.\n",
		    sub_testno++, xattr_nums);

	if (!rank) {

		fd = open64(orig_path, open_rw_flags);
		/*
		 * The descriptor is passed to the EA helpers below, so a
		 * failed open must be reported here rather than surfacing as
		 * an unrelated EA error.
		 */
		if (fd < 0) {
			fd = errno;
			abort_printf("open file %s failed:%d:%s\n",
				     orig_path, fd, strerror(fd));
		}

		/* Add every EA, read it back and validate its value. */
		for (i = 0; i < xattr_nums; i++) {

			strcpy(xattr_name, xattr_name_list_set[i]);
			xattr_value_constructor(i);
			ret = add_or_update_ea(NORMAL, fd, XATTR_CREATE, "add");
			should_exit(ret);
			ret = read_ea(NORMAL, fd);
			should_exit(ret);
			ret = xattr_value_validator(i);
			should_exit(ret);

		}

		ret = do_reflinks(orig_path, orig_path, ref_counts, 0);
		should_exit(ret);
		ret = reflink(orig_path, dest, 1);
		should_exit(ret);

	}

	MPI_Barrier_Sync();

	/* The workloads below are selected by rank modulo 6. */
	if (rank % 6 == 1) {
		/* Also doing reflinks and unlinks. */
		printf("  *SubTest Rank %d: Do reflinks and cows on %ld EAs.\n",
		       rank, xattr_nums);
		ret = do_reflinks(dest, dest, ref_counts, 0);
		should_exit(ret);
		ret = do_xattr_cows(dest, ref_counts, xattr_nums);
		should_exit(ret);
		ret = do_unlinks(dest, ref_counts);
		should_exit(ret);

	}

	if (rank % 6 == 2) {

		printf("  *SubTest Rank %d: Do cows on %ld EAs.\n", rank,
		       xattr_nums);
		ret = do_xattr_cows(orig_path, ref_counts, xattr_nums);
		should_exit(ret);
	}

	if (rank % 6 == 3) {

		printf("  *SubTest Rank %d: Do data&ea cows on %ld EAs.\n",
		       rank, xattr_nums);
		ret = do_xattr_data_cows(orig_path, ref_counts, xattr_nums);
		should_exit(ret);
	}

	if (rank % 6 == 4) {

		printf("  *SubTest Rank %d: Do reads on %ld EAs.\n", rank,
		       xattr_nums);
		/* The read workload runs with the maximum value size. */
		xattr_value_sz = XATTR_VALUE_MAX_SZ;
		ret = do_xattr_reads(orig_path, ref_counts, xattr_nums);
		should_exit(ret);
	}

	if (rank % 6 == 5) {

		printf("  *SubTest Rank %d: Do lists on %ld EAs.\n", rank,
		       xattr_nums);
		/* Listing is skipped once list_sz reaches the maximum. */
		if (list_sz < XATTR_LIST_MAX_SZ) {
			ret = do_xattr_lists(orig_path, ref_counts);
			should_exit(ret);
		}
	}

	MPI_Barrier_Sync();

	if (!rank) {

		printf("  *SubTest Rank %d: Do EA removal.\n", rank);

		ret = do_xattr_removes(orig_path, ref_counts, xattr_nums);
		should_exit(ret);

	}

	MPI_Barrier_Sync();

	/* Rank 0 closes the original and removes every file it created. */
	if (!rank) {

		close(fd);
		ret = do_unlinks(orig_path, ref_counts);
		should_exit(ret);

		ret = do_unlink(dest);
		should_exit(ret);

		ret = do_unlink(orig_path);
		should_exit(ret);
	}

	return 0;
}
Beispiel #10
0
static void *am_log_worker(void *arg) {
    struct am_log *log = AM_LOG();
    int i, level, is_audit;
    unsigned int index;
    char *data;
    size_t data_sz;
    unsigned long instance_id;
    struct stat st;
    struct log_files *f;

    if (log == NULL) {
        return NULL;
    }

#ifdef _WIN32
    WaitForSingleObject(am_log_lck.lock, INFINITE);
#else
    pthread_mutex_lock(&log->lock);
#endif

    for (;;) {
        index = log->out;

#ifdef _WIN32
        while (log->read_count == 0 || !log->bucket[index].ready_to_read) {
            ReleaseMutex(am_log_lck.lock);
            if (WaitForSingleObject(am_log_lck.new_data_cond, 1000) == WAIT_TIMEOUT) {
                if (WaitForSingleObject(am_log_lck.exit, 0) == WAIT_OBJECT_0) {
                    return NULL;
                }
            }
            WaitForSingleObject(am_log_lck.lock, INFINITE);
        }
#else
        while (log->read_count == 0 || !log->bucket[index].ready_to_read) {
            struct timeval now = {0, 0};
            struct timespec ts = {0, 0};
            gettimeofday(&now, NULL);
            ts.tv_sec = now.tv_sec + 1;
            ts.tv_nsec = now.tv_usec * 1000;
            if (pthread_cond_timedwait(&log->new_data_cond, &log->lock, &ts) == ETIMEDOUT) {
                if (should_exit(&log->exit)) {
                    pthread_mutex_unlock(&log->lock);
                    return NULL;
                }
            }
        }
#endif  /* _WIN32 */

        log->bucket[index].ready_to_read = AM_FALSE;
#ifdef _WIN32
        ReleaseMutex(am_log_lck.lock);
#else
        pthread_mutex_unlock(&log->lock);
#endif  /* _WIN32 */

        data = log->bucket[index].data;
        data_sz = log->bucket[index].size;
        level = log->bucket[index].level;
        is_audit = (level & AM_LOG_LEVEL_AUDIT) != 0;
        instance_id = log->bucket[index].instance_id;

        f = NULL;

        for (i = 0; i < AM_MAX_INSTANCES; i++) {
            f = &log->files[i];
            if (f->used && f->instance_id == instance_id) {
                break;
            }
        }

        if (f != NULL) {
            
            if (ISINVALID(f->name_debug)) {
                fprintf(stderr, "am_log_worker(): the debug file name is invalid (i.e. empty or null)\n");
                f->fd_debug = -1;
                f->fd_audit = -1;
                return NULL;
            }
            
            if (ISINVALID(f->name_audit)) {
                fprintf(stderr, "am_log_worker(): the audit file name is invalid (i.e. empty or null)\n");
                f->fd_debug = -1;
                f->fd_audit = -1;
                return NULL;
            }
            
            /* log files are not opened yet, do it now */
            if (f->fd_audit == -1 && f->fd_debug == -1) {
#ifdef _WIN32
                f->fd_debug = _open(f->name_debug, _O_CREAT | _O_WRONLY | _O_APPEND | _O_BINARY,
                        _S_IREAD | _S_IWRITE);
                f->fd_audit = _open(f->name_audit, _O_CREAT | _O_WRONLY | _O_APPEND | _O_BINARY,
                        _S_IREAD | _S_IWRITE);
                if (f->fd_debug != -1 && stat(f->name_debug, &st) == 0) {
                    f->created_debug = st.st_ctime;
                    f->owner = getpid();
                }
                if (f->fd_audit != -1 && stat(f->name_audit, &st) == 0) {
                    f->created_audit = st.st_ctime;
                    f->owner = getpid();
                }
#else
                f->fd_debug = open(f->name_debug, O_CREAT | O_WRONLY | O_APPEND, S_IWUSR | S_IRUSR);
                f->fd_audit = open(f->name_audit, O_CREAT | O_WRONLY | O_APPEND, S_IWUSR | S_IRUSR);
                if (f->fd_debug != -1 && stat(f->name_debug, &st) == 0) {
                    f->node_debug = st.st_ino;
                    f->created_debug = st.st_ctime;
                    f->owner = getpid();
                }
                if (f->fd_audit != -1 && stat(f->name_audit, &st) == 0) {
                    f->node_audit = st.st_ino;
                    f->created_audit = st.st_ctime;
                    f->owner = getpid();
                }
#endif
            }

            if (f->fd_debug == -1) {
                fprintf(stderr, "am_log_worker() failed to open log file %s: error: %d", f->name_debug, errno);
                f->fd_debug = f->fd_audit = -1;
            } else if (f->fd_audit == -1) {
                fprintf(stderr, "am_log_worker() failed to open audit file %s: error: %d", f->name_audit, errno);
                f->fd_debug = f->fd_audit = -1;
            } else {
                int file_handle = is_audit ? f->fd_audit : f->fd_debug;
                char *file_name = is_audit ? f->name_audit : f->name_debug;
                int max_size = is_audit ? f->max_size_audit : f->max_size_debug;
                time_t file_created = is_audit ? f->created_audit : f->created_debug;
#ifdef _WIN32
                int wrote;
#else 
                ssize_t wrote;
                ino_t file_inode = is_audit ? f->node_audit : f->node_debug;
#endif
                wrote = write(file_handle, data, (unsigned int) data_sz);
#ifdef _WIN32
                wrote = write(file_handle, "\r\n", 2);
                _commit(file_handle);

                /* check file timestamp; rotate by date if set so */
                if (max_size == -1 && should_rotate_time(file_created)) {
                    HANDLE fh = (HANDLE) _get_osfhandle(file_handle);
                    unsigned int idx = 1;
                    static char tmp[AM_PATH_SIZE];
                    do {
                        snprintf(tmp, sizeof (tmp), "%s.%d", file_name, idx);
                        idx++;
                    } while (_access(tmp, 0) == 0);
                    if (CopyFileA(file_name, tmp, FALSE)) {
                        SetFilePointer(fh, 0, NULL, FILE_BEGIN);
                        SetEndOfFile(fh);
                        if (is_audit) {
                            f->created_audit = time(NULL);
                        } else {
                            f->created_debug = time(NULL);
                        }
                    } else {
                        fprintf(stderr, "could not rotate log file %s (error: %d)\n",
                                file_name, GetLastError());
                    }
                }

                /* check file size; rotate by size if set so */
                if (max_size > 0) {
                    BY_HANDLE_FILE_INFORMATION info;
                    uint64_t fsz = 0;
                    HANDLE fh = (HANDLE) _get_osfhandle(file_handle);
                    if (GetFileInformationByHandle(fh, &info)) {
                        fsz = ((DWORDLONG) (((DWORD) (info.nFileSizeLow)) |
                                (((DWORDLONG) ((DWORD) (info.nFileSizeHigh))) << 32)));
                    }
                    if ((fsz + 1024) > max_size) {
                        unsigned int idx = 1;
                        static char tmp[AM_PATH_SIZE];
                        do {
                            snprintf(tmp, sizeof (tmp), "%s.%d", file_name, idx);
                            idx++;
                        } while (_access(tmp, 0) == 0);
                        if (CopyFileA(file_name, tmp, FALSE)) {
                            SetFilePointer(fh, 0, NULL, FILE_BEGIN);
                            SetEndOfFile(fh);
                            if (is_audit) {
                                f->created_audit = time(NULL);
                            } else {
                                f->created_debug = time(NULL);
                            }
                        } else {
                            fprintf(stderr, "could not rotate log file %s (error: %d)\n",
                                    file_name, GetLastError());
                        }
                    }
                }
#else
                wrote = write(file_handle, "\n", 1);
                fsync(file_handle);

                /* check file timestamp; rotate by date if set so */
                if (max_size == -1 && should_rotate_time(file_created)) {
                    rename_file(file_name);
                }

                /* check file size; rotate by size if set so */
                if (max_size > 0 && stat(file_name, &st) == 0 && (st.st_size + 1024) > max_size) {
                    rename_file(file_name);
                }

                /* reset file inode number (in case it has changed as a result of rename_file) */
                if (stat(file_name, &st) != 0 || st.st_ino != file_inode) {
                    close(file_handle);
                    if (is_audit) {
                        f->fd_audit = open(f->name_audit, O_CREAT | O_WRONLY | O_APPEND, S_IWUSR | S_IRUSR);
                        f->node_audit = st.st_ino;
                        f->created_audit = st.st_ctime;
                        f->owner = getpid();
                    } else {
                        f->fd_debug = open(f->name_debug, O_CREAT | O_WRONLY | O_APPEND, S_IWUSR | S_IRUSR);
                        f->node_debug = st.st_ino;
                        f->created_debug = st.st_ctime;
                        f->owner = getpid();
                    }
                    if (f->fd_debug == -1 || f->fd_audit == -1) {
                        fprintf(stderr, "am_log_worker() log file re-open failed with error: %d", errno);
                        f->fd_debug = f->fd_audit = -1;
                    }
                }
#endif                            
            }
        }

        log->out = AM_LOG_BUFFER_MASK(log->out + 1, log->bucket_count);
#ifdef _WIN32
        WaitForSingleObject(am_log_lck.lock, INFINITE);
#else
        pthread_mutex_lock(&log->lock);
#endif
        log->read_count--;
#ifdef _WIN32
        SetEvent(am_log_lck.new_space_cond);
#else
        pthread_cond_broadcast(&log->new_space_cond);
#endif
    }
    return NULL;
}