bool example_test() {
  void *uc = NULL;

  int errors = 0;
  for (int channels = 1; channels <= 4; channels++) {
    errors += run_test(uc, channels, kCPU, kChunky);
    errors += run_test(uc, channels, kCPU, kPlanar);
    errors += run_test(uc, channels, kGLSL, kChunky);
    // GLSL+Planar is a silly combination; the conversion overhead is high.
    // But let's run it anyway, since it should work.
    errors += run_test(uc, channels, kGLSL, kPlanar);
  }

  // -------- Other stuff

  halide_print(uc, "Here is a random image.\n");

  Image<uint8_t> randomness(300, 400, 3);
  (void) halide_randomize_buffer_host<uint8_t>(uc, 0, 0, 255, randomness);
  halide_buffer_display(randomness);


  halide_print(uc, "Here is a smooth image.\n");

  Image<uint8_t> smoothness(300, 400, 3);
  (void) halide_smooth_buffer_host<uint8_t>(uc, 0, smoothness);
  halide_buffer_display(smoothness);

  return errors > 0;
}
예제 #2
0
파일: sim_remote.cpp 프로젝트: kgnk/Halide
// Loads a shared object held in memory and hands back an opaque handle to it.
//
//   code, codeLen  - the in-memory image of the shared object.
//   use_dlopenbuf  - when true the image is opened with dlopenbuf; otherwise
//                    mmap_dlopen is used.
//   module_ptr     - receives the library handle on success.
//
// Returns 0 on success and -1 on any failure (after logging a message).
int initialize_kernels(const unsigned char *code, int codeLen, bool use_dlopenbuf, handle_t *module_ptr) {
    void *lib = NULL;
    if (use_dlopenbuf) {
        if (!dlopenbuf) {
            log_printf("dlopenbuf not available.\n");
            return -1;
        }
        // We need a unique soname, or dlopenbuf will return a
        // previously opened library.
        static int unique_id = 0;
        char soname[256];
        // Bounded formatting: the name always fits today, but snprintf keeps
        // that true even if the format string changes later.
        snprintf(soname, sizeof(soname), "libhalide_kernels%04d.so",
                 __sync_fetch_and_add(&unique_id, 1));

        // Open the library
        dllib_init();
        // We need to use RTLD_NOW, the libraries we build for Hexagon
        // offloading do not support lazy binding.
        lib = dlopenbuf(soname, (const char*)code, codeLen, RTLD_LOCAL | RTLD_NOW);
        if (!lib) {
            halide_print(NULL, "dlopenbuf failed\n");
            // dlerror() is allowed to return NULL when no diagnostic is
            // pending; only forward a real message.
            const char *reason = dlerror();
            if (reason) {
                halide_print(NULL, reason);
            }
            return -1;
        }
    } else {
        lib = mmap_dlopen(code, codeLen);
        if (!lib) {
            halide_print(NULL, "mmap_dlopen failed\n");
            return -1;
        }
    }

    *module_ptr = reinterpret_cast<handle_t>(lib);
    return 0;
}
예제 #3
0
파일: cache.cpp 프로젝트: bleibig/Halide
// Consistency check for the cache. Prints the current and maximum cache
// size, counts the entries reachable from the hash table and from each end
// of the recency list, prints those counts, and traps (after printing a
// "cache invalid" message) if they disagree, if an entry has a NULL recency
// link without being the matching list end, or if the cache size is negative.
WEAK void validate_cache() {
    print(NULL) << "validating cache, "
                << "current size " << current_cache_size
                << " of maximum " << max_cache_size << "\n";

    // Pass 1: walk every hash bucket chain.
    int hashed_count = 0;
    for (size_t bucket = 0; bucket < kHashTableSize; bucket++) {
        for (CacheEntry *node = cache_entries[bucket]; node != NULL; node = node->next) {
            hashed_count++;
            // Only the most recently used entry may lack a more_recent link.
            if (node->more_recent == NULL && node != most_recently_used) {
                halide_print(NULL, "cache invalid case 1\n");
                __builtin_trap();
            }
            // Only the least recently used entry may lack a less_recent link.
            if (node->less_recent == NULL && node != least_recently_used) {
                halide_print(NULL, "cache invalid case 2\n");
                __builtin_trap();
            }
        }
    }

    // Pass 2: walk the recency list from the most recent end.
    int mru_count = 0;
    for (CacheEntry *node = most_recently_used; node != NULL; node = node->less_recent) {
        mru_count++;
    }

    // Pass 3: walk the recency list from the least recent end.
    int lru_count = 0;
    for (CacheEntry *node = least_recently_used; node != NULL; node = node->more_recent) {
        lru_count++;
    }

    print(NULL) << "hash entries " << hashed_count
                << ", mru entries " << mru_count
                << ", lru entries " << lru_count << "\n";

    if (hashed_count != mru_count) {
        halide_print(NULL, "cache invalid case 3\n");
        __builtin_trap();
    }
    if (hashed_count != lru_count) {
        halide_print(NULL, "cache invalid case 4\n");
        __builtin_trap();
    }
    if (current_cache_size < 0) {
        halide_print(NULL, "cache size is negative\n");
        __builtin_trap();
    }
}
예제 #4
0
// printf-style front end for halide_print.
//
// Formats fmt and its arguments into a 4096-byte stack buffer and passes the
// result to halide_print. Output longer than the buffer is truncated by
// vsnprintf. Returns vsnprintf's result unchanged: the length the full
// message would have had (which can exceed the buffer size), or a negative
// value on a formatting error.
WEAK int halide_printf(void *user_context, const char * fmt, ...) {
    char buffer[4096];
    va_list args;
    va_start(args, fmt);
    int ret = vsnprintf(buffer, sizeof(buffer), fmt, args);
    va_end(args);
    if (ret < 0) {
        // On failure the buffer contents are unspecified; make sure we hand
        // halide_print a valid (empty) string rather than stack garbage.
        buffer[0] = '\0';
    }
    halide_print(user_context, buffer);
    return ret;
}
예제 #5
0
파일: profiler.cpp 프로젝트: DoDNet/Halide
// Prints a plain-text profiling report through halide_print.
//
// For every pipeline in s->pipelines that has at least one run, one summary
// line is printed (name, total time, sample count, run count, time per run).
// If the pipeline's total time is non-zero, one line per func follows with
// its time per run and its share of the pipeline's total time. Times are
// divided by 1000000 before being printed with an "ms" label.
//
// NOTE(review): the "_unlocked" suffix suggests the caller is responsible
// for any locking; nothing in this function takes a lock -- confirm at the
// call sites.
WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_state *s) {

    char line_buf[160];
    Printer<StringStreamPrinter, sizeof(line_buf)> sstr(user_context, line_buf);

    for (halide_profiler_pipeline_stats *p = s->pipelines; p;
         p = (halide_profiler_pipeline_stats *)(p->next)) {
        float t = p->time / 1000000.0f;
        // Skip pipelines that never ran; this also protects the divisions
        // by p->runs below.
        if (!p->runs) continue;
        sstr.clear();
        sstr << p->name
             << "  total time: " << t << " ms"
             << "  samples: " << p->samples
             << "  runs: " << p->runs
             << "  time per run: " << t / p->runs << " ms\n";
        halide_print(user_context, sstr.str());
        if (p->time) {
            for (int i = 0; i < p->num_funcs; i++) {
                sstr.clear();
                halide_profiler_func_stats *fs = p->funcs + i;

                // The first func is always a catch-all overhead
                // slot. Only report overhead time if it's non-zero
                if (i == 0 && fs->time == 0) continue;

                // Pad with spaces so the columns line up.
                sstr << "  " << fs->name << ": ";
                while (sstr.size() < 25) sstr << " ";

                float ft = fs->time / (p->runs * 1000000.0f);
                sstr << ft << "ms";
                while (sstr.size() < 40) sstr << " ";

                // Scale the numerator rather than dividing the denominator
                // by 100: p->time / 100 is zero for any total below 100,
                // which made the old expression divide by zero. p->time is
                // known to be non-zero inside this branch.
                int percent = (100 * fs->time) / p->time;
                sstr << "(" << percent << "%)\n";

                halide_print(user_context, sstr.str());
            }
        }
    }
}
예제 #6
0
// Default error handler: prints "Error: " followed by msg (with a trailing
// newline added if msg does not already end in one) via halide_print, then
// terminates the process with exit(1).
WEAK void default_error_handler(void *user_context, const char *msg) {
    char buf[4096];
    // Reserve the last TWO bytes of buf: one for the newline that may be
    // appended below and one for its terminator. With the limit at
    // buf + 4095, a message that filled the buffer left dst at buf + 4095,
    // and the dst[1] store below landed on buf[4096], one past the end.
    // (Assumes halide_string_to_string never returns a pointer beyond the
    // limit it is given.)
    char *end = buf + sizeof(buf) - 2;
    char *dst = halide_string_to_string(buf, end, "Error: ");
    dst = halide_string_to_string(dst, end, msg);
    // We still have one character free. Add a newline if there
    // isn't one already.
    if (dst[-1] != '\n') {
        dst[0] = '\n';
        dst[1] = 0;
    }
    halide_print(user_context, buf);
    exit(1);
}
예제 #7
0
int initialize_kernels(const unsigned char *code, int codeLen,
                       handle_t *module_ptr) {
    elf_t *lib = obj_dlopen_mem(code, codeLen);
    if (!lib) {
        halide_print(NULL, "dlopen_mem failed\n");
        return -1;
    }

    // Initialize the runtime. The Hexagon runtime can't call any
    // system functions (because we can't link them), so we put all
    // the implementations that need to do so here, and pass poiners
    // to them in here.
    set_runtime_t set_runtime = (set_runtime_t)obj_dlsym(lib, "halide_noos_set_runtime");
    if (!set_runtime) {
        obj_dlclose(lib);
        halide_print(NULL, "halide_noos_set_runtime not found in shared object\n");
        return -1;
    }

    int result = set_runtime(halide_malloc,
                             halide_free,
                             halide_print,
                             halide_error,
                             halide_do_par_for,
                             halide_do_task,
                             halide_get_symbol,
                             halide_load_library,
                             halide_get_library_symbol);
    if (result != 0) {
        obj_dlclose(lib);
        halide_print(NULL, "set_runtime failed\n");
        return result;
    }
    *module_ptr = reinterpret_cast<handle_t>(lib);

    return 0;
}
예제 #8
0
// Default error handler: assembles "Error: " + msg in a stack buffer,
// makes sure the text ends with a newline, marks the written bytes as
// initialized for MSAN, prints the text with halide_print and then aborts
// via halide_abort.
WEAK void halide_default_error(void *user_context, const char *msg) {
    char buf[4096];
    // Stop two bytes short of the end of buf so that a newline and its
    // terminator can always be appended afterwards.
    char *const limit = buf + 4094;

    char *cursor = halide_string_to_string(buf, limit, "Error: ");
    cursor = halide_string_to_string(cursor, limit, msg);

    const bool missing_newline = (cursor[-1] != '\n');
    if (missing_newline) {
        *cursor++ = '\n';
        *cursor = 0;
    }

    // The annotated length covers the text plus its terminating zero.
    halide_msan_annotate_memory_is_initialized(user_context, buf, cursor - buf + 1);
    halide_print(user_context, buf);
    Halide::Runtime::Internal::halide_abort();
}
예제 #9
0
// Drains any log messages that are pending on the remote side and prints
// them with halide_print. Intended to be called after every remote call.
// Does nothing when remote_poll_log is not available.
WEAK void poll_log(void *user_context) {
    if (!remote_poll_log) return;

    for (;;) {
        char message[1024];
        int bytes_read = 0;
        const int status = remote_poll_log(message, sizeof(message), &bytes_read);
        if (status != 0) {
            // Deliberately reported as a plain message rather than an
            // error, so that it cannot mask more useful error output that
            // may follow.
            print(user_context) << "Hexagon: remote_poll_log failed " << status << "\n";
            return;
        }

        // A non-positive count means there is nothing left to drain.
        if (bytes_read <= 0) {
            return;
        }

        // NOTE(review): message is printed as a C string, so this relies on
        // remote_poll_log zero-terminating what it writes -- confirm.
        halide_print(user_context, message);
    }
}
예제 #10
0
파일: profiler.cpp 프로젝트: nsknojj/Halide
// Prints a plain-text profiling report through halide_print.
//
// For every pipeline in s->pipelines that has at least one run, a summary
// is printed (name, total time, samples, runs, time per run, heap
// allocation count and peak heap usage). Per-func lines follow when the
// pipeline has a non-zero time or memory total, or when any func has a
// non-zero stack peak. Each func line shows time per run, percentage of the
// pipeline's time, and -- when non-zero -- heap peak / allocation count /
// average allocation size and stack peak. Times are divided by 1000000
// before being printed with an "ms" label.
WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_state *s) {

    char line_buf[1024];
    Printer<StringStreamPrinter, sizeof(line_buf)> sstr(user_context, line_buf);

    for (halide_profiler_pipeline_stats *p = s->pipelines; p;
         p = (halide_profiler_pipeline_stats *)(p->next)) {
        float t = p->time / 1000000.0f;
        // Skip pipelines that never ran; this also protects the divisions
        // by p->runs below.
        if (!p->runs) continue;
        sstr.clear();
        // (A pipeline-level average allocation size used to be computed
        // here but was never printed; the dead computation was removed.)
        sstr << p->name << "\n"
             << " total time: " << t << " ms"
             << "  samples: " << p->samples
             << "  runs: " << p->runs
             << "  time/run: " << t / p->runs << " ms\n"
             << " heap allocations: " << p->num_allocs
             << "  peak heap usage: " << p->memory_peak << " bytes\n";
        halide_print(user_context, sstr.str());

        // Per-func stats are worth printing if the pipeline recorded any
        // time or heap usage, or if any func recorded stack usage.
        bool print_f_states = p->time || p->memory_total;
        if (!print_f_states) {
            for (int i = 0; i < p->num_funcs; i++) {
                halide_profiler_func_stats *fs = p->funcs + i;
                if (fs->stack_peak) {
                    print_f_states = true;
                    break;
                }
            }
        }

        if (print_f_states) {
            for (int i = 0; i < p->num_funcs; i++) {
                sstr.clear();
                halide_profiler_func_stats *fs = p->funcs + i;

                // The first func is always a catch-all overhead
                // slot. Only report overhead time if it's non-zero
                if (i == 0 && fs->time == 0) continue;

                // Each field is padded with spaces to a fixed column.
                sstr << "  " << fs->name << ": ";
                while (sstr.size() < 25) sstr << " ";

                float ft = fs->time / (p->runs * 1000000.0f);
                sstr << ft << "ms";
                while (sstr.size() < 40) sstr << " ";

                // p->time may be zero here (funcs can be printed because of
                // memory or stack usage alone), so guard the division.
                int percent = 0;
                if (p->time != 0) {
                    percent = (100*fs->time) / p->time;
                }
                sstr << "(" << percent << "%)";
                while (sstr.size() < 50) sstr << " ";

                int alloc_avg = 0;
                if (fs->num_allocs != 0) {
                    alloc_avg = fs->memory_total/fs->num_allocs;
                }

                if (fs->memory_peak) {
                    sstr << " peak: " << fs->memory_peak;
                    while (sstr.size() < 65) sstr << " ";
                    sstr << " num: " << fs->num_allocs;
                    while (sstr.size() < 80) sstr << " ";
                    sstr << " avg: " << alloc_avg;
                }
                if (fs->stack_peak > 0) {
                    sstr << " stack: " << fs->stack_peak;
                }
                sstr << "\n";

                halide_print(user_context, sstr.str());
            }
        }
    }
}
예제 #11
0
파일: tracing.cpp 프로젝트: Iamquen/Halide
// Records a single trace event.
//
// If halide_custom_trace is set, the event is forwarded to it and its
// return value is returned. Otherwise the event is assigned a fresh id
// (taken from a static counter that starts at 1 and is incremented
// atomically) and is emitted in one of two forms:
//   - when halide_get_trace_file returns a positive file descriptor, as a
//     binary packet written to that descriptor;
//   - otherwise, as one line of text passed to halide_print.
// In the non-custom case the new id is returned.
WEAK int32_t halide_trace(void *user_context, const halide_trace_event *e) {

    static int32_t ids = 1;

    if (halide_custom_trace) {
        return (*halide_custom_trace)(user_context, e);
    } else {

        int32_t my_id = __sync_fetch_and_add(&ids, 1);

        // If we're dumping to a file, use a binary format
        // NOTE(review): a descriptor of 0 is treated the same as "no trace
        // file" by the test below.
        int fd = halide_get_trace_file(user_context);
        if (fd > 0) {
            // A 32-byte header: two int32 ids (bytes 0-7), six single-byte
            // fields (bytes 8-13), then the func name as a zero-terminated
            // string in the remaining bytes.
            // Width and dimension count are clamped to fit in one byte each.
            uint8_t clamped_width = e->vector_width < 256 ? e->vector_width : 255;
            uint8_t clamped_dimensions = e->dimensions < 256 ? e->dimensions : 255;

            // Upgrade the bit count to a power of two, because that's
            // how it will be stored on the stack.
            int bytes = 1;
            while (bytes*8 < e->bits) bytes <<= 1;

            // Compute the size of each portion of the tracing packet
            size_t header_bytes = 32;
            size_t value_bytes = clamped_width * bytes;
            size_t int_arg_bytes = clamped_dimensions * sizeof(int32_t);
            size_t total_bytes = header_bytes + value_bytes + int_arg_bytes;
            uint8_t buffer[4096];
            halide_assert(user_context, total_bytes <= 4096 && "Tracing packet too large");

            // Header: ids first, then the one-byte fields.
            ((int32_t *)buffer)[0] = my_id;
            ((int32_t *)buffer)[1] = e->parent_id;
            buffer[8] = e->event;
            buffer[9] = e->type_code;
            buffer[10] = e->bits;
            buffer[11] = clamped_width;
            buffer[12] = e->value_index;
            buffer[13] = clamped_dimensions;

            // Use up to 17 bytes for the function name
            // (bytes 14..30; byte 31 is always left for a terminating zero).
            int i = 14;
            for (; i < header_bytes-1; i++) {
                buffer[i] = e->func[i-14];
                if (buffer[i] == 0) break;
            }
            // Fill the rest with zeros
            for (; i < header_bytes; i++) {
                buffer[i] = 0;
            }

            // Next comes the value
            // (copied byte by byte from e->value).
            for (size_t i = 0; i < value_bytes; i++) {
                buffer[header_bytes + i] = ((uint8_t *)(e->value))[i];
            }

            // Then the int args
            // (the raw bytes of e->coordinates).
            for (size_t i = 0; i < int_arg_bytes; i++) {
                buffer[header_bytes + value_bytes + i] = ((uint8_t *)(e->coordinates))[i];
            }


            // The whole packet goes out in a single write call; anything
            // other than a complete write fails the assertion below.
            size_t written = write(fd, &buffer[0], total_bytes);
            halide_assert(user_context, written == total_bytes && "Can't write to trace file");

        } else {
            stringstream ss(user_context);

            // Round up bits to 8, 16, 32, or 64
            int print_bits = 8;
            while (print_bits < e->bits) print_bits <<= 1;
            halide_assert(user_context, print_bits <= 64 && "Tracing bad type");

            // No trace file: build a plain-text line and emit it with
            // halide_print. Names are indexed by e->event.
            // NOTE(review): e->event is used as an index into this 8-entry
            // table without a range check in this function.
            const char *event_types[] = {"Load",
                                         "Store",
                                         "Begin realization",
                                         "End realization",
                                         "Produce",
                                         "Update",
                                         "Consume",
                                         "End consume"};

            // Only print out the value on stores and loads.
            bool print_value = (e->event < 2);

            // Prefix: "<event> <func>.<value_index>[" followed by the
            // coordinates. For vector events the coordinates are wrapped in
            // <...> groups of vector_width entries each.
            ss << event_types[e->event] << " " << e->func << "." << e->value_index << "[";
            if (e->vector_width > 1) {
                ss << "<";
            }
            for (int i = 0; i < e->dimensions; i++) {
                if (i > 0) {
                    if ((e->vector_width > 1) && (i % e->vector_width) == 0) {
                        ss << ">, <";
                    } else {
                        ss << ", ";
                    }
                }
                ss << e->coordinates[i];
            }
            if (e->vector_width > 1) {
                ss << ">]";
            } else {
                ss << "]";
            }

            if (print_value) {
                if (e->vector_width > 1) {
                    ss << " = <";
                } else {
                    ss << " = ";
                }
                // One printed element per vector lane. e->value is
                // reinterpreted according to type_code: 0 as signed
                // integers, 1 as unsigned integers, 2 as float/double,
                // 3 as pointers. Any other type_code prints nothing.
                for (int i = 0; i < e->vector_width; i++) {
                    if (i > 0) {
                        ss << ", ";
                    }
                    if (e->type_code == 0) {
                        if (print_bits == 8) {
                            ss << ((int8_t *)(e->value))[i];
                        } else if (print_bits == 16) {
                            ss << ((int16_t *)(e->value))[i];
                        } else if (print_bits == 32) {
                            ss << ((int32_t *)(e->value))[i];
                        } else {
                            ss << ((int64_t *)(e->value))[i];
                        }
                    } else if (e->type_code == 1) {
                        if (print_bits == 8) {
                            ss << ((uint8_t *)(e->value))[i];
                        } else if (print_bits == 16) {
                            ss << ((uint16_t *)(e->value))[i];
                        } else if (print_bits == 32) {
                            ss << ((uint32_t *)(e->value))[i];
                        } else {
                            ss << ((uint64_t *)(e->value))[i];
                        }
                    } else if (e->type_code == 2) {
                        // Only 32-bit and 64-bit floating point are handled.
                        halide_assert(user_context, print_bits >= 32 && "Tracing a bad type");
                        if (print_bits == 32) {
                            ss << ((float *)(e->value))[i];
                        } else {
                            ss << ((double *)(e->value))[i];
                        }
                    } else if (e->type_code == 3) {
                        ss << ((void **)(e->value))[i];
                    }
                }
                if (e->vector_width > 1) {
                    ss << ">";
                }
            }
            ss << "\n";

            halide_print(user_context, ss.str());
        }

        return my_id;

    }
}
예제 #12
0
// Prints a plain-text profiling report through halide_print.
//
// For every pipeline in s->pipelines that has at least one run, a summary
// is printed (name, total time, samples, runs, time per run, heap
// allocation count and peak heap usage, plus the average number of active
// threads when the pipeline's thread numerator and denominator differ).
// Per-func lines follow when the pipeline has a non-zero time or memory
// total, or when any func has a non-zero stack peak. Times are divided by
// 1000000 before being printed with an "ms" label.
WEAK void halide_profiler_report_unlocked(void *user_context, halide_profiler_state *s) {

    char line_buf[1024];
    Printer<StringStreamPrinter, sizeof(line_buf)> sstr(user_context, line_buf);

    for (halide_profiler_pipeline_stats *p = s->pipelines; p;
         p = (halide_profiler_pipeline_stats *)(p->next)) {
        float t = p->time / 1000000.0f;
        // Skip pipelines that never ran; this also protects the divisions
        // by p->runs below.
        if (!p->runs) continue;
        sstr.clear();
        // (A pipeline-level average allocation size used to be computed
        // here but was never printed; the dead computation was removed.)

        // Equal numerator and denominator is treated as "serial", in which
        // case no thread statistics are printed.
        bool serial = p->active_threads_numerator == p->active_threads_denominator;
        // The tiny epsilon keeps the division defined for a zero denominator.
        float threads = p->active_threads_numerator / (p->active_threads_denominator + 1e-10);
        sstr << p->name << "\n"
             << " total time: " << t << " ms"
             << "  samples: " << p->samples
             << "  runs: " << p->runs
             << "  time/run: " << t / p->runs << " ms\n";
        if (!serial) {
            sstr << " average threads used: " << threads << "\n";
        }
        sstr << " heap allocations: " << p->num_allocs
             << "  peak heap usage: " << p->memory_peak << " bytes\n";
        halide_print(user_context, sstr.str());

        // Per-func stats are worth printing if the pipeline recorded any
        // time or heap usage, or if any func recorded stack usage.
        bool print_f_states = p->time || p->memory_total;
        if (!print_f_states) {
            for (int i = 0; i < p->num_funcs; i++) {
                halide_profiler_func_stats *fs = p->funcs + i;
                if (fs->stack_peak) {
                    print_f_states = true;
                    break;
                }
            }
        }

        if (print_f_states) {
            for (int i = 0; i < p->num_funcs; i++) {
                // cursor tracks the column the next field should start at;
                // each field is padded with spaces up to it.
                size_t cursor = 0;
                sstr.clear();
                halide_profiler_func_stats *fs = p->funcs + i;

                // The first func is always a catch-all overhead
                // slot. Only report overhead time if it's non-zero
                if (i == 0 && fs->time == 0) continue;

                sstr << "  " << fs->name << ": ";
                cursor += 25;
                while (sstr.size() < cursor) sstr << " ";

                float ft = fs->time / (p->runs * 1000000.0f);
                sstr << ft;
                // We don't need 6 sig. figs.
                sstr.erase(3);
                sstr << "ms";
                cursor += 10;
                while (sstr.size() < cursor) sstr << " ";

                // p->time may be zero here (funcs can be printed because of
                // memory or stack usage alone), so guard the division.
                int percent = 0;
                if (p->time != 0) {
                    percent = (100*fs->time) / p->time;
                }
                sstr << "(" << percent << "%)";
                cursor += 8;
                while (sstr.size() < cursor) sstr << " ";

                if (!serial) {
                    // Named distinctly so it no longer shadows the
                    // pipeline-level `threads` above.
                    float func_threads = fs->active_threads_numerator / (fs->active_threads_denominator + 1e-10);
                    sstr << "threads: " << func_threads;
                    sstr.erase(3);
                    cursor += 15;
                    while (sstr.size() < cursor) sstr << " ";
                }

                int alloc_avg = 0;
                if (fs->num_allocs != 0) {
                    alloc_avg = fs->memory_total/fs->num_allocs;
                }

                if (fs->memory_peak) {
                    cursor += 15;
                    sstr << " peak: " << fs->memory_peak;
                    while (sstr.size() < cursor) sstr << " ";
                    sstr << " num: " << fs->num_allocs;
                    cursor += 15;
                    while (sstr.size() < cursor) sstr << " ";
                    sstr << " avg: " << alloc_avg;
                }
                if (fs->stack_peak > 0) {
                    sstr << " stack: " << fs->stack_peak;
                }
                sstr << "\n";

                halide_print(user_context, sstr.str());
            }
        }
    }
}
예제 #13
0
파일: sim_remote.cpp 프로젝트: kgnk/Halide
// This is a basic implementation of the Halide runtime for Hexagon.
// Error hook: forwards the message unchanged to halide_print and returns
// to the caller. It does not add a prefix or newline and does not abort.
void halide_error(void *user_context, const char *str) {
    halide_print(user_context, str);
}