/* BlastIn:
 *
 * Reconstruct a Lib7 heap chunk from a flat sequence of bytes.
 * 'buf'/'len' describe the pre-read pickle bytes (the underlying fd
 * is -1 when blasting from a string).  On any malformed input we set
 * *seen_error to TRUE and return LIB7_void; otherwise we return the
 * rebuilt chunk and leave *seen_error untouched.
 */
lib7_val_t BlastIn (lib7_state_t *lib7_state, Byte_t *buf, long len, bool_t *seen_error)
{
    lib7_image_hdr_t  header;
    lib7_val_t        result = LIB7_void;
    inbuf_t           in;

    in.needsSwap = FALSE;
    in.file      = NULL;
    in.base      = buf;
    in.buf       = buf;
    in.nbytes    = len;

    /* Pull in the image header first: */
    if (HeapIO_ReadBlock (&in, &header, sizeof(header)) == FAILURE) {
        *seen_error = TRUE;
        return LIB7_void;
    }

    /* If the pickle was written on a machine of the opposite
     * endianness, byte-swap the header fields we use and mark
     * the input stream as needing swaps from here on:
     */
    if (header.byteOrder != ORDER) {
        if (BIGENDIAN_TO_HOST(header.byteOrder) != ORDER) {
            *seen_error = TRUE;
            return LIB7_void;
        }
        header.magic = BIGENDIAN_TO_HOST(header.magic);
        header.kind  = BIGENDIAN_TO_HOST(header.kind);
        in.needsSwap = TRUE;
    }

    if (header.magic != BLAST_MAGIC) {
        *seen_error = TRUE;
        return LIB7_void;
    }

    if (header.kind == BLAST_IMAGE) {

        /* General case: a full heap image follows the header. */
        if (ReadImage (lib7_state, &in, &result) == FAILURE) {
            *seen_error = TRUE;
            return LIB7_void;
        }

    } else if (header.kind == BLAST_UNBOXED) {

        /* Degenerate case: the pickled value is unboxed, so the
         * blast header itself carries the complete value.
         */
        lib7_blast_hdr_t blastHdr;
        if (HeapIO_ReadBlock (&in, &blastHdr, sizeof(blastHdr)) == FAILURE) {
            *seen_error = TRUE;
            return LIB7_void;
        }
        result = blastHdr.rootChunk;

    } else {

        /* Unrecognized pickle kind. */
        *seen_error = TRUE;
        return LIB7_void;
    }

    return result;
} /* end of BlastIn */
static void load_compiled_file__may_heapclean ( // ================================= // Task* task, char* filename, Roots* extra_roots ){ /////////////////////////////////////////////////////// // Loading an compiledfile is a five-step process: // // 1. Read the header, which holds various // numbers we need such as the number of // code segments in the compiledfile. // // 2. Locate all the values imported by this // compiledfile from the export lists of // previously loaded compiled_files. // For subsequent ease of access, we // construct an 'import record' (a vector) // holding all these values packed // consecutively. // /////////////////////////////////////////////////////// FILE* file; int i; int bytes_of_code_remaining; int bytes_of_exports = 0; Compiledfile_Header header; Picklehash export_picklehash; Int1 segment_bytesize; Int1 entrypoint_offset_in_bytes; size_t archive_offset; char* compiledfile_filename = filename; // If 'filename' is a "library@offset:compiledfile" triple, // parse it into its three parts: // { char* at_ptr = strchr (filename, '@'); if (!at_ptr) { archive_offset = 0; // We're loading a bare .compiled, not one packed within a library archive. } else { char* colon_ptr = strchr (at_ptr + 1, ':'); if (colon_ptr) { *colon_ptr = '\0'; compiledfile_filename = colon_ptr + 1; } archive_offset = strtoul (at_ptr + 1, NULL, 0); // XXX SUCKO FIXME Needs more sanity checking. *at_ptr = '\0'; } } // Log all files loaded, for diagnostic/information purposes: // if (!archive_offset) { // fprintf ( log_fd ? log_fd : stderr, " load-compiledfiles.c: Loading object file %s\n", filename ); } else { fprintf ( log_fd ? 
log_fd : stderr, " load-compiledfiles.c: Loading offset %8d in lib %s \tnamely object file %s\n", archive_offset, filename, compiledfile_filename ); } // Open the file: // file = open_file( filename, TRUE ); if (!file) print_stats_and_exit( 1 ); // If an offset is given (which is to say, if we are loading // an compiledfile packed within a library archive) then // then seek to the beginning of the section that contains // the image of our compiledfile: // if (archive_offset) { // if (fseek (file, archive_offset, SEEK_SET) == -1) { // die ("Cannot seek on archive file \"%s@%ul\": %s", filename, (unsigned long) archive_offset, strerror(errno) ); } } // Get the header: // read_n_bytes_from_file( file, &header, sizeof(Compiledfile_Header), filename ); // The integers in the header are kept in big-endian byte // order, so convert them if we're on a little-endian box: // header.number_of_imported_picklehashes = BIGENDIAN_TO_HOST( header.number_of_imported_picklehashes ); header.number_of_exported_picklehashes = BIGENDIAN_TO_HOST( header.number_of_exported_picklehashes ); header.bytes_of_import_tree = BIGENDIAN_TO_HOST( header.bytes_of_import_tree ); header.bytes_of_dependency_info = BIGENDIAN_TO_HOST( header.bytes_of_dependency_info ); header.bytes_of_inlinable_code = BIGENDIAN_TO_HOST( header.bytes_of_inlinable_code ); header.reserved = BIGENDIAN_TO_HOST( header.reserved ); header.pad = BIGENDIAN_TO_HOST( header.pad ); header.bytes_of_compiled_code = BIGENDIAN_TO_HOST( header.bytes_of_compiled_code ); header.bytes_of_symbolmapstack = BIGENDIAN_TO_HOST( header.bytes_of_symbolmapstack ); // XXX SUCKO FIXME These days 99% of the market is little-endian, // so should either change to always little-endian, or else // (better) always use host system's native byte ordering. // Ideally we should be able to just mmap the .compiledfile into // memory and be ready to go, with no bit-fiddling needed at all. 
// Read the 'import tree' and locate all the thus-specified // needed values located in the export tree of previously- // loaded compiled_files: // int imports_record_slot_count = header.number_of_imported_picklehashes + 1; // Make sure we have enough free heap space to allocate // our 'import record' vector of imported values: // if (need_to_call_heapcleaner (task, REC_BYTESIZE(imports_record_slot_count))) { // call_heapcleaner_with_extra_roots (task, 0, extra_roots ); } // Write the header for our 'import record', which will be // a Mythryl record with 'imports_record_slot_count' slots: // set_slot_in_nascent_heapchunk (task, 0, MAKE_TAGWORD(imports_record_slot_count, PAIRS_AND_RECORDS_BTAG)); // Locate all the required import values and // save them in our nascent on-heap 'import record': // { int next_imports_record_slot_to_fill = 1; // Over all previously loaded .compiled files // from which we import values: // while (next_imports_record_slot_to_fill < imports_record_slot_count) { // Picklehash picklehash_naming_previously_loaded_compiled_file; read_n_bytes_from_file( file, &picklehash_naming_previously_loaded_compiled_file, sizeof(Picklehash), filename ); // Locate all needed imports exported by that // particular pre-loaded compiledfile: // next_imports_record_slot_to_fill = fetch_imports ( task, file, filename, next_imports_record_slot_to_fill, picklehash_to_exports_tree( &picklehash_naming_previously_loaded_compiled_file ) ); } } // Put a dummy valid value (NIL) in the last slot, // just so the cleaner won't go bananas if it // looks at that slot: // set_slot_in_nascent_heapchunk( task, imports_record_slot_count, HEAP_NIL ); // Complete the above by actually allocating // the 'import record' on the Mythryl heap: // Val import_record = commit_nascent_heapchunk( task, imports_record_slot_count ); // Contains all the values we import from other compiled_files. Roots roots1 = { &import_record, extra_roots }; // Get the export picklehash for this compiledfile. 
// This is the name by which other compiled_files will // refer to us in their turn as they are loaded. // // Some compiled_files may not have such a name, in // which case they have no directly visible exported // values. (This typically means that they are a // plug-in which installs pointers to itself in some // other module's datastructures, as a side-effect // during loading.) // if (header.number_of_exported_picklehashes == 1) { bytes_of_exports = sizeof( Picklehash ); read_n_bytes_from_file( file, &export_picklehash, bytes_of_exports, filename ); } else if (header.number_of_exported_picklehashes != 0) { die ("Number of exported picklehashes is %d (should be 0 or 1)", (int)header.number_of_exported_picklehashes ); } // Seek to the first "code segment" within our compiledfile image. // This contains bytecoded instructions interpretable by // make-package-literals-via-bytecode-interpreter.c which construct all the needed constant // lists etc for this compiledfile. (If we stored them as actual // lists, we'd have to do relocations on all the pointers in // those structures at this point. The bytecode solution seems // simpler.) { // XXX BUGGO FIXME A 'long' is 32 bits on 32-bit Linux, // but files longer than 2GB (signed long!) are often // supported. We probably should use fseeko in those // cases and then // #define _FILE_OFFSET_BITS 64 // so as to support large (well, *huge* :) library files. // See the manpage for details. // This probably won't be a frequent problem in practice // for a few years yet, and by then we'll probably be // running 64-bit Linux anyhow, so not a high priority. 
// long file_offset = archive_offset + sizeof(Compiledfile_Header) + header.bytes_of_import_tree + bytes_of_exports + header.bytes_of_dependency_info + header.bytes_of_inlinable_code + header.reserved + header.pad; if (fseek(file, file_offset, SEEK_SET) == -1) { // die ("cannot seek on .compiled file \"%s\": %s", filename, strerror(errno) ); } } //////////////////////////////////////////////////////////////// // In principle, a .compiled file can contain any number of // code segments, so we track the number of bytes of code // left to process: When it hits zero, we've done all // the code segments. // // In practice, we currently always have exactly two // code segments, the first of which contains the byte- // coded logic constructing our literals (constants // -- see src/c/heapcleaner/make-package-literals-via-bytecode-interpreter.c) // and the second of which contains all our compiled // native code for the compiledfile, including that // which constructs our tree of exported (directly externally // visible) values. //////////////////////////////////////////////////////////////// bytes_of_code_remaining = header.bytes_of_compiled_code; // Read the size and the dummy entry point for the // first code segment (literal-constructing bytecodes). 
// The entrypoint offset of this first segment is always // zero, which is why we ignore it here: // read_n_bytes_from_file( file, &segment_bytesize, sizeof(Int1), filename ); // segment_bytesize = BIGENDIAN_TO_HOST( segment_bytesize ); // read_n_bytes_from_file( file, &entrypoint_offset_in_bytes, sizeof(Int1), filename ); // // entrypoint_offset_in_bytes = BIGENDIAN_TO_HOST( entrypoint_offset_in_bytes ); bytes_of_code_remaining -= segment_bytesize + 2 * sizeof(Int1); // if (bytes_of_code_remaining < 0) { // die ("format error (data size mismatch) in .compiled file \"%s\"", filename); } Val mythryl_result = HEAP_VOID; if (segment_bytesize > 0) { // Unt8* data_chunk = MALLOC_VEC( Unt8, segment_bytesize ); read_n_bytes_from_file( file, data_chunk, segment_bytesize, filename ); mythryl_result = make_package_literals_via_bytecode_interpreter__may_heapclean (task, data_chunk, segment_bytesize, &roots1); FREE(data_chunk); } // Do a functional update of the last element of the import_record: // for (i = 0; i < imports_record_slot_count; i++) { // set_slot_in_nascent_heapchunk(task, i, PTR_CAST(Val*, import_record)[i-1]); // <============ last use of import_record } set_slot_in_nascent_heapchunk( task, imports_record_slot_count, mythryl_result ); mythryl_result = commit_nascent_heapchunk( task, imports_record_slot_count ); Roots roots2 = { &mythryl_result, extra_roots }; // 'extra_roots' not '&roots1' because import_record is dead here. // Do a garbage collection, if necessary: // if (need_to_call_heapcleaner( task, PICKLEHASH_BYTES + REC_BYTESIZE(5)) ) { // call_heapcleaner_with_extra_roots (task, 0, &roots2 ); } while (bytes_of_code_remaining > 0) { // In practice, we always execute this loop exactly once. 
// // Read the size and entry point for this code chunk: read_n_bytes_from_file( file, &segment_bytesize, sizeof(Int1), filename ); segment_bytesize = BIGENDIAN_TO_HOST( segment_bytesize ); read_n_bytes_from_file( file, &entrypoint_offset_in_bytes, sizeof(Int1), filename ); entrypoint_offset_in_bytes = BIGENDIAN_TO_HOST( entrypoint_offset_in_bytes ); // How much more? // bytes_of_code_remaining -= segment_bytesize + 2 * sizeof(Int1); // if (bytes_of_code_remaining < 0) die ("format error (code size mismatch) in .compiled file \"%s\"", filename); // Allocate heap space and read code chunk: // Val code_chunk = allocate_nonempty_code_chunk (task, segment_bytesize); // read_n_bytes_from_file( file, PTR_CAST(char*, code_chunk), segment_bytesize, filename ); // Flush the instruction cache, so CPU will see // our newly loaded code. (To gain speed, and // simplify the hardware design, most modern CPUs // assume that code is never modified on the fly, // or at least not without manually flushing the // instruction cache this way.) // flush_instruction_cache (PTR_CAST(char*, code_chunk), segment_bytesize); // Create closure, taking entry point into account: // { Val closure = make_one_slot_record( task, PTR_CAST( Val, PTR_CAST (char*, code_chunk) + entrypoint_offset_in_bytes) ); // Apply the closure to the import picklehash vector. // // This actually executes all the top-level code for // the compile unit, which is to say that if the // source for our compiledfile looked something like // // package my_pkg { // my _ = file::print "Hello, world!\n"; // }; // // then when we do the following 'apply' call, you'd see // // Hello, world! // // printed on the standard output. // // In addition, invisible compiler-generated code // constructs and returns the tree of exports from // our compiledfile. // save_c_state (task, extra_roots); // We do NOT want mythryl_result on the extra_roots list here. 
mythryl_result = run_mythryl_function__may_heapclean (task, closure, mythryl_result, TRUE, NULL); // run_mythryl_function__may_heapclean def in src/c/main/run-mythryl-code-and-runtime-eventloop.c restore_c_state (task, extra_roots); } if (need_to_call_heapcleaner (task, PICKLEHASH_BYTES+REC_BYTESIZE(5))) { // call_heapcleaner_with_extra_roots (task, 0, &roots2 ); } } // Publish this compiled_file's exported-values tree // for the benefit of compiled_files loaded later: // if (bytes_of_exports) { // register_compiled_file_exports__may_heapclean ( task, &export_picklehash, // key -- the 16-byte picklehash naming this compiledfile. mythryl_result, // val -- the tree of exported Mythryl values. extra_roots ); } fclose( file ); } // load_compiled_file__may_heapclean
Val unpickle_datastructure (Task* task, Unt8* buf, long len, Bool* seen_error) {
    //======================
    //
    // Rebuild a heap chunk from the pickled byte sequence in buf[0..len-1].
    // (The underlying fd is -1 when unpickling from a bytevector; buf holds
    // any pre-read bytes and len is their count.)  On malformed input we
    // set *seen_error to TRUE and return HEAP_VOID.
    //
    // This fn gets exported to the Mythryl level as 'unpickle_datastructure' via
    //
    //     src/c/lib/heap/datastructure-unpickler.c
    // and then
    //     src/lib/std/src/unsafe/unsafe.pkg

    Heapfile_Header  header;
    Val              result;
    Inbuf            in;

    in.needs_to_be_byteswapped = FALSE;
    in.file   = NULL;
    in.base   = buf;
    in.buf    = buf;
    in.nbytes = len;

    // Read the chunk header:                               // heapio__read_block def in src/c/heapcleaner/import-heap-stuff.c
    //
    if (heapio__read_block( &in, &header, sizeof(header) ) == FALSE)   goto bad_pickle;

    // Byte-swap header fields when the pickle was written on a
    // machine of the opposite endianness, and flag the stream so
    // subsequent reads get swapped too:
    //
    if (header.byte_order != ORDER) {
        //
        if (BIGENDIAN_TO_HOST(header.byte_order) != ORDER)   goto bad_pickle;

        header.magic = BIGENDIAN_TO_HOST(header.magic);
        header.kind  = BIGENDIAN_TO_HOST(header.kind);

        in.needs_to_be_byteswapped = TRUE;
    }

    if (header.magic != PICKLE_MAGIC)   goto bad_pickle;

    switch (header.kind) {
        //
    case NORMAL_DATASTRUCTURE_PICKLE:
        //
        // General case: a full heap image follows.         // read_image defined below.
        //
        if (read_image( task, &in, &result ) == FALSE)   goto bad_pickle;
        break;

    case UNBOXED_PICKLE:
        //
        // Degenerate case: the pickled value is unboxed,
        // so the pickle header itself carries it:
        {
            Pickle_Header pickle_header;

            if (heapio__read_block( &in, &pickle_header, sizeof(pickle_header) ) == FALSE)   goto bad_pickle;

            result = pickle_header.root_chunk;
        }
        break;

    default:
        goto bad_pickle;
    }

    return result;

bad_pickle:
    //
    // Shared failure exit -- flag the error for our caller:
    //
    *seen_error = TRUE;
    return HEAP_VOID;
} // fun unpickle_datastructure