/* * Open an arbitrary file that will (optionally) disappear when we close it. * This is similar to OpenTemporaryFile, except the exact name specified in * fileName is used. */ File OpenNamedFile(const char *fileName, bool create, bool delOnClose, bool closeAtEOXact) { char tempfilepath[MAXPGPATH]; strncpy(tempfilepath, fileName, sizeof(tempfilepath)); /* * File flags when open the file. Note: we don't use O_EXCL, in case there is an orphaned * temp file that can be reused. */ int fileFlags = O_RDWR | PG_BINARY; if (create) { fileFlags |= O_TRUNC | O_CREAT; } File file = FileNameOpenFile(tempfilepath, fileFlags, 0600); if (file <= 0) { char *dirpath; if (!create) return file; /* * We might need to create the pg_tempfiles subdirectory, if no one * has yet done so. * * Don't check for error from mkdir; it could fail if someone else * just did the same thing. If it doesn't work then we'll bomb out on * the second create attempt, instead. */ dirpath = (char*)palloc(PATH_MAX); snprintf(dirpath, PATH_MAX, "%s/%s", getCurrentTempFilePath, PG_TEMP_FILES_DIR); mkdir(dirpath, S_IRWXU); pfree(dirpath); file = FileNameOpenFile(tempfilepath, fileFlags, 0600); if (file <= 0) elog(ERROR, "could not create temporary file \"%s\": %m", tempfilepath); } /* Mark it for deletion at close */ if(delOnClose) VfdCache[file].fdstate |= FD_TEMPORARY; /* Mark it to be closed at end of transaction. */ if (closeAtEOXact) { VfdCache[file].fdstate |= FD_CLOSE_AT_EOXACT; VfdCache[file].create_subid = GetCurrentSubTransactionId(); } return file; } /* OpenNamedFile */
/*
 *	mdopen() -- Open the specified relation.  ereport's on failure.
 *		(Optionally, can return NULL instead of ereport for ENOENT.)
 *
 * Note we only open the first segment, when there are multiple segments.
 *
 * reln          - relation to open; if reln->md_fd is already set it is
 *                 returned unchanged.
 * allowNotFound - if true, a failure with errno == ENOENT returns NULL
 *                 instead of raising an error.
 *
 * On success the newly allocated MdfdVec is stored in reln->md_fd and
 * returned.
 */
static MdfdVec *
mdopen(SMgrRelation reln, bool allowNotFound)
{
	MdfdVec    *mdfd;
	char	   *path;
	File		fd;

	/* No work if already open */
	if (reln->md_fd)
		return reln->md_fd;

	path = relpath(reln->smgr_rnode);

	fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);

	if (fd < 0)
	{
		/*
		 * During bootstrap, there are cases where a system relation will
		 * be accessed (by internal backend processes) before the
		 * bootstrap script nominally creates it.  Therefore, accept
		 * mdopen() as a substitute for mdcreate() in bootstrap mode only.
		 * (See mdcreate)
		 */
		if (IsBootstrapProcessingMode())
			fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
		if (fd < 0)
		{
			/*
			 * Keep the errno of the failed open intact across pfree(), so
			 * that both the ENOENT test and the %m in the error message see
			 * the value set by the open attempt.
			 */
			int			save_errno = errno;

			pfree(path);
			errno = save_errno;

			if (allowNotFound && errno == ENOENT)
				return NULL;
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not open relation %u/%u/%u: %m",
							reln->smgr_rnode.spcNode,
							reln->smgr_rnode.dbNode,
							reln->smgr_rnode.relNode)));
		}
	}

	pfree(path);

	reln->md_fd = mdfd = _fdvec_alloc();

	mdfd->mdfd_vfd = fd;
	mdfd->mdfd_segno = 0;
#ifndef LET_OS_MANAGE_FILESIZE
	mdfd->mdfd_chain = NULL;
	/* The first segment must not exceed the per-segment block limit */
	Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
#endif

	return mdfd;
}
/*
 *	mdcreate() -- Create a new relation on magnetic disk.
 *
 * If isRedo is true, it's okay for the relation to exist already.
 *
 * Returns true once reln->md_fd refers to an open first segment.  Returns
 * false, with errno set to the error from the create attempt, when the file
 * could neither be created nor (where permitted) opened.
 */
bool
mdcreate(SMgrRelation reln, bool isRedo)
{
	char	   *relfile;
	File		vfd;
	MdfdVec    *seg;

	/* During redo the relation may already be created and opened */
	if (isRedo && reln->md_fd != NULL)
		return true;

	Assert(reln->md_fd == NULL);

	relfile = relpath(reln->smgr_rnode);

	/* First try an exclusive create */
	vfd = FileNameOpenFile(relfile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);

	if (vfd < 0)
	{
		/* remember why the create failed before trying anything else */
		int			create_errno = errno;

		/*
		 * During bootstrap, a system relation can be accessed (by internal
		 * backend processes) before the bootstrap script nominally creates
		 * it, so an already-existing file is tolerated there even without
		 * isRedo.  (See also mdopen)
		 */
		if (isRedo || IsBootstrapProcessingMode())
			vfd = FileNameOpenFile(relfile, O_RDWR | PG_BINARY, 0600);

		if (vfd < 0)
		{
			pfree(relfile);
			/* report the error from the create, not from the plain open */
			errno = create_errno;
			return false;
		}

		/* the fallback open succeeded; clear errno */
		errno = 0;
	}

	pfree(relfile);

	/* Attach a fresh descriptor entry for segment 0 to the relation */
	reln->md_fd = seg = _fdvec_alloc();

	seg->mdfd_vfd = vfd;
	seg->mdfd_segno = 0;
#ifndef LET_OS_MANAGE_FILESIZE
	seg->mdfd_chain = NULL;
#endif

	return true;
}
/*
 * Open one segment file of a relation and wrap it in a new MdfdVec.
 *
 * Segment 0 uses the bare relation path; any later segment uses
 * "<relation path>.<segno>".  oflags is OR'ed into O_RDWR | PG_BINARY.
 *
 * Returns NULL if the file cannot be opened.
 */
static MdfdVec *
_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
{
	char	   *segpath = relpath(reln->smgr_rnode);
	MdfdVec    *seg;
	int			vfd;

	if (segno > 0)
	{
		char	   *basepath = segpath;

		/* 12 extra bytes: room for '.', the segment number, and the NUL */
		segpath = (char *) palloc(strlen(basepath) + 12);
		sprintf(segpath, "%s.%u", basepath, segno);
		pfree(basepath);
	}

	/* the path is only needed for the open call itself */
	vfd = FileNameOpenFile(segpath, O_RDWR | PG_BINARY | oflags, 0600);
	pfree(segpath);

	if (vfd < 0)
		return NULL;

	/* build the descriptor entry for the freshly opened segment */
	seg = _fdvec_alloc();
	seg->mdfd_vfd = vfd;
	seg->mdfd_segno = segno;
	seg->mdfd_chain = NULL;

	/* a segment must not exceed the per-segment block limit */
	Assert(_mdnblocks(vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));

	return seg;
}
/* ----------------------------------------------------------------
 *		ExecHash
 *
 *		Build the hash table for a hashjoin; also do partitioning if
 *		more than one batch is required.
 *
 *		When hashtable->nbatch > 0, one file per batch is opened and the
 *		array of File handles is stored in hashstate->hashBatches.  Every
 *		tuple produced by the outer plan is passed to ExecHashTableInsert,
 *		and afterwards one BLCKSZ page per batch is appended to its file.
 *
 *		Returns the last slot obtained from the outer plan (the one for
 *		which TupIsNull() was true).
 * ----------------------------------------------------------------
 */
TupleTableSlot *
ExecHash(Hash *node)
{
	HashState	   *hashstate;
	Plan		   *outerNode;
	Var			   *hashkey;
	HashJoinTable	hashtable;
	TupleTableSlot *slot;
	ExprContext	   *econtext;
	int				nbatch;
	File		   *batches = NULL;
	int				i;
	RelativeAddr   *innerbatchNames;

	/* ----------------
	 *	get state info from node
	 * ----------------
	 */
	hashstate = node->hashstate;
	outerNode = outerPlan(node);

	hashtable = node->hashtable;
	if (hashtable == NULL)
		elog(WARN, "ExecHash: hash table is NULL.");

	nbatch = hashtable->nbatch;

	if (nbatch > 0)
	{
		/* if needs hash partition */
		innerbatchNames = (RelativeAddr *) ABSADDR(hashtable->innerbatchNames);

		/* --------------
		 *	allocate space for the file descriptors of batch files
		 *	then open the batch files in the current processes.
		 * --------------
		 */
		batches = (File *) palloc(nbatch * sizeof(File));
		for (i = 0; i < nbatch; i++)
		{
			batches[i] = FileNameOpenFile(ABSADDR(innerbatchNames[i]),
										  O_CREAT | O_RDWR, 0600);

			/*
			 * Do not carry on with an unopened batch file: every later
			 * write to it would fail.
			 * NOTE(review): assumes elog(WARN, ...) does not return, as the
			 * NULL-hashtable check above already relies on -- confirm.
			 */
			if (batches[i] < 0)
				elog(WARN, "ExecHash: could not open batch file %d", i);
		}
		hashstate->hashBatches = batches;
	}

	/* ----------------
	 *	set expression context
	 * ----------------
	 */
	hashkey = node->hashkey;
	econtext = hashstate->cstate.cs_ExprContext;

	/* ----------------
	 *	get tuple and insert into the hash table
	 * ----------------
	 */
	for (;;)
	{
		slot = ExecProcNode(outerNode, (Plan *) node);
		if (TupIsNull(slot))
			break;

		econtext->ecxt_innertuple = slot;
		ExecHashTableInsert(hashtable, econtext, hashkey,
							hashstate->hashBatches);

		ExecClearTuple(slot);
	}

	/*
	 * end of build phase, flush all the last pages of the batches.
	 * (The loop body never runs when nbatch <= 0, so batches is only
	 * dereferenced after it has been allocated above.)
	 */
	for (i = 0; i < nbatch; i++)
	{
		if (FileSeek(batches[i], 0L, SEEK_END) < 0)
			perror("FileSeek");
		if (FileWrite(batches[i], ABSADDR(hashtable->batch) + i * BLCKSZ, BLCKSZ) < 0)
			perror("FileWrite");
		NDirectFileWrite++;
	}

	/* ---------------------
	 *	Return the slot so that we have the tuple descriptor
	 *	when we need to save/restore them.  -Jeff 11 July 1991
	 * ---------------------
	 */
	return slot;
}