/** \internal Does all the actual globbing. \author Matthias Wandel ([email protected]) http://http://www.sentex.net/~mwandel/ \author Joshua Jensen ([email protected]) Matthias Wandel wrote the original C algorithm, which is contained in his Exif Jpeg header parser at http://www.sentex.net/~mwandel/jhead/ under the filename MyGlob.c. It should be noted that the MAJORITY of this function is his, albeit rebranded style-wise. I have made the following extensions: - Support for ignoring directories. - Perforce-style (and DJGPP-style) ... for recursion, instead of **. - Automatic conversion from ...Stuff to .../*Stuff. Allows lookup of files by extension, too: '....h' translates to '.../*.h'. - Ability to handle forward slashes and backslashes. - A minimal C++ class design. - Wildcard matching not based on FindFirstFile(). Should allow greater control in the future and patching in of the POSIX fnmatch() function on systems that support it. **/ void FileGlobBase::GlobHelper( const char* inPattern ) { char patternBuf[ _MAX_PATH * 2 ]; strcpy( patternBuf, inPattern ); DoRecursion: char basePath[ _MAX_PATH ]; char* basePathEndPtr = basePath; char* recurseAtPtr = NULL; // Split the path into base path and pattern to match against. bool hasWildcard = false; char* pattern; for ( pattern = patternBuf; *pattern != '\0'; ++pattern ) { char ch = *pattern; // Is it a '?' ? if ( ch == '?' ) hasWildcard = true; // Is it a '*' ? else if ( ch == '*' ) { hasWildcard = true; // Is there a '**'? if ( pattern[ 1 ] == '*' ) { // If we're just starting the pattern or the characters immediately // preceding the pattern are a drive letter ':' or a directory path // '/', then set up the internals for later recursion. if ( pattern == patternBuf || pattern[ -1 ] == '/' || pattern[ -1 ] == ':') { char ch2 = pattern[ 2 ]; if ( ch2 == '/' ) { recurseAtPtr = pattern; memcpy(pattern, pattern + 3, strlen( pattern ) - 2 ); } else if ( ch2 == '\0' ) { recurseAtPtr = pattern; *pattern = '\0'; } } } } // Is there a '/' or ':' in the pattern at this location? if ( ch == '/' || ch == ':' ) { if ( hasWildcard ) break; basePathEndPtr = &basePath[ pattern - patternBuf + 1 ]; } } // If there is no wildcard this time, then just add the current file and // get out of here. if ( !hasWildcard ) { // This should refer to a file. FoundMatch( patternBuf ); return; } // Did we make it to the end of the pattern? If so, we should match files, // since there were no slashes encountered. bool matchFiles = *pattern == '\0'; // Copy the directory down. size_t basePathLen = basePathEndPtr - basePath; strncpy( basePath, patternBuf, basePathLen ); // Copy the wildcard matching string. char matchPattern[ _MAX_PATH ]; size_t matchLen = ( pattern - patternBuf ) - basePathLen; strncpy( matchPattern, patternBuf + basePathLen, matchLen + 1 ); if ( matchPattern[ matchLen ] == '/' ) matchPattern[ matchLen ] = 0; StringList fileList; // Do the file search with *.* in the directory specified in basePattern. strcpy( basePathEndPtr, "*.*" ); // Start the find. WIN32_FIND_DATA fd; HANDLE handle = FindFirstFile( basePath, &fd ); // Clear out the *.* so we can use the original basePattern string. *basePathEndPtr = 0; // Any files found? if ( handle != INVALID_HANDLE_VALUE ) { for ( ;; ) { // Is the file a directory? if ( ( fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) && !matchFiles ) { // Do a wildcard match. if ( WildMatch( matchPattern, fd.cFileName, false ) ) { // It matched. Let's see if the file should be ignored. bool ignore = false; // Knock out "." or ".." if they haven't already been. size_t len = strlen( fd.cFileName ); fd.cFileName[ len ] = '/'; fd.cFileName[ len + 1 ] = '\0'; // See if this is a directory to ignore. ignore = MatchIgnorePattern( fd.cFileName ); fd.cFileName[ len ] = 0; // Should this file be ignored? if ( !ignore ) { // Nope. Add it to the linked list. fileList.push_back( fd.cFileName ); } } } else if ( !( fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY ) && matchFiles ) { // Do a wildcard match. if ( WildMatch( matchPattern, fd.cFileName, false ) ) { // It matched. Let's see if the file should be ignored. bool ignore = MatchIgnorePattern( fd.cFileName ); // Is this pattern exclusive? if ( !ignore && m_exclusiveFilePatterns.begin() != m_exclusiveFilePatterns.end() ) { ignore = !MatchExclusivePattern( fd.cFileName ); } // Should this file be ignored? if ( !ignore ) { // Nope. Add it to the linked list. fileList.push_back( fd.cFileName ); } } } // Look up the next file. if ( !FindNextFile( handle, &fd ) ) break; } // Close down the file find handle. FindClose( handle ); } // Sort the list. fileList.sort(); // Iterate the file list and either recurse or add the file as a found // file. if ( !matchFiles ) { for ( StringList::iterator it = fileList.begin(); it != fileList.end(); ++it ) { char combinedName[ _MAX_PATH * 2 ]; // Need more directories. CatPath( combinedName, basePath, (*it).c_str() ); strcat( combinedName, pattern ); GlobHelper( combinedName ); } } else // if ( !matchFiles ) { for ( StringList::iterator it = fileList.begin(); it != fileList.end(); ++it ) { char combinedName[ _MAX_PATH * 2 ]; CatPath( combinedName, basePath, (*it).c_str()); FoundMatch( combinedName ); } } // Clear out the file list, so the goto statement below can recurse // internally. fileList.clear(); // Do we need to recurse? if ( !recurseAtPtr ) return; // Copy in the new recursive pattern to match. strcpy( matchPattern, recurseAtPtr ); strcpy( recurseAtPtr, "*/**/" ); strcat( patternBuf, matchPattern ); // As this function context is no longer needed, we can just go back // to the top of it to avoid adding another context on the stack. goto DoRecursion; }
//---------------------------------------------------------------------------- // Decide how a particular pattern should be handled, and call function for // each. //---------------------------------------------------------------------------- void MyGlob (const char *Pattern, value FileFuncParm) { char BasePattern[_MAX_PATH]; char MatchPattern[_MAX_PATH]; char PatCopy[_MAX_PATH * 2]; int a; int MatchFiles, MatchDirs; int BaseEnd, PatternEnd; int SawPat; int RecurseAt; CAMLlocal1(v_string); strcpy (PatCopy, Pattern); DoRecursion: MatchFiles = FALSE; MatchDirs = TRUE; BaseEnd = 0; PatternEnd = 0; SawPat = FALSE; RecurseAt = -1; // Split the path into base path and pattern to match agains using findfirst. for (a = 0;; a++) { if (PatCopy[a] == '*' || PatCopy[a] == '?') { SawPat = TRUE; } if (PatCopy[a] == '*' && PatCopy[a + 1] == '*') { if (a == 0 || PatCopy[a - 1] == '\\' || PatCopy[a - 1] == ':') { if (PatCopy[a + 2] == '\\' || PatCopy[a + 2] == '\0') { // x\**\y --> x\y x\*\**\y RecurseAt = a; if (PatCopy[a + 2]) { memcpy (PatCopy + a, PatCopy + a + 3, strlen (PatCopy) - a - 1); } else { PatCopy[a + 1] = '\0'; } } } } if (PatCopy[a] == '\\' || (PatCopy[a] == ':' && PatCopy[a + 1] != '\\')) { PatternEnd = a; if (SawPat) break; // Findfirst can only match one level of wildcard at a time. BaseEnd = a + 1; } if (PatCopy[a] == '\0') { PatternEnd = a; MatchFiles = TRUE; MatchDirs = FALSE; break; } } if (!SawPat) { // No pattern. This should refer to a file. v_string = copy_string(PatCopy); callback(FileFuncParm,v_string); return; } strncpy (BasePattern, PatCopy, BaseEnd); BasePattern[BaseEnd] = 0; strncpy (MatchPattern, PatCopy, PatternEnd); MatchPattern[PatternEnd] = 0; { FileEntry *FileList = NULL; int NumAllocated = 0; int NumHave = 0; struct _finddata_t finddata; long find_handle; find_handle = _findfirst (MatchPattern, &finddata); for (;;) { if (find_handle == -1) break; // Eliminate the obvious patterns. if (!memcmp (finddata.name, ".", 2)) goto next_file; if (!memcmp (finddata.name, "..", 3)) goto next_file; if (finddata.attrib & _A_SUBDIR) { if (!MatchDirs) goto next_file; } else { if (!MatchFiles) goto next_file; } // Add it to the list. if (NumAllocated <= NumHave) { NumAllocated = NumAllocated + 10 + NumAllocated / 2; FileList = realloc (FileList, NumAllocated * sizeof (FileEntry)); if (FileList == NULL) goto nomem; } a = strlen (finddata.name); FileList[NumHave].Name = malloc (a + 1); if (FileList[NumHave].Name == NULL) { nomem: printf ("malloc failure\n"); exit (-1); } memcpy (FileList[NumHave].Name, finddata.name, a + 1); FileList[NumHave].attrib = finddata.attrib; NumHave++; next_file: if (_findnext (find_handle, &finddata) != 0) break; } _findclose (find_handle); // Sort the list... qsort (FileList, NumHave, sizeof (FileEntry), CompareFunc); // Use the list. for (a = 0; a < NumHave; a++) { char CombinedName[_MAX_PATH * 2]; if (FileList[a].attrib & _A_SUBDIR) { if (MatchDirs) { // Need more directories. CatPath (CombinedName, BasePattern, FileList[a].Name); strcat (CombinedName, PatCopy + PatternEnd); MyGlob (CombinedName, FileFuncParm); } } else { if (MatchFiles) { // We need files at this level. CatPath (CombinedName, BasePattern, FileList[a].Name); v_string = copy_string(CombinedName); callback(FileFuncParm,v_string); } } free (FileList[a].Name); } free (FileList); } if (RecurseAt >= 0) { strcpy (MatchPattern, PatCopy + RecurseAt); PatCopy[RecurseAt] = 0; strcpy (PatCopy + RecurseAt, "*\\**\\"); strcat (PatCopy, MatchPattern); // As this funciton context is no longer needed, we can just goto back // to the top of it to avoid adding another context on the stack. goto DoRecursion; } }