Counting files
# Variant 1: let find emit one NUL per regular file, strip everything that is
# not a NUL, and count the remaining bytes.
time -p /usr/bin/find -x "${HOME}" -type f -print0 \
    | /usr/bin/tr -dc '\0' \
    | /usr/bin/wc -c

# Variant 2: count the NUL-terminated records in a bash loop.
# Prints the number of regular files below $HOME and always returns 0.
function countfiles() {
    declare -i i=0

    # Process substitution keeps the loop in the current shell, so the
    # counter is still visible after the loop ends.
    while read -d $'\0' file
    do
        i+=1
    done < <(/usr/bin/find -x "${HOME}" -type f -print0)

    echo "${i}"
    return 0
}

time -p countfiles
Three C code snippets to count files (using recursive directory scanning).
1. A raw file counter for Mac OS X:
/* countfiles -- count the number of regular Mac OS X files in a given directory Note: - skips symbolic links and Mac-type aliases - skips /dev directory compile with: gcc -std=c99 -Wall -Wextra -pedantic -O3 -framework CoreServices -o countfiles countfiles.c Usage: ./countfiles ~ ./countfiles ~/Desktop Compare with: /usr/bin/find -x ~ -type f -print0 | /usr/bin/tr -dc '\0' | /usr/bin/wc -c Note also: The maximum length of a name in a directory is MAXNAMLEN (cf. /usr/include/sys/dirent.h): #define __DARWIN_MAXNAMLEN 255 #define MAXNAMLEN __DARWIN_MAXNAMLEN References: - http://www.sleuthkit.org/mac-robber/desc.php (directory scanner code; author: Brian Carrier; license: GNU version 2 or later, http://en.wikipedia.org/wiki/GNU_General_Public_License) - http://my-sample-code.googlecode.com/svn/trunk/realpath/realpath.c (resolves symbolic links as well as Mac-type aliases; author: MM Weiss; license: BSD, http://en.wikipedia.org/wiki/BSD_licenses) */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <dirent.h> #include <sys/stat.h> #include <unistd.h> #include <limits.h> // added from realpath.c #include <sys/param.h> #include <errno.h> #include <CoreServices/CoreServices.h> #define OK 0 #define ERROR 1 static char directory[PATH_MAX+1] = {0}; static char curpath[2*PATH_MAX+2] = {0}; static char *curpath_ptr = curpath; static char *p1 = 0; static char *p2 = 0; static unsigned long long int file_count = 0; // prototype your functions static unsigned int scan_dir(char *dir); static char *resolveAlias(const char * pathToResolve); static Boolean isFinderAlias(const char *pathname); static char *resolveFinderAlias(const char *pathname); int main(int argc, char * const argv[]) { //printf("sizeof(directory): %zu\n", sizeof(directory)); //printf("sizeof(curpath): %zu\n", sizeof(curpath)); if (argc != 2) return 1; char *pathToResolve = argv[1]; char *resolvedPath = 0; // resolve symlinked or Mac-aliased input directory if (!(resolvedPath 
= resolveAlias(pathToResolve))) { fprintf(stderr, "failed: \"%s\" %s\n", strerror(errno), pathToResolve); exit(1); } // get the last character of resolvedPath char *last_char_ptr = resolvedPath; while (*last_char_ptr) last_char_ptr++; // last_char_ptr now points to the terminating '\0' --last_char_ptr; // add a trailing slash / if missing if (*last_char_ptr != '/') { strcat(directory, resolvedPath); strcat(directory, "/"); }else{ strcat(directory, resolvedPath); } scan_dir(directory); printf("%llu\n", file_count); return 0; } static unsigned int scan_dir(char *dir) { DIR *dirp; struct dirent *dp; int dir_len = 0; char *dir_len_ptr; FSRef ref; Boolean aliasFileFlag, folderFlag; // skip the /dev directory //if (strcmp(dir, "/dev/") == 0) // return OK; // alternative if ( ( dir[0] == '/' ) && ( dir[1] == 'd' ) && ( dir[2] == 'e' ) && ( dir[3] == 'v' ) && ( dir[4] == '/' ) ) return OK; // get the length of dir //dir_len = strlen(dir); // strlen alternative dir_len_ptr = dir; while(*dir_len_ptr++) dir_len++; if (!(dirp = opendir (dir))) { fprintf (stderr, "\ninvalid directory: %s\n\n", dir); return 0; } // copy dir into curpath //strcpy(curpath, dir); // strcpy alternative curpath_ptr = curpath; while ( *dir ) { *curpath_ptr++ = *dir++; } *curpath_ptr = '\0'; // cycle through the directories while ((dp = readdir(dirp)) != NULL) { struct stat sp; // skip the . and .. 
entries if ((dp->d_name[0] == '.') && ( (dp->d_name[1] == '\0') || ((dp->d_name[1] == '.') && (dp->d_name[2] == '\0')))) continue; // make the full name and do an lstat //strcat (curpath, dp->d_name); // strcat alternative p1 = curpath; p2 = dp->d_name; while ( ( *p1 ) != 0 ) p1++; while ( ( *p1++ = *p2++ ) != 0); --p1; // set pointer p1 to point to the first terminating ASCII nul '\0' character if (0 != lstat (curpath, &sp)) { printf ("lstat error: %s\n", curpath); return ERROR; } // skip Mac-type aliases if (noErr == FSPathMakeRef((const UInt8 *)curpath, &ref, NULL)) { if (noErr == FSIsAliasFile(&ref, &aliasFileFlag, &folderFlag)) { if (aliasFileFlag) { curpath[dir_len] = '\0'; continue; } } } // file counter if ((sp.st_mode & S_IFMT) == S_IFREG) { //printf("%s\n", curpath); file_count++; curpath[dir_len] = '\0'; continue; } /* // skip symbolic links if ((sp.st_mode & S_IFMT) == S_IFLNK) { curpath[dir_len] = '\0'; continue; } */ // recurse if we have a directory if ((sp.st_mode & S_IFMT) == S_IFDIR) { //strcat (curpath, "/"); // strcat alternative p1 = curpath; p2 = "/"; while ( ( *p1 ) != 0 ) p1++; while ( ( *p1++ = *p2++ ) != 0); --p1; // set pointer p1 to point to the first terminating ASCII nul '\0' character if (scan_dir(curpath)) { return ERROR; } } // null terminate the curpath so strncat works for the next entry curpath[dir_len] = '\0'; } // end of while ... 
readdir closedir(dirp); return 0; } // scan_dir static Boolean isFinderAlias(const char *pathname) { FSRef ref; Boolean aliasFileFlag, folderFlag; if (noErr == FSPathMakeRef((const UInt8 *)pathname, &ref, NULL)) { if (noErr == FSIsAliasFile(&ref, &aliasFileFlag, &folderFlag)) { return aliasFileFlag; } } return FALSE; } static char *resolveFinderAlias(const char *pathname) { FSRef ref; Boolean targetIsFolder, wasAliased; char *resolvedAlias; if (noErr == FSPathMakeRef((const UInt8 *)pathname, &ref, NULL)) { if (noErr == FSResolveAliasFile(&ref, TRUE, &targetIsFolder, &wasAliased)) { if (TRUE == wasAliased) { if (NULL != (resolvedAlias = malloc(PATH_MAX + 1))) { if (noErr == FSRefMakePath(&ref, (UInt8 *)resolvedAlias, PATH_MAX)) { errno = 0; return resolvedAlias; } free(resolvedAlias); } } } } if (!errno) errno = ENOENT; return NULL; } static char *resolveAlias(const char * pathToResolve) { char buffer[PATH_MAX+1]; char *pathname; int err = EINVAL; if (isFinderAlias(pathToResolve)) { if (NULL != (pathname = resolveFinderAlias( pathToResolve ))) { free(pathname); err = 0; } } if (err) { if (NULL != (pathname = realpath(pathToResolve, buffer))) err = 0; } if (err) return NULL; else return pathname; }
2. A file counter for Mac OS X using the Carbon File Manager API:
/* countfiles -- count the number of Mac OS X files in a given directory (using the Carbon File Manager API) Based on: Iterating Directory Contents: Iterating Directories in Carbon (FSGetCatalogInfoBulk), http://developer.apple.com/documentation/Performance/Conceptual/FileSystem/Articles/IteratingFiles.html Note: - skips symbolic links and Mac-type (Finder) aliases - will also count sockets and FIFO files compile with: gcc -std=c99 -Wall -Wextra -pedantic -O3 -framework CoreServices -o countfiles countfiles.c Usage: ./countfiles ~ ./countfiles ~/Desktop Compare with: /usr/bin/find -x ~ -type f -print0 | /usr/bin/tr -dc '\0' | /usr/bin/wc -c */ #include <stdio.h> #include <CoreServices/CoreServices.h> // Forward declarations. OSStatus IterateFolder( FSRef * inFolder ); void DoSomethingWithThisObject( const FSCatalogInfo * inCatInfo ); static UInt8 path[1000]; static UInt32 pathSize = 1000; static OSStatus status; static const FSCatalogInfo * inCatInfo; static FInfo * theFinderInfo; static OSType type; static Boolean aliasFileFlag, folderFlag; static unsigned long long int file_count = 0; int main(int argc, char * const argv[]) { OSStatus outStatus; FSRef folderRef; //printf("begin file iteration!\n"); //fflush( stdout ); if(argc != 2) return 1; // Get a folder path, // make it into an FSRef, and iterate it // outStatus = FSPathMakeRef((const UInt8 *) argv[1], &folderRef, NULL); if( outStatus == noErr ) { outStatus = IterateFolder( &folderRef ); } printf("%llu\n", file_count); printf( "final error status is (#%d)\n", (int)outStatus ); return 0; } OSStatus IterateFolder( FSRef * inFolder ) { OSStatus outStatus; // Get permissions and node flags and Finder info // // For maximum performance, specify in the catalog // bitmap only the information you need to know FSCatalogInfoBitmap kCatalogInfoBitmap = (kFSCatInfoNodeFlags | kFSCatInfoFinderInfo); // On each iteration of the do-while loop, retrieve this // number of catalog infos // // We use the number of 
FSCatalogInfos that will fit in // exactly four VM pages (#113). This is a good balance // between the iteration I/O overhead and the risk of // incurring additional I/O from additional memory // allocation const size_t kRequestCountPerIteration = ((4096 * 4) / sizeof(FSCatalogInfo)); FSIterator iterator; FSCatalogInfo * catalogInfoArray; // Create an iterator outStatus = FSOpenIterator( inFolder, kFSIterateFlat, &iterator ); if( outStatus == noErr ) { // Allocate storage for the returned information catalogInfoArray = (FSCatalogInfo *) malloc(sizeof(FSCatalogInfo) *kRequestCountPerIteration); FSRefPtr resultRefs = (FSRefPtr )malloc(sizeof(FSRef) * kRequestCountPerIteration); if (!resultRefs) { fprintf(stderr, "malloc failed for resultRefs\n"); return 1; } if( catalogInfoArray == NULL ) { outStatus = memFullErr; } else { // Request information about files in the given directory, // until we get a status code back from the File Manager do { ItemCount actualCount; outStatus = FSGetCatalogInfoBulk(iterator, kRequestCountPerIteration, &actualCount, NULL, kCatalogInfoBitmap, catalogInfoArray, resultRefs, NULL, NULL ); // Process all items received if( outStatus == noErr || outStatus == errFSNoMoreItems ) { UInt32 index; for( index = 0; index < actualCount; index += 1 ) { inCatInfo = &catalogInfoArray[ index ]; // if we have a directory ... if( (inCatInfo->nodeFlags & kFSNodeIsDirectoryMask) == kFSNodeIsDirectoryMask ) { /* // print directory path status = FSRefMakePath(&resultRefs[index], path, pathSize); if (status != noErr) { printf("FSRefMakePath failed %ld\n", outStatus); }else{ printf("d %s\n", path); } */ // recursive directory scanning if (IterateFolder( &resultRefs[index] )) continue; // or else we have a file ... 
} else { theFinderInfo = (FInfo *)&inCatInfo->finderInfo[0]; type = theFinderInfo->fdType; // skip symbolic links if ( (((type & 0xFF000000) >> 24) == 's') && (((type & 0x00FF0000) >> 16) == 'l') && (((type & 0x0000FF00) >> 8) == 'n') && (((type & 0x000000FF)) == 'k') ) continue; // skip Mac-type aliases if (noErr == FSIsAliasFile(&resultRefs[index], &aliasFileFlag, &folderFlag)) { if (aliasFileFlag) continue; } file_count++; // file counter /* // print file path status = FSRefMakePath(&resultRefs[index], path, pathSize); if (status != noErr) { printf("FSRefMakePath failed %ld\n", outStatus); }else{ printf("f %s\n", path); //printf( "Found a file (type %c)\n", (char) ((type & 0xFF000000) >> 24)); //printf( "Found a file (type %c)\n", (char) ((type & 0x00FF0000) >> 16)); //printf( "Found a file (type %c)\n", (char) ((type & 0x0000FF00) >> 8)); //printf( "Found a file (type %c)\n\n", (char) (type & 0x000000FF)); } */ } // if dir - else file } // for } // if } while( outStatus == noErr ); // errFSNoMoreItems tells us we have successfully processed all // items in the directory -- not really an error if( outStatus == errFSNoMoreItems ) { outStatus = noErr; } // Free the array memory free( (void *) catalogInfoArray ); } } // if FSCloseIterator(iterator); return outStatus; }
3. A Unix-based file counter with optional suffix filtering:
/*
   countfiles -- count the number of regular (suffix-filtered) files in a given directory

   compile with:
   gcc -std=c99 -Wall -Wextra -pedantic -O3 -o countfiles countfiles.c

   Usage:
   ./countfiles -h
   ./countfiles ~/Desktop 2>/dev/null
   ./countfiles -v ~/Desktop 2>/dev/null
   ./countfiles -s '.c' ~/Desktop
   ./countfiles -v -s '.m' ~/Desktop

   Note:
   - countfiles does not scan a Mac-type (Finder) alias to a directory,
     i.e. it does not follow a Mac-type (Finder) alias to a directory.
   - Files named '.' or '..' get scanned.
   - File search will not descend into directories that have a different
     device number than the file from which the descent began.
   - fts_options: FTS_COMFOLLOW | FTS_PHYSICAL | FTS_NOCHDIR | FTS_XDEV | FTS_SEEDOT
     (see man 3 fts)

   For how to implement recursive directory scanning please see man 3 fts and:
   "fts(3) or Avoiding to Reinvent the Wheel",
   http://keramida.wordpress.com/2009/07/05/fts3-or-avoiding-to-reinvent-the-wheel/

   Compare with:
   /usr/bin/find -x ~ -type f -print0 | /usr/bin/tr -dc '\0' | /usr/bin/wc -c
*/

// Ask glibc to declare getopt(), fts and the err.h helpers even under
// -std=c99; other C libraries ignore this macro.
#ifndef _DEFAULT_SOURCE
#define _DEFAULT_SOURCE
#endif

#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fts.h>
#include <unistd.h> // getopt

static int ptree(char * const argv[], const char *suffix, int verbose);
static int has_suffix(const char *path, const char *suffix, size_t suffix_len);

// Print the help text to stderr.
static void usage(void)
{
    static const char *usageinfo[] = {
        "countfiles -- count the number of regular (suffix-filtered) files in a given directory",
        "Usage:",
        "countfiles [-hv] [-s suffix] [ [dir1] [dir2] ...]",
        "-h: help",
        "-s: suffix",
        "-v: verbose",
        "Note:",
        "- countfiles does not scan a Mac-type (Finder) alias to a directory, i. e. it does not follow a Mac-type alias to a directory.",
        "- Files named '.' or '..' get scanned.",
        "- File search will not descend into directories that have a different device number than the file from which the descent began.",
        "- fts_options: FTS_COMFOLLOW | FTS_PHYSICAL | FTS_NOCHDIR | FTS_XDEV | FTS_SEEDOT (see man 3 fts)"
    };

    fprintf(stderr,
            "\n%s\n\n"
            "%s\n\n"
            "\t%s\n\n"
            "\t%s\n\n"
            "\t%s\n\n"
            "\t%s\n\n"
            "\n%s\n\n"
            "\t%s\n\n"
            "\t%s\n\n"
            "\t%s\n\n"
            "\t%s\n\n",
            usageinfo[0], usageinfo[1], usageinfo[2], usageinfo[3],
            usageinfo[4], usageinfo[5], usageinfo[6], usageinfo[7],
            usageinfo[8], usageinfo[9], usageinfo[10]);
}

int main(int argc, char * const argv[])
{
    int verbose = 0;
    const char *suffix = NULL;

    if (argc == 1) {
        usage();
        return 0;
    }

    int ch;
    while ((ch = getopt(argc, argv, "s:hv")) != -1) {
        switch (ch) {
        case 'h':
            usage();
            return 0;
        case 's':
            suffix = optarg;
            break;
        case 'v':
            verbose = 1;
            break;
        case '?':
        default:
            usage();
            return 1;
        }
    }

    // the remaining arguments are the directories to scan
    argv += optind;

    return (ptree(argv, suffix, verbose) != 0) ? 1 : 0;
}

/* Returns 1 if 'path' ends with the first 'suffix_len' bytes of 'suffix',
   0 otherwise. A path shorter than the suffix never matches; an empty suffix
   matches every path. */
static int has_suffix(const char *path, const char *suffix, size_t suffix_len)
{
    size_t path_len = strlen(path);

    if (path_len < suffix_len)
        return 0;

    return memcmp(path + (path_len - suffix_len), suffix, suffix_len) == 0;
}

/*
   ptree -- walk the hierarchies named in argv and count the regular files.

   argv     NULL-terminated list of start paths (passed to fts_open)
   suffix   if not NULL, only files whose path ends with it are counted
   verbose  if 1, print a descriptive total; together with a suffix also
            print the path of every counted file

   Unreadable directories and entries that cannot be stat'ed are reported on
   stderr and skipped.

   Returns 0 on success, 1 if there is nothing to traverse, -1 if fts_open or
   fts_read fails. The FTS handle is closed on every path after a successful
   fts_open.
*/
static int ptree(char * const argv[], const char *suffix, int verbose)
{
    const int fts_options = FTS_COMFOLLOW | FTS_PHYSICAL | FTS_NOCHDIR | FTS_XDEV | FTS_SEEDOT;
    const size_t suffix_len = suffix ? strlen(suffix) : 0;
    unsigned long long int file_count = 0;
    int rval = 0;

    FTS *ftsp = fts_open(argv, fts_options, NULL);
    if (ftsp == NULL) {
        warn("fts_open");
        return -1;
    }

    // Initialize ftsp with as many argv[] parts as possible.
    if (fts_children(ftsp, 0) == NULL) {
        fprintf(stderr, "No files to traverse. See -h option. Exiting ...\n");
        fts_close(ftsp);
        return 1;
    }

    for (;;) {
        // fts_read() returns NULL both at the end of the walk (errno == 0)
        // and on failure (errno != 0)
        errno = 0;
        FTSENT *p = fts_read(ftsp);
        if (p == NULL) {
            if (errno != 0) {
                warn("fts_read");
                rval = -1;
            }
            break;
        }

        switch (p->fts_info) {
        case FTS_F:
            if (suffix == NULL) {
                file_count++;
            } else if (has_suffix(p->fts_path, suffix, suffix_len)) {
                file_count++;
                if (verbose == 1)
                    printf("%s\n", p->fts_path);
            }
            break;
        case FTS_DNR:
        case FTS_ERR:
        case FTS_NS:
            // report and keep going: one bad entry should not stop the count
            warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
            break;
        default:
            break;
        } // switch
    } // for

    if (fts_close(ftsp) != 0)
        warn("fts_close");

    if (verbose == 1) {
        if (suffix)
            printf("\ntotal number of files ending with %s is: %llu\n\n", suffix, file_count);
        else
            printf("\ntotal number of files: %llu\n\n", file_count);
    } else {
        printf("%llu\n", file_count);
    }

    return rval;
} // ptree