Пример #1
0
int ohcount_is_binary_filename(const char *filename) {
  char *p = (char *)filename + strlen(filename);
  while (p > filename && *(p - 1) != '.') p--;
  if (p > filename) {
    struct ExtensionMap *re;
    int length = strlen(p);
    re = ohcount_hash_language_from_ext(p, length);
    if (re) return ISBINARY(re->value);
    // Try the lower-case version of this extension.
    char lowerext[length];
    strncpy(lowerext, p, length);
    lowerext[length] = '\0';
    for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
    re = ohcount_hash_language_from_ext(lowerext, length);
    if (re) return ISBINARY(re->value);
  }
  return 0;
}
Пример #2
0
const char *ohcount_detect_language(SourceFile *sourcefile) {
  const char *language = NULL;
  char *p, *pe;
  int length;

  // Attempt to detect based on file extension.
  length = strlen(sourcefile->ext);
  struct ExtensionMap *re = ohcount_hash_language_from_ext(sourcefile->ext,
                                                           length);
  if (re) language = re->value;
  if (language == NULL) {
    // Try the lower-case version of this extension.
    char lowerext[length + 1];
    strncpy(lowerext, sourcefile->ext, length);
    lowerext[length] = '\0';
    for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
    struct ExtensionMap *re = ohcount_hash_language_from_ext(lowerext, length);
    if (re) return re->value;
	}
  if (language) {
    if (ISAMBIGUOUS(language)) {
      // Call the appropriate function for disambiguation.
      length = strlen(DISAMBIGUATEWHAT(language));
      struct DisambiguateFuncsMap *rd =
        ohcount_hash_disambiguate_func_from_id(DISAMBIGUATEWHAT(language),
                                               length);
      if (rd) return rd->value(sourcefile);
    } else return ISBINARY(language) ? NULL : language;
  }

  // Attempt to detect based on filename.
  length = strlen(sourcefile->filename);
  struct FilenameMap *rf =
    ohcount_hash_language_from_filename(sourcefile->filename, length);
  if (rf) return rf->value;

  char line[81] = { '\0' }, buf[81];

  // Attempt to detect using Emacs mode line (/^-\*-\s*mode[\s:]*\w/i).
  p = ohcount_sourcefile_get_contents(sourcefile);
  pe = p;
  char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
  while (pe < eof) {
    // Get the contents of the first line.
    while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
    length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
    strncpy(line, p, length);
    line[length] = '\0';
    if (*line == '#' && *(line + 1) == '!') {
      // First line was sh-bang; loop to get contents of second line.
      while (*pe == '\r' || *pe == '\n') pe++;
      p = pe;
    } else break;
  }
  char *eol = line + strlen(line);
  for (p = line; p < eol; p++) *p = tolower(*p);
  p = strstr(line, "-*-");
  if (p) {
    p += 3;
    while (*p == ' ' || *p == '\t') p++;
    if (strncmp(p, "mode", 4) == 0) {
      p += 4;
      while (*p == ' ' || *p == '\t' || *p == ':') p++;
    }
    pe = p;
    while (isalnum(*pe)) pe++;
    length = pe - p;
    strncpy(buf, p, length);
    buf[length] = '\0';
    struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
    if (rl) return rl->name;
  }

  // Attempt to detect based on Unix 'file' command.
  int tmpfile = 0;
  char *path = sourcefile->filepath;
  if (sourcefile->diskpath)
    path = sourcefile->diskpath;
  if (access(path, F_OK) != 0) { // create temporary file
    path = malloc(21);
    strncpy(path, "/tmp/ohcount_XXXXXXX", 20);
    *(path + 21) = '\0';
    int fd = mkstemp(path);
    char *contents = ohcount_sourcefile_get_contents(sourcefile);
		log_it("contents:");
		log_it(contents);
		length = contents ? strlen(contents) : 0;
    write(fd, contents, length);
    close(fd);
    tmpfile = 1;
  }
  char command[strlen(path) + 11];
  sprintf(command, "file -b '%s'", path);
  FILE *f = popen(command, "r");
  if (f) {
    fgets(line, sizeof(line), f);
    char *eol = line + strlen(line);
    for (p = line; p < eol; p++) *p = tolower(*p);
    p = strstr(line, "script text");
    if (p && p == line) { // /^script text(?: executable)? for \w/
      p = strstr(line, "for ");
      if (p) {
        p += 4;
        pe = p;
        while (isalnum(*pe)) pe++;
        length = pe - p;
        strncpy(buf, p, length);
        buf[length] = '\0';
        struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
        if (rl) language = rl->name;
      }
    } else if (p) { // /(\w+)(?: -\w+)* script text/
      do {
        p--;
        pe = p;
        while (*p == ' ') p--;
        while (p != line && isalnum(*(p - 1))) p--;
        if (p != line && *(p - 1) == '-') p--;
      } while (*p == '-'); // Skip over any switches.
      length = pe - p;
      strncpy(buf, p, length);
      buf[length] = '\0';
      struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
      if (rl) language = rl->name;
    } else if (strstr(line, "xml")) language = LANG_XML;
    pclose(f);
    if (tmpfile) {
      remove(path);
      free(path);
    }
    if (language) return language;
  }

  return NULL;
}
Пример #3
0
const char *disambiguate_h(SourceFile *sourcefile) {
  char *p, *pe;
  int length;

  // If the directory contains a matching *.m file, likely Objective C.
  length = strlen(sourcefile->filename);
  if (strcmp(sourcefile->ext, "h") == 0) {
    char path[length];
    strncpy(path, sourcefile->filename, length);
    path[length] = '\0';
    *(path + length - 1) = 'm';
    char **filenames = ohcount_sourcefile_get_filenames(sourcefile);
    if (filenames) {
      int i;
      for (i = 0; filenames[i] != NULL; i++)
        if (strcmp(path, filenames[i]) == 0)
          return LANG_OBJECTIVE_C;
    }
  }

  // Attempt to detect based on file contents.
  char line[81], buf[81];
  p = ohcount_sourcefile_get_contents(sourcefile);
  pe = p;
  char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
  while (pe < eof) {
    // Get a line at a time.
    while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
    length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
    strncpy(line, p, length);
    line[length] = '\0';
    char *eol = line + strlen(line);
    char *line_end = pe;

    // Look for C++ headers.
    if (*line == '#') {
      p = line + 1;
      while (*p == ' ' || *p == '\t') p++;
      if (strncmp(p, "include", 7) == 0 &&
          (*(p + 7) == ' ' || *(p + 7) == '\t')) {
        // /^#\s*include\s+[<"][^>"]+[>"]/
        p += 8;
        while (*p == ' ' || *p == '\t') p++;
        if (*p == '<' || *p == '"') {
          // Is the header file a C++ header file?
          p++;
          pe = p;
          while (pe < eol && *pe != '>' && *pe != '"') pe++;
          length = pe - p;
          strncpy(buf, p, length);
          buf[length] = '\0';
          if (ohcount_hash_is_cppheader(buf, length))
            return LANG_CPP;
          // Is the extension for the header file a C++ file?
          p = pe;
          while (p > line && *(p - 1) != '.') p--;
          length = pe - p;
          strncpy(buf, p, length);
          buf[length] = '\0';
          struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
          if (re && strcmp(re->value, LANG_CPP) == 0)
            return LANG_CPP;
        }
      }
    }

    // Look for C++ keywords.
    p = line;
    while (p < eol) {
      if (islower(*p) && p != line && !isalnum(*(p - 1)) && *(p - 1) != '_') {
        pe = p;
        while (islower(*pe)) pe++;
        if (!isalnum(*pe) && *pe != '_') {
          length = pe - p;
          strncpy(buf, p, length);
          buf[length] = '\0';
          if (strcmp(buf, "class") == 0 ||
              strcmp(buf, "namespace") == 0 ||
              strcmp(buf, "template") == 0 ||
              strcmp(buf, "typename") == 0)
            return LANG_CPP;
        }
        p = pe + 1;
      } else p++;
    }

    // Next line.
    pe = line_end;
    while (*pe == '\r' || *pe == '\n') pe++;
    p = pe;
  }

  // Nothing to suggest C++.
  return LANG_C;
}
Пример #4
0
/* Use all available means to detect file language
 */
const char *ohcount_detect_language(SourceFile *sourcefile) {
  const char *language = NULL;
  char *p, *pe;
  int length;

  // Attempt to detect using Emacs mode line (/^-\*-\s*mode[\s:]*\w/i).
  char line[81] = { '\0' }, buf[81];
  p = ohcount_sourcefile_get_contents(sourcefile);
  pe = p;
  char *eof = p + ohcount_sourcefile_get_contents_size(sourcefile);
  while (pe < eof) {
    // Get the contents of the first line.
    while (pe < eof && *pe != '\r' && *pe != '\n') pe++;
    length = (pe - p <= sizeof(line)) ? pe - p : sizeof(line);
    strncpy(line, p, length);
    line[length] = '\0';
    if (*line == '#' && *(line + 1) == '!') {
      // First line was sh-bang; loop to get contents of second line.
      while (*pe == '\r' || *pe == '\n') pe++;
      p = pe;
    } else break;
  }
  p = strstr(line, "-*-");
  if (p) {
    p += 3;
    while (*p == ' ' || *p == '\t') p++;
    // detect "mode" (any capitalization)
    if (strncasecmp(p, "mode", 4) == 0) {
      p += 4;
      while (*p == ' ' || *p == '\t' || *p == ':') p++;
    }
    pe = p;
    while (!isspace(*pe) && *pe != ';' && pe != strstr(pe, "-*-")) pe++;
    length = (pe - p <= sizeof(buf)) ? pe - p : sizeof(buf);
    strncpy(buf, p, length);
    buf[length] = '\0';

		// Special case for "c" or "C" emacs mode header: always means C, not C++
		if (strcasecmp(buf, "c") == 0) {
				return LANG_C;
		}

    // First try it with the language name.
    struct LanguageMap *rl = ohcount_hash_language_from_name(buf, length);
    if (rl) language = rl->name;
    if(!language) {
      // Then try it with the extension table.
      struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
      if (re) language = re->value;
    }
    if (!language) {
      // Try the lower-case version of this modeline.
      for (pe = buf; pe < buf+length; pe++) *pe = tolower(*pe);
      // First try it with the language name.
      rl = ohcount_hash_language_from_name(buf, length);
      if (rl) language = rl->name;
    }
    if (!language) {
      // Then try it with the extension table.
      struct ExtensionMap *re = ohcount_hash_language_from_ext(buf, length);
      if (re) language = re->value;
    }
  }

  // Attempt to detect based on file extension.
  if(!language) {
      length = strlen(sourcefile->ext);
      struct ExtensionMap *re = ohcount_hash_language_from_ext(sourcefile->ext,
                                                               length);
      if (re) language = re->value;
    if (!language) {
      // Try the lower-case version of this extension.
      char lowerext[length + 1];
      strncpy(lowerext, sourcefile->ext, length);
      lowerext[length] = '\0';
      for (p = lowerext; p < lowerext + length; p++) *p = tolower(*p);
      struct ExtensionMap *re = ohcount_hash_language_from_ext(lowerext, length);
      if (re) language = re->value;
    }
  }

  // Attempt to detect based on filename.
  if(!language) {
    length = strlen(sourcefile->filename);
    struct FilenameMap *rf =
      ohcount_hash_language_from_filename(sourcefile->filename, length);
    if (rf) language = rf->value;
  }

  // Attempt to detect based on Unix 'file' command.
  if(!language) {
    language = detect_language_magic(sourcefile);
  }

  if (language) {
    if (ISAMBIGUOUS(language)) {
      // Call the appropriate function for disambiguation.
      length = strlen(DISAMBIGUATEWHAT(language));
      struct DisambiguateFuncsMap *rd =
        ohcount_hash_disambiguate_func_from_id(DISAMBIGUATEWHAT(language),
                                               length);
      if (rd) language = rd->value(sourcefile);
    } else language = ISBINARY(language) ? NULL : language;
  }
  return language;
}