/* File: char_freq.c.

   Contents: Source code for char_freq command.

   Author: Jeffrey S. Leon
   Updated: 1/25/05

   The char_freq command counts the number of occurrences of each letter
   (a-z or A-Z, with upper and lower case letters being treated as
   equivalent) in a text file.  Characters other than letters are ignored.
   The frequencies are printed in decreasing order; letters with equal
   frequency are printed in alphabetical order.

   The syntax for invoking the command is

      char_freq text_file adjust_to

   where text_file is the name of the text file.

   adjust_to (optional) is a non-negative integer that indicates that the
   frequency of each letter is to be scaled so that the frequencies sum to
   n, where n is the value of adjust_to.  In particular, if adjust_to == 1,
   the probability of each letter is shown.  If adjust_to is omitted (or 0),
   no scaling is performed.  The value is parsed with strtol() base 0, so
   decimal, octal (leading 0) and hexadecimal (leading 0x) are accepted.

   Exit status: 0 on success, 1 if the number of command-line arguments is
   wrong, 2 if the file cannot be opened or adjust_to is invalid. */

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* Pre-C23 boolean shim kept from the original source (not referenced
   elsewhere in this file).  From C23 on, bool/true/false are keywords and
   these definitions would not compile, so they are skipped there. */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ <= 201710L
typedef int bool;
const int false = 0, true = 1;
#endif

enum { NUM_LETTERS = 26 };

/* Per-letter tally.  letter is the 0-based index of the letter (0 = a/A);
   it travels with freq so the letter can still be identified after the
   table has been sorted. */
struct letter_count {
   int freq;
   int letter;
} count[NUM_LETTERS];


/* qsort() comparison function for struct letter_count entries.

   Orders by decreasing freq.  qsort() is not guaranteed to be stable, so
   entries with equal freq are ordered by increasing letter index to make
   the output order deterministic.

   Returns a negative, zero or positive value as required by qsort(). */
int compar( const void *f1, const void *f2)
{
   const struct letter_count *a = f1;
   const struct letter_count *b = f2;

   if ( a->freq != b->freq ) {
      return (a->freq > b->freq) ? -1 : 1;
   }
   return (a->letter > b->letter) - (a->letter < b->letter);
}


/* Number of digits after the decimal point used when printing scaled
   frequencies: larger scale targets get fewer decimals.  Only meaningful
   for adjust_to >= 1 (the unscaled case prints integers instead). */
static int scaled_precision( int adjust_to)
{
   if ( adjust_to == 1 ) {
      return 4;
   }
   if ( adjust_to < 100 ) {
      return 3;
   }
   if ( adjust_to < 1000 ) {
      return 2;
   }
   if ( adjust_to < 10000 ) {
      return 1;
   }
   return 0;
}


/* Program entry point: parse the arguments, tally the letters in the input
   file, sort the tallies and print the report to standard output. */
int main( int argc, char *argv[])
{
   int   ch, i, total = 0, adjust_to = 0;
   FILE  *input;
   char  base = 'A';   /* Letters are displayed in upper case unless at
                          least one lower-case letter occurs in the input. */

   /* Check number of command-line arguments, and process arguments. */
   if ( argc != 2 && argc != 3 ) {
      printf( "Error: Wrong number of command-line arguments\n");
      exit(1);
   }
   input = fopen( argv[1], "r");
   if ( input == NULL ) {
      printf( "Error: File %s could not be opened.\n", argv[1]);
      exit(2);
   }
   if ( argc == 3 ) {
      char  *end;
      long  value;

      /* strtol() reports overflow only through errno, so clear it first.
         Reject empty input, trailing garbage, out-of-range values and
         anything that does not fit in an int. */
      errno = 0;
      value = strtol( argv[2], &end, 0);
      if ( end == argv[2] || *end != '\0' || errno == ERANGE ||
           value < 0 || value > INT_MAX ) {
         printf( "Error: Invalid second argument %s.\n", argv[2]);
         fclose( input);
         exit(2);
      }
      adjust_to = (int)value;
   }

   /* Count number of occurrences of each letter. */
   for ( i = 0 ; i < NUM_LETTERS ; ++i ) {
      count[i].letter = i;
      count[i].freq = 0;
   }
   while ( (ch = getc(input)) != EOF ) {
      int  idx;

      if ( !isalpha(ch) ) {
         continue;
      }
      /* Defensive bound check: skip any alphabetic character that does not
         map onto a..z rather than indexing outside the table. */
      idx = tolower(ch) - 'a';
      if ( idx < 0 || idx >= NUM_LETTERS ) {
         continue;
      }
      if ( islower(ch) ) {
         base = 'a';
      }
      ++count[idx].freq;
      ++total;
   }
   fclose( input);

   /* Sort letters by decreasing frequency. */
   qsort( count, NUM_LETTERS, sizeof count[0], compar);

   /* Display total number of letters in file. */
   printf( "\n %d alphabetic characters in sample.\n\n", total);

   /* Display frequencies of individual letters, adjusted if necessary. */
   if ( adjust_to == 0 ) {
      printf( " Letter Frequency\n");
      for ( i = 0 ; i < NUM_LETTERS ; ++i ) {
         printf( " %c %8d\n", base+count[i].letter, count[i].freq);
      }
   }
   else {
      int  precision = scaled_precision( adjust_to);

      if ( adjust_to == 1 ) {
         printf( " Letter Prob\n");
      }
      else {
         printf( " Letter Frequency\n");
         printf( " (per %d lets)\n", adjust_to);
      }
      for ( i = 0 ; i < NUM_LETTERS ; ++i ) {
         /* With no letters in the sample the ratio would be 0/0 (NaN);
            report 0 instead. */
         double  scaled = (total > 0)
                        ? count[i].freq / (double)total * (double)adjust_to
                        : 0.0;
         printf( " %c %8.*f\n", base+count[i].letter, precision, scaled);
      }
   }

   /* All done. */
   return 0;
}