#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define READ_MODE "rt"
typedef struct WordCountTreeNode{
char *word;
int count;
struct WordCountTreeNode *left;
struct WordCountTreeNode *right;
}WCTNode;
WCTNode* wctnode_add(WCTNode *t, char *w);
void wctnode_print(WCTNode *t);
char* get_token(FILE *readfile);
static int usage(int exit_status){
fputs("Usage: token_split TEXT_FILE\n", stderr);
return exit_status;
}
int main(int argc, char *argv[]){
FILE *readfile;
char *word;
WCTNode *root = NULL;
if (argc != 2) return usage(0);
if ( (readfile = fopen(argv[1], READ_MODE) ) == NULL)
perror(argv[1]);
word = get_token(readfile);
if (word != NULL){
root = wctnode_add(root, word);
free(word);
}
while( (word = get_token(NULL) ) != NULL){
root = wctnode_add(root, word);
free(word);
}
wctnode_print(root);
fclose(readfile);
return 0;
}
static int istoken(int c);
static void chomp(char *s);
char* get_token(FILE *readfile){
static FILE *fp;
int c, i;
long bpos, len;
char *token;
if (readfile != NULL) fp = readfile;
while( (c = fgetc(fp) ) != EOF){
if (istoken(c)){
bpos = ftell(fp);
while(istoken(c = fgetc(fp)));
len = ftell(fp) - bpos;
if ( (token = (char*)malloc(len + 1) ) == NULL)
return NULL;
fseek(fp, (len + 1) * (-1), SEEK_CUR);
for(i = 0; i < len; i++) token[i] = fgetc(fp);
token[i] = '\0';
chomp(token);
return token;
}
}
return NULL;
}
static int istoken(int c){
if (isalnum(c)) return 1;
if (c == '\'' || c == '-')
return 1;
return 0;
}
static void chomp(char *s){
if (*s == '\0') return;
while(*s != '\0') s++;
if (*(--s) == '\n') *s = '\0';
}
static WCTNode* new(void);
static char* dup(char *str);
WCTNode* wctnode_add(WCTNode *t, char *w){
int cmp;
if (t == NULL){
t = new();
t->word = dup(w);
}else{
cmp = strcmp(w, t->word);
if (cmp == 0){
t->count++;
}else if (cmp < 0){
t->left = wctnode_add(t->left, w);
}else{
t->right = wctnode_add(t->right, w);
}
}
return t;
}
void wctnode_print(WCTNode *t){
if (t != NULL){
wctnode_print(t->left);
printf("%4d\t%s\n", t->count, t->word);
wctnode_print(t->right);
}
}
static WCTNode* new(void){
WCTNode *t;
t = (WCTNode*)malloc(sizeof(WCTNode));
if (t != NULL){
t->count = 1;
t->word = NULL;
t->left = t->right = NULL;
}
return t;
}
static char* dup(char *str){
char *s;
s = (char*)malloc(strlen(str) + 1);
if (s != NULL) strcpy(s, str);
return s;
}