/*
 * Commit 844156f3 by jobrod
 *
 * Add new file
 *
 * parent 0bc31298
 * Showing with 201 additions and 0 deletions
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size, in bytes, of the buffer that holds one chunk of input read by fgets()
#define MAX_LINE 1024
// Capacity of the word table: the largest number of distinct words that can be stored
#define MAX_WORDS 50000 // Increased limit for unique words
// Size, in bytes, of each stored word buffer, including the terminating NUL
#define MAX_WORD_LENGTH 100
// Structure to store unique words and their counts.
// One WordCount is one row of the word table: the word's text and how many
// times it has been seen so far.
typedef struct {
// NUL-terminated word text; holds at most MAX_WORD_LENGTH - 1 characters
char word[MAX_WORD_LENGTH];
// Number of occurrences recorded for this word (set to 1 on insertion)
int count;
} WordCount;
// Function prototypes

// Counts the words of the text file `filename` and writes the resulting
// table to `output_filename`.
void process_file(const char *filename, const char *output_filename);
// Tokenizes `line` in place, records its words in `word_counts`, and returns
// the updated value of *unique_word_count.
int count_unique_words(char *line, WordCount *word_counts, int *unique_word_count);
// Returns the index of `word` among the first `unique_word_count` entries of
// `word_counts`, or -1 when it is not present.
int find_word(WordCount *word_counts, int unique_word_count, const char *word);
// Writes a header followed by one "word | count" row per entry to `output_file`.
void print_word_counts(WordCount *word_counts, int unique_word_count, FILE *output_file);
// Orders the first `unique_word_count` entries by strcmp() of their words.
void sort_word_counts(WordCount *word_counts, int unique_word_count);
// Strips punctuation from the ends of `word`, modifying it in place.
void remove_punctuation(char *word);
// Splits `word` on '-' and '_' and records each piece in `word_counts`.
void split_and_process_word(char *word, WordCount *word_counts, int *unique_word_count);
// Returns 1 when `word` contains at least one decimal digit, otherwise 0.
int contains_number(const char *word);
// Program entry point: counts the words of the fixed input text and writes
// the table to the fixed report file. Always exits with status 0.
int main()
{
    const char *input_path = "raamat1184.txt";
    const char *report_path = "wordtable.txt";

    process_file(input_path, report_path);
    return 0;
}
// Reads `filename` line by line, tallies every distinct word it contains and
// writes an alphabetically sorted word/count table to `output_filename`.
//
// Parameters:
//   filename        - path of the text file to read.
//   output_filename - path of the report to create (truncated if it exists).
//
// Every failure (input cannot be opened, table cannot be allocated, output
// cannot be opened or written) is reported on stdout and the function simply
// returns; no status is passed back to the caller.
void process_file(const char *filename, const char *output_filename)
{
    FILE *file = fopen(filename, "r");
    if (file == NULL)
    {
        printf("Error: Could not open file.\n");
        return;
    }

    // The table is MAX_WORDS entries of MAX_WORD_LENGTH bytes plus a counter,
    // i.e. several megabytes: far too large to be a safe automatic variable,
    // so it lives on the heap. calloc() also zero-fills it.
    WordCount *word_counts = calloc(MAX_WORDS, sizeof *word_counts);
    if (word_counts == NULL)
    {
        printf("Error: Could not allocate memory for the word table.\n");
        fclose(file);
        return;
    }

    // Open the report only once the input and the table are available, so an
    // early failure neither leaks this handle nor truncates an existing report.
    FILE *output_file = fopen(output_filename, "w");
    if (output_file == NULL)
    {
        printf("Error: Could not open output file.\n");
        free(word_counts);
        fclose(file);
        return;
    }

    char line[MAX_LINE];
    int unique_word_count = 0;
    while (fgets(line, sizeof(line), file))
    {
        count_unique_words(line, word_counts, &unique_word_count);
    }
    fclose(file);

    // Sort word counts alphabetically
    sort_word_counts(word_counts, unique_word_count);

    // Print word counts to output file
    print_word_counts(word_counts, unique_word_count, output_file);
    free(word_counts);

    // fclose() flushes buffered data, so this is where a full disk or similar
    // write failure becomes visible.
    if (fclose(output_file) != 0)
    {
        printf("Error: Could not write output file.\n");
        return;
    }

    printf("There are %d different words in this text.\n", unique_word_count);
    printf("Output written to %s\n", output_filename);
}
// Splits `line` into whitespace-separated tokens and hands each one to
// split_and_process_word() so it is recorded in `word_counts`.
//
// Parameters:
//   line              - NUL-terminated text; modified in place (separators
//                       are overwritten with NUL bytes).
//   word_counts       - word table to update.
//   unique_word_count - in/out: number of entries currently in the table.
//
// Returns the value of *unique_word_count after the line has been processed.
//
// The tokens are cut with strspn()/strcspn() on purpose instead of strtok():
// strtok() keeps one hidden cursor per program, so any strtok() call made
// further down the call chain while a token is being processed would discard
// the position in `line` and the rest of the line would be silently skipped.
int count_unique_words(char *line, WordCount *word_counts, int *unique_word_count)
{
    // '\r' is included so text with CRLF line endings does not leave a
    // carriage return glued to the last word of each line.
    static const char delimiters[] = " \t\r\n";
    char *cursor = line;

    for (;;)
    {
        // Skip the run of separators in front of the next token.
        cursor += strspn(cursor, delimiters);
        if (*cursor == '\0')
        {
            break;
        }

        char *word = cursor;
        cursor += strcspn(cursor, delimiters);
        if (*cursor != '\0')
        {
            // Terminate the token and step past the separator.
            *cursor++ = '\0';
        }

        split_and_process_word(word, word_counts, unique_word_count);
    }
    return *unique_word_count;
}
// Looks `word` up in the first `unique_word_count` entries of `word_counts`
// with a linear scan and an exact strcmp() comparison.
//
// Returns the index of the first matching entry, or -1 when there is none.
int find_word(WordCount *word_counts, int unique_word_count, const char *word)
{
    int found = -1;

    for (int slot = 0; slot < unique_word_count && found < 0; ++slot)
    {
        if (!strcmp(word_counts[slot].word, word))
        {
            found = slot;
        }
    }
    return found;
}
// Function to sort word counts alphabetically
void sort_word_counts(WordCount *word_counts, int unique_word_count)
{
for (int i = 0; i < unique_word_count - 1; i++)
{
for (int j = i + 1; j < unique_word_count; j++)
{
if (strcmp(word_counts[i].word, word_counts[j].word) > 0)
{
WordCount temp = word_counts[i];
word_counts[i] = word_counts[j];
word_counts[j] = temp;
}
}
}
}
// Writes the word table to `output_file`: a title line, a column header, a
// separator rule, then one "word | count" row for each of the first
// `unique_word_count` entries of `word_counts`, in their current order.
void print_word_counts(WordCount *word_counts, int unique_word_count, FILE *output_file)
{
    fputs("Word Count Table:\n", output_file);
    fprintf(output_file, "%-20s | %s\n", "Word", "Count");
    fputs("--------------------|------\n", output_file);

    int row = 0;
    while (row < unique_word_count)
    {
        const WordCount *entry = &word_counts[row];
        fprintf(output_file, "%-20s | %d\n", entry->word, entry->count);
        row++;
    }
}
// Strips every leading and trailing punctuation character (as classified by
// ispunct(), apostrophes included) from `word`, in place. Punctuation inside
// the word, such as the apostrophe in "don't", is left untouched.
//
// Parameters:
//   word - NUL-terminated, writable string; may be empty. On return it holds
//          the trimmed text, which is the empty string when the input
//          consisted of punctuation only.
//
// The bounds are tracked as indices rather than pointers so that an empty
// string never produces a pointer in front of the buffer, and the surviving
// characters are moved once instead of once per stripped character.
void remove_punctuation(char *word)
{
    size_t length = strlen(word);

    // First character that is kept.
    size_t start = 0;
    while (start < length && ispunct((unsigned char)word[start]))
    {
        start++;
    }

    // One past the last character that is kept.
    size_t end = length;
    while (end > start && ispunct((unsigned char)word[end - 1]))
    {
        end--;
    }

    size_t kept = end - start;
    memmove(word, word + start, kept); // regions overlap, so not memcpy()
    word[kept] = '\0';
}
// Splits `word` on hyphens and underscores and records every resulting piece
// in the word table.
//
// Each piece is trimmed with remove_punctuation(). Pieces that end up empty
// or that contain a digit are ignored; any other piece either increments the
// count of its existing entry or is appended as a new entry with count 1.
//
// Parameters:
//   word              - NUL-terminated, writable token; modified in place.
//   word_counts       - word table with room for MAX_WORDS entries.
//   unique_word_count - in/out: number of entries currently in the table.
//
// When a new word does not fit because the table already holds MAX_WORDS
// entries, an error is printed to stdout and the rest of `word` is skipped.
//
// The pieces are cut with strspn()/strcspn() rather than strtok(): strtok()
// has a single hidden cursor, so using it here would destroy the position of
// a caller that is itself walking the enclosing line with strtok().
void split_and_process_word(char *word, WordCount *word_counts, int *unique_word_count)
{
    static const char separators[] = "-_";
    char *cursor = word;

    for (;;)
    {
        // Skip the run of separators in front of the next piece.
        cursor += strspn(cursor, separators);
        if (*cursor == '\0')
        {
            break;
        }

        char *subword = cursor;
        cursor += strcspn(cursor, separators);
        if (*cursor != '\0')
        {
            // Terminate the piece and step past the separator.
            *cursor++ = '\0';
        }

        remove_punctuation(subword);

        // A piece made only of punctuation (e.g. "...") is not a word.
        if (*subword == '\0' || contains_number(subword))
        {
            continue;
        }

        // Truncate before the lookup: entries are stored with at most
        // MAX_WORD_LENGTH - 1 characters, so an untruncated key could never
        // match its own entry and would be inserted again on every occurrence.
        if (strlen(subword) >= MAX_WORD_LENGTH)
        {
            subword[MAX_WORD_LENGTH - 1] = '\0';
        }

        int index = find_word(word_counts, *unique_word_count, subword);
        if (index != -1)
        {
            // Existing word
            word_counts[index].count++;
            continue;
        }

        // New unique word
        if (*unique_word_count >= MAX_WORDS)
        {
            printf("Error: Exceeded maximum number of unique words.\n");
            return;
        }

        WordCount *slot = &word_counts[*unique_word_count];
        snprintf(slot->word, sizeof slot->word, "%s", subword); // always NUL-terminates
        slot->count = 1;
        (*unique_word_count)++;
    }
}
// Reports whether `word` contains at least one decimal digit.
//
// Returns 1 as soon as a digit is found, 0 when the NUL terminator is
// reached without finding one (which includes the empty string).
int contains_number(const char *word)
{
    const char *cursor = word;

    while (*cursor != '\0')
    {
        // ctype functions require a value representable as unsigned char.
        if (isdigit((unsigned char)*cursor))
        {
            return 1;
        }
        cursor++;
    }
    return 0;
}
/*
 * Markdown is supported
 * 0% or
 * You are about to add 0 people to the discussion. Proceed with caution.
 * Finish editing this message first!
 * Please register or sign in to comment
 */