Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
jobrod
/
Week3Programming2JordanBrodie
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Members
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit
844156f3
authored
Feb 21, 2025
by
jobrod
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add new file
parent
0bc31298
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
201 additions
and
0 deletions
wordtable.c
wordtable.c
0 → 100644
View file @
844156f3
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LINE 1024
#define MAX_WORDS 50000 // Increased limit for unique words
#define MAX_WORD_LENGTH 100
// One row of the word table: the text of a unique word together with the
// number of times that word has been counted.
typedef struct {
    char word[MAX_WORD_LENGTH]; // NUL-terminated word text
    int count;                  // occurrences recorded for this word
} WordCount;
// Forward declarations for the functions defined later in this file.

// Reads `filename`, builds the word table and writes it to `output_filename`.
void process_file(const char *filename, const char *output_filename);

// Tokenises one line and updates the table; returns *unique_word_count.
int count_unique_words(char *line, WordCount *word_counts, int *unique_word_count);

// Returns the index of `word` in the table, or -1 when it is not present.
int find_word(WordCount *word_counts, int unique_word_count, const char *word);

// Writes the table header and one row per entry to `output_file`.
void print_word_counts(WordCount *word_counts, int unique_word_count, FILE *output_file);

// Orders the table entries by word using strcmp() ordering.
void sort_word_counts(WordCount *word_counts, int unique_word_count);

// Strips punctuation from both ends of `word`, in place.
void remove_punctuation(char *word);

// Splits `word` on '-' and '_' and records each resulting piece in the table.
void split_and_process_word(char *word, WordCount *word_counts, int *unique_word_count);

// Returns 1 when `word` contains at least one decimal digit, otherwise 0.
int contains_number(const char *word);
// Program entry point: builds the word table for the fixed input file
// "raamat1184.txt" and has it written to "wordtable.txt".
int main()
{
    const char *input_path = "raamat1184.txt";
    const char *table_path = "wordtable.txt";

    process_file(input_path, table_path);

    return 0;
}
// Reads `filename` line by line, counts every distinct word, and writes the
// sorted word table to `output_filename`.
//
// Parameters:
//   filename        - path of the text file to read.
//   output_filename - path of the table file to create or overwrite.
//
// Every failure (input cannot be opened, table cannot be allocated, output
// cannot be opened or written) is reported on stdout and ends the function
// early with all acquired resources released.
void process_file(const char *filename, const char *output_filename)
{
    // Open the input first: if it is missing, the output file must not be
    // created or truncated, and no stream may be left open.
    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        printf("Error: Could not open file.\n");
        return;
    }

    // The table holds MAX_WORDS entries of sizeof(WordCount) bytes each,
    // which is several megabytes with the limits used in this file. That is
    // too large to place safely on the stack, so it lives on the heap.
    // calloc() zero-fills it, matching the former `= {0}` initialiser.
    WordCount *word_counts = calloc(MAX_WORDS, sizeof *word_counts);
    if (word_counts == NULL) {
        printf("Error: Out of memory.\n");
        fclose(file);
        return;
    }

    FILE *output_file = fopen(output_filename, "w");
    if (output_file == NULL) {
        printf("Error: Could not open output file.\n");
        free(word_counts);
        fclose(file);
        return;
    }

    char line[MAX_LINE];
    int unique_word_count = 0;

    while (fgets(line, sizeof(line), file)) {
        count_unique_words(line, word_counts, &unique_word_count);
    }
    fclose(file);

    // Sort word counts alphabetically, then emit the table.
    sort_word_counts(word_counts, unique_word_count);
    print_word_counts(word_counts, unique_word_count, output_file);

    // fclose() flushes buffered output, so a failure here means the table
    // was not completely written.
    int close_status = fclose(output_file);
    free(word_counts);
    if (close_status != 0) {
        printf("Error: Could not write output file.\n");
        return;
    }

    printf("There are %d different words in this text.\n", unique_word_count);
    // Report the path that was actually written rather than a fixed name.
    printf("Output written to %s\n", output_filename);
}
// Splits `line` into whitespace-separated tokens and hands each token to
// split_and_process_word().
//
// The line is scanned with strspn()/strcspn() rather than strtok(). strtok()
// keeps a single hidden cursor, so any strtok() call made while a token is
// being processed would overwrite it and end this loop after the first token
// of the line. Keeping the cursor in a local variable makes the loop
// independent of what the per-word processing does.
//
// Parameters:
//   line              - writable NUL-terminated buffer; delimiters following
//                       a token are overwritten with '\0'.
//   word_counts       - word table, passed through to split_and_process_word().
//   unique_word_count - in/out number of table entries in use.
//
// Returns the value of *unique_word_count after the line has been processed.
int count_unique_words(char *line, WordCount *word_counts, int *unique_word_count)
{
    // '\r' is included so that lines ending in CR LF do not leave a carriage
    // return glued to the last word of the line.
    static const char delimiters[] = " \t\r\n";
    char *cursor = line;

    for (;;) {
        cursor += strspn(cursor, delimiters);  // skip leading delimiters
        if (*cursor == '\0') {
            break;
        }

        char *word = cursor;
        cursor += strcspn(cursor, delimiters); // advance to the token's end
        if (*cursor != '\0') {
            *cursor = '\0';                    // terminate the token in place
            cursor++;
        }

        split_and_process_word(word, word_counts, unique_word_count);
    }

    return *unique_word_count;
}
// Linear search of the first `unique_word_count` table entries for an entry
// whose text equals `word` (exact strcmp() match).
//
// Returns the index of the first matching entry, or -1 if none matches.
int find_word(WordCount *word_counts, int unique_word_count, const char *word)
{
    int slot = 0;

    while (slot < unique_word_count) {
        if (strcmp(word_counts[slot].word, word) == 0) {
            return slot;
        }
        slot++;
    }

    return -1;
}
// Function to sort word counts alphabetically
void
sort_word_counts
(
WordCount
*
word_counts
,
int
unique_word_count
)
{
for
(
int
i
=
0
;
i
<
unique_word_count
-
1
;
i
++
)
{
for
(
int
j
=
i
+
1
;
j
<
unique_word_count
;
j
++
)
{
if
(
strcmp
(
word_counts
[
i
].
word
,
word_counts
[
j
].
word
)
>
0
)
{
WordCount
temp
=
word_counts
[
i
];
word_counts
[
i
]
=
word_counts
[
j
];
word_counts
[
j
]
=
temp
;
}
}
}
}
// Writes the word table to `output_file`: a title line, a column header, a
// separator line, then one "word | count" row for each of the first
// `unique_word_count` entries, in the order they appear in `word_counts`.
void print_word_counts(WordCount *word_counts, int unique_word_count, FILE *output_file)
{
    fputs("Word Count Table:\n", output_file);
    fprintf(output_file, "%-20s | %s\n", "Word", "Count");
    fputs("--------------------|------\n", output_file);

    const WordCount *row = word_counts;
    for (int remaining = unique_word_count; remaining > 0; remaining--, row++) {
        fprintf(output_file, "%-20s | %d\n", row->word, row->count);
    }
}
// Strips every punctuation character (as classified by ispunct()) from the
// beginning and from the end of `word`, in place. Punctuation inside the
// word, such as the apostrophe in "don't", is kept.
//
// Compared with stripping the leading run in separate passes, a single scan
// also handles leading runs that mix apostrophes with other punctuation
// (for example '"hello), and an empty or all-punctuation string is handled
// without ever forming a pointer before the start of the buffer.
//
// Parameters:
//   word - writable NUL-terminated string; shortened in place.
void remove_punctuation(char *word)
{
    // Index of the first character to keep.
    size_t start = 0;
    while (word[start] != '\0' && ispunct((unsigned char)word[start])) {
        start++;
    }

    // Index one past the last character to keep; never moves below `start`.
    size_t end = start + strlen(word + start);
    while (end > start && ispunct((unsigned char)word[end - 1])) {
        end--;
    }

    size_t length = end - start;
    if (start > 0) {
        // Source and destination overlap, so memmove() is required.
        memmove(word, word + start, length);
    }
    word[length] = '\0';
}
// Splits `word` on hyphens and underscores and records every resulting piece
// in the word table.
//
// Each piece has punctuation stripped by remove_punctuation(). Pieces that
// end up empty or that contain a digit are ignored. A piece already in the
// table has its count incremented; otherwise it is appended with count 1.
//
// The pieces are located with strspn()/strcspn() instead of strtok(), so this
// function does not disturb a strtok() scan that a caller may have in
// progress on the enclosing line.
//
// Parameters:
//   word              - writable NUL-terminated token; modified in place.
//   word_counts       - table with room for MAX_WORDS entries.
//   unique_word_count - in/out number of table entries in use.
//
// When a new piece does not fit because the table is full, an error is
// printed and the rest of `word` is abandoned.
void split_and_process_word(char *word, WordCount *word_counts, int *unique_word_count)
{
    static const char separators[] = "-_";
    const size_t capacity = sizeof word_counts[0].word;
    char *cursor = word;

    for (;;) {
        cursor += strspn(cursor, separators);  // skip separator runs
        if (*cursor == '\0') {
            return;
        }

        char *subword = cursor;
        cursor += strcspn(cursor, separators); // advance to the piece's end
        if (*cursor != '\0') {
            *cursor = '\0';                    // terminate the piece in place
            cursor++;
        }

        remove_punctuation(subword);

        // A piece made only of punctuation is empty now; it is not a word.
        if (subword[0] == '\0' || contains_number(subword)) {
            continue;
        }

        // Clip over-long pieces before the lookup so that the text searched
        // for is the same text that gets stored; otherwise every occurrence
        // would be inserted again as a new entry.
        if (strlen(subword) >= capacity) {
            subword[capacity - 1] = '\0';
        }

        int index = find_word(word_counts, *unique_word_count, subword);
        if (index != -1) {
            // Existing word
            word_counts[index].count++;
            continue;
        }

        // New unique word
        if (*unique_word_count >= MAX_WORDS) {
            printf("Error: Exceeded maximum number of unique words.\n");
            return;
        }

        WordCount *entry = &word_counts[*unique_word_count];
        snprintf(entry->word, sizeof entry->word, "%s", subword); // always NUL-terminated
        entry->count = 1;
        (*unique_word_count)++;
    }
}
// Reports whether `word` has at least one decimal digit in it.
//
// Returns 1 as soon as a character for which isdigit() is true is found,
// and 0 when the end of the string is reached without finding one.
int contains_number(const char *word)
{
    const char *scan = word;

    while (*scan != '\0') {
        // Cast first: passing a negative char value to isdigit() is undefined.
        if (isdigit((unsigned char)*scan)) {
            return 1;
        }
        scan++;
    }

    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment