Commit c4c7beaa by birdperson

merging

parent ad4b27c9
File deleted
......@@ -5,9 +5,9 @@
#include <unistd.h>
#include <dirent.h>
#include <error.h>
#define RIVSIZE 200000
#define NONZEROS 2
#define CACHESIZE 1000
#define RIVSIZE 100000
#define NONZEROS 8
#define CACHESIZE 10000
#include "../RIVtools.h"
......@@ -23,7 +23,7 @@ int main(int argc, char *argv[]){
char pathString[1000];
//we open the lexicon, if it does not yet exist, it will be created
lexOpen("lexicon200-2");
lexOpen("lexicon100-8");
//we format the root directory, preparing to scan its contents
......@@ -58,7 +58,7 @@ void directoryGrind(char *rootString){
while((files=readdir(directory))){
if(!files->d_name[0]) break;
if(!files->d_name) break;
while(*(files->d_name)=='.'){
files = readdir(directory);
}
......
......@@ -5,13 +5,13 @@ clean(){
else
python shittyballs.py "$1"
./RIVread1 cleanbooks/
./RIVread2 cleanbooks/
./RIVread3 cleanbooks/
./RIVread4 cleanbooks/
./RIVread5 cleanbooks/
./RIVread6 cleanbooks/
./RIVread7 cleanbooks/
./RIVread25-2 cleanbooks/ > somewhere.txt
./RIVread50-2 cleanbooks/ > somewhere.txt
./RIVread50-8 cleanbooks/ > somewhere.txt
./RIVread75-4 cleanbooks/ > somewhere.txt
./RIVread100-2 cleanbooks/ > somewhere.txt
./RIVread100-8 cleanbooks/ > somewhere.txt
./RIVread125-2 cleanbooks/ > somewhere.txt
rm -r cleanbooks/
#rm "$1"
......@@ -22,4 +22,4 @@ clean(){
clean ../../books/*
clean ../../../Documents/PGunzips/*
......@@ -102,7 +102,7 @@ if not os.path.exists('lexicon'):
if not os.path.exists(pathString):
os.makedirs(pathString)
call(["python", "blacklist.py"])
i=0
skip = 1
with open(sourceString, 'U') as fileIn:
......
File deleted
......@@ -3,10 +3,11 @@
#define CACHEEXCLUSIVE 1
#define RIVSIZE 50000
#include "RIVtools.h"
char* clean(char* word);
char* stem(char* word);
int main(){
lexOpen("consolidatedLexicon50-8");
FILE* text = fopen("../books/pg56902.txt", "r");
lexOpen("consolidatedLexicon50-4");
FILE* text = fopen("../../Downloads/pg62.txt", "r");
if(!text){
puts("no file");
return 1;
......@@ -17,14 +18,17 @@ int main(){
while(fscanf(text, "%99s", word)){
if(feof(text)) break;
if(!*word) break;
if(!*(clean(word))) continue;
if(stem(word)){
denseRIV* wordRIV = lexPull(word);
if(!wordRIV){
printf("%s, not in lexicon\n", word);
//printf("%s, not in lexicon\n", word);
continue;
}else{
temp = consolidateD2S(wordRIV->values);
addS2D(accumulate.values, temp);
......@@ -37,12 +41,13 @@ int main(){
}
}else{
printf("%s, not in wordNet\n", word);
// printf("%s, not in wordNet\n", word);
}
}
temp = consolidateD2S(accumulate.values);
printf("saturatoins, %d", temp.count);
return 0;
......@@ -54,35 +59,59 @@ char* stem(char* word){
char pathString[200];
int WNdata;
sprintf(pathString, "WN/%s", word);
sprintf(pathString, "../bookCleaner/WN/%s", word);
FILE* WNfile = fopen(pathString, "r");
if(!WNfile) return NULL;
fscanf(WNfile, "%d", &WNdata);
if(!WNdata) return NULL;
if(WNdata == 1) return word;
if(!WNdata) {
fclose(WNfile);
return NULL;
}
if(WNdata == 1){
fclose(WNfile);
return word;
}
if(WNdata == 2){
fscanf(WNfile, "%s", word);
fclose(WNfile);
sprintf(pathString, "WN/%s", word);
sprintf(pathString, "../bookCleaner/WN/%s", word);
WNfile = fopen(pathString, "r");
if(!WNfile) return NULL;
fscanf(WNfile, "%*d%s", word);
fclose(WNfile);
return word;
}
return NULL;
}
/* clean: reduce a word to lowercase ASCII letters only, in place.
 *
 * Every character that is not an ASCII letter (digits, punctuation,
 * bytes outside the 7-bit range, ...) is dropped; uppercase letters are
 * folded to lowercase. The result is never longer than the input, so the
 * filtering is done directly inside the caller's buffer: no fixed-size
 * scratch array is involved and words of any length are handled safely.
 *
 * word:    NUL-terminated, writable string; modified in place.
 * returns: word itself (possibly now the empty string), or NULL if word
 *          is NULL.
 */
char* clean(char* word){
	if(!word) return NULL;

	/* write cursor trails (or equals) the read cursor, so in-place is safe */
	char* out = word;
	for(const char* in = word; *in; in++){
		if(*in >= 'a' && *in <= 'z'){
			*out++ = *in;
		}else if(*in >= 'A' && *in <= 'Z'){
			/* fold uppercase to lowercase */
			*out++ = (char)(*in - 'A' + 'a');
		}
		/* anything else is silently discarded */
	}
	*out = '\0';
	return word;
}
......
......@@ -47,7 +47,7 @@ int main(int argc, char* argv[]){
i++;
}
lexClose();
lexOpen("consolidatedLexicon50-8");
lexOpen("consolidatedLexicon50-4");
for(int j=0; j<i; j++){
lexPush(output[j]);
......
File deleted
......@@ -3,10 +3,10 @@
#include <dirent.h>
#include <time.h>
#define RIVSIZE 25000
#define RIVSIZE 4000
#define CACHESIZE 0
#define NONZEROS 2
#define THRESHOLD 0.99
#define THRESHOLD 0.97
#include "RIVtools.h"
......@@ -30,49 +30,63 @@ int main(int argc, char *argv[]){
sparseRIV* fileRIVs_slider = fileRIVs;
sparseRIV* fileRIVs_stop = fileRIVs+fileCount;
while(fileRIVs_slider <fileRIVs_stop){
(*fileRIVs_slider).magnitude = getMagnitudeSparse(*fileRIVs_slider);
fileRIVs_slider->magnitude = getMagnitudeSparse(*fileRIVs_slider);
if(fileRIVs_slider->magnitude == 0){
if(remove(fileRIVs_slider->name)){
printf("%s failed", fileRIVs_slider->name);
}
}
fileRIVs_slider++;
}
clock_t beginnsquared = clock();
int thing = 0;
float cosine;
float minmag;
float maxmag;
denseRIV baseDense;
baseDense.values = malloc(RIVSIZE*sizeof(int));
char booleans[fileCount];
char* booleans_slider = booleans;
memset(booleans, 0, fileCount);
fileRIVs_slider = fileRIVs;
sparseRIV* comparators_slider;
int count = 0;
while(fileRIVs_slider<fileRIVs_stop){
//the comparators slider and booleans slider are both set back to the beginning of their scan
comparators_slider = fileRIVs;
booleans_slider = booleans;
memset(baseDense.values, 0, RIVSIZE*sizeof(int));
baseDense.values = addS2D(baseDense.values, *fileRIVs_slider);
addS2D(baseDense.values, *fileRIVs_slider);
baseDense.magnitude = (*fileRIVs_slider).magnitude;
minmag = baseDense.magnitude*.85;
maxmag = baseDense.magnitude*1.15;
while(comparators_slider < fileRIVs_slider){
if((*comparators_slider).magnitude < maxmag && (*comparators_slider).magnitude > minmag && (*comparators_slider).boolean){
if((*comparators_slider).magnitude < maxmag && (*comparators_slider).magnitude > minmag && !*booleans_slider){
cosine = cosCompare(baseDense, *comparators_slider);
count++;
if(cosine>THRESHOLD){
printf("%s\t%f\n",(*comparators_slider).name, cosine);
printf("%s\t%s\t%f\n",fileRIVs_slider->name, comparators_slider->name, cosine);
if(remove((*comparators_slider).name)){
printf(" well shit");
}
(*comparators_slider).boolean = 0;
*booleans_slider += 1;
thing++;
}
}
comparators_slider++;
booleans_slider++;
}
fileRIVs_slider++;
}
clock_t endnsquared = clock();
double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
......@@ -106,13 +120,14 @@ void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
strcat(pathString, files->d_name);
strcat(pathString, "/");
directoryToL2s(pathString, fileRIVs, fileCount);
continue;
}
strcpy(pathString, rootString);
strcat(pathString, files->d_name);
FILE *input = fopen(pathString, "r");
puts(pathString);
//puts(pathString);
if(!input){
printf("file %s doesn't seem to exist, breaking out of loop", pathString);
return;
......@@ -126,4 +141,5 @@ void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
(*fileCount)++;
}
}
closedir(directory);
}
File deleted
File deleted
......@@ -228,7 +228,7 @@ sparseRIV normalize(denseRIV input, int factor){
values[count]= round(input.values[i]*multiplier);
/* drop any 0 values */
if(values[count])count++;
if(values[count]>1)count++;
}
sparseRIV output;
output.count = count;
......
This source diff could not be displayed because it is too large. You can view the blob instead.
RIVcull: RIVcullDestructive.c
gcc -o RIVcull RIVcullDestructive.c -lm -O3
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment