Commit d78631fd by simetk

sparse push/pull and added beginnings of morphic RIVs

parent 9c6ceacf
...@@ -11,6 +11,10 @@ ...@@ -11,6 +11,10 @@
#define RIVSIZE 25000 #define RIVSIZE 25000
#endif #endif
#if RIVSIZE<0
#error "RIVSIZE must be a positive number (preferably a large positive)"
#endif
/* NONZEROS macro defines the number of non-zero values that will be generated /* NONZEROS macro defines the number of non-zero values that will be generated
* for any level one (barcode) RIV. 2 is simple and lightweight to begin * for any level one (barcode) RIV. 2 is simple and lightweight to begin
*/ */
...@@ -18,6 +22,11 @@ ...@@ -18,6 +22,11 @@
#define NONZEROS 2 #define NONZEROS 2
#endif #endif
#if NONZEROS%2 || NONZEROS<1
#error "NONZEROS must be an even, greater than 0 number"
#endif
/* CACHESIZE macro defines the number of RIVs the system will cache. /* CACHESIZE macro defines the number of RIVs the system will cache.
* a larger cache means more memory consumption, but will also be significantly * a larger cache means more memory consumption, but will also be significantly
* faster in aggregation and reading applications. doesn't affect systems * faster in aggregation and reading applications. doesn't affect systems
...@@ -27,6 +36,11 @@ ...@@ -27,6 +36,11 @@
#define CACHESIZE 20 #define CACHESIZE 20
#endif #endif
#if CACHESIZE<0
#error "CACHESIZE cannot be a negative number"
#endif
/* the sparseRIV is a RIV form optimized for RIVs that will be mostly 0s /* the sparseRIV is a RIV form optimized for RIVs that will be mostly 0s
* as this is often an ideal case, it is advisable as the default * as this is often an ideal case, it is advisable as the default
* unless we are doing long term RIV aggregation. * unless we are doing long term RIV aggregation.
...@@ -63,14 +77,11 @@ typedef struct{ ...@@ -63,14 +77,11 @@ typedef struct{
* memory blocks which the system will use in the background * memory blocks which the system will use in the background
*/ */
struct RIVData{ struct RIVData{
size_t RIVsize;
int nonZeros;
int I2SThreshold; int I2SThreshold;
int *h_tempBlock; int *h_tempBlock;
int tempSize; int tempSize;
int thing; int thing;
denseRIV* RIVCache; denseRIV RIVCache[CACHESIZE];
int cacheSize;
}static RIVKey; }static RIVKey;
/* RIVinit should be the first function called in any usage of this library /* RIVinit should be the first function called in any usage of this library
...@@ -151,7 +162,7 @@ int* addS2D(int* destination, sparseRIV input){// #TODO fix destination paramete ...@@ -151,7 +162,7 @@ int* addS2D(int* destination, sparseRIV input){// #TODO fix destination paramete
int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
int *destination = (int*)calloc(RIVKey.RIVsize,sizeof(int)); int *destination = (int*)calloc(RIVSIZE,sizeof(int));
int *locations_slider = locations; int *locations_slider = locations;
int *locations_stop = locations_slider+valueCount; int *locations_stop = locations_slider+valueCount;
...@@ -198,8 +209,8 @@ sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){ ...@@ -198,8 +209,8 @@ sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){
} }
sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){ sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
sparseRIV sparseOut; sparseRIV sparseOut;
int *locationsTemp = RIVKey.h_tempBlock+RIVKey.RIVsize; int *locationsTemp = RIVKey.h_tempBlock+RIVSIZE;
int *valuesTemp = RIVKey.h_tempBlock+2*RIVKey.RIVsize; int *valuesTemp = RIVKey.h_tempBlock+2*RIVSIZE;
sparseOut.count = 0; sparseOut.count = 0;
int add = 1; int add = 1;
int found; int found;
...@@ -229,11 +240,11 @@ sparseRIV consolidateD2S(int *denseInput){ ...@@ -229,11 +240,11 @@ sparseRIV consolidateD2S(int *denseInput){
sparseRIV output; sparseRIV output;
output.count = 0; output.count = 0;
/* key/value pairs will be loaded to a worst-case sized temporary slot */ /* key/value pairs will be loaded to a worst-case sized temporary slot */
int* locations = RIVKey.h_tempBlock+RIVKey.RIVsize; int* locations = RIVKey.h_tempBlock+RIVSIZE;
int* values = locations+RIVKey.RIVsize; int* values = locations+RIVSIZE;
int* locations_slider = locations; int* locations_slider = locations;
int* values_slider = values; int* values_slider = values;
for(int i=0; i<RIVKey.RIVsize; i++){ for(int i=0; i<RIVSIZE; i++){
/* act only on non-zeros */ /* act only on non-zeros */
if(denseInput[i]){ if(denseInput[i]){
...@@ -266,14 +277,7 @@ sparseRIV consolidateD2S(int *denseInput){ ...@@ -266,14 +277,7 @@ sparseRIV consolidateD2S(int *denseInput){
void RIVInit(){ void RIVInit(){
RIVKey.RIVsize = RIVSIZE; //#TODO decide about macros vs global variables
RIVKey.nonZeros = NONZEROS;
RIVKey.I2SThreshold = sqrt(RIVSIZE); RIVKey.I2SThreshold = sqrt(RIVSIZE);
if(RIVKey.nonZeros%2){
printf("your NONZEROS value must be an even number");
RIVKey.nonZeros++;
printf(", changed to %d", RIVKey.nonZeros);
}
/* open a slot at least large enough for worst case handling of /* open a slot at least large enough for worst case handling of
* sparse to dense conversion. may be enlarged by fileToL2 functions */ * sparse to dense conversion. may be enlarged by fileToL2 functions */
...@@ -284,12 +288,11 @@ void RIVInit(){ ...@@ -284,12 +288,11 @@ void RIVInit(){
sigaction(11,&action,NULL); sigaction(11,&action,NULL);
//} //}
RIVKey.h_tempBlock = (int*)malloc(3*RIVKey.RIVsize*sizeof(int)); RIVKey.h_tempBlock = (int*)malloc(3*RIVSIZE*sizeof(int));
RIVKey.tempSize = 3*RIVKey.RIVsize; RIVKey.tempSize = 3*RIVSIZE;
RIVKey.thing = 0; RIVKey.thing = 0;
RIVKey.cacheSize = CACHESIZE;
/* open a slot for a cache of dense RIVs, optimized for frequent accesses */ /* open a slot for a cache of dense RIVs, optimized for frequent accesses */
RIVKey.RIVCache = (denseRIV*)calloc(RIVKey.cacheSize,sizeof(denseRIV)); memset(RIVKey.RIVCache, 0, sizeof(denseRIV)*CACHESIZE);
} }
void RIVCleanup(){ void RIVCleanup(){
...@@ -297,9 +300,6 @@ void RIVCleanup(){ ...@@ -297,9 +300,6 @@ void RIVCleanup(){
if(cacheDump()){ if(cacheDump()){
puts("cache dump failed, some lexicon data was lost"); puts("cache dump failed, some lexicon data was lost");
} }
#if CACHESIZE > 0
free(RIVKey.RIVCache);
#endif
free(RIVKey.h_tempBlock); free(RIVKey.h_tempBlock);
} }
...@@ -318,12 +318,12 @@ int wordtoSeed(unsigned char* word){ ...@@ -318,12 +318,12 @@ int wordtoSeed(unsigned char* word){
void makeSparseLocations(unsigned char* word, int *locations, size_t count){ void makeSparseLocations(unsigned char* word, int *locations, size_t count){
locations+=count; locations+=count;
srand(wordtoSeed(word)); srand(wordtoSeed(word));
int *locations_stop = locations+RIVKey.nonZeros; int *locations_stop = locations+NONZEROS;
while(locations<locations_stop){ while(locations<locations_stop){
/* unrolled for speed, guaranteed to be an even number of steps */ /* unrolled for speed, guaranteed to be an even number of steps */
*locations = rand()%RIVKey.RIVsize; *locations = rand()%RIVSIZE;
locations++; locations++;
*locations = rand()%RIVKey.RIVsize; *locations = rand()%RIVSIZE;
locations++; locations++;
} }
return; return;
...@@ -341,28 +341,39 @@ int fLexPush(denseRIV RIVout){ ...@@ -341,28 +341,39 @@ int fLexPush(denseRIV RIVout){
printf("lexicon push has failed for word: %s\nconsider cleaning inputs", pathString); printf("lexicon push has failed for word: %s\nconsider cleaning inputs", pathString);
return 1; return 1;
} }
sparseRIV temp = consolidateD2S(RIVout.values);
fwrite(RIVout.frequency, 1, 4, lexWord); fwrite(&temp.count, 1, sizeof(size_t), lexWord);
fwrite(&RIVout.magnitude, 1, 4, lexWord); fwrite(RIVout.frequency, 1, sizeof(float), lexWord);
fwrite(RIVout.values, RIVKey.RIVsize, 4, lexWord); fwrite(&RIVout.magnitude, 1, sizeof(float), lexWord);
fwrite(temp.locations, temp.count, sizeof(int), lexWord);
fwrite(temp.values, temp.count, sizeof(int), lexWord);
fclose(lexWord); fclose(lexWord);
free(RIVout.values); free(RIVout.values);
free(temp.locations);
return 0; return 0;
} }
denseRIV fLexPull(FILE* lexWord){ denseRIV fLexPull(FILE* lexWord){
denseRIV output; denseRIV output;
output.values = malloc( (RIVKey.RIVsize+1) *sizeof(int)); sparseRIV temp;
output.frequency = output.values+RIVKey.RIVsize;
output.values = calloc( (RIVSIZE+1) ,sizeof(int));
output.frequency = output.values+RIVSIZE;
int diagnostic = 0; int diagnostic = 0;
diagnostic += fread(output.frequency, 1, sizeof(int), lexWord); fread(&temp.count, 1, sizeof(size_t), lexWord);
diagnostic += fread(&(output.magnitude), 1, sizeof(int), lexWord); diagnostic += fread(&temp.frequency, 1, sizeof(int), lexWord);
diagnostic += fread(output.values, RIVKey.RIVsize, sizeof(int), lexWord); diagnostic += fread(&(temp.magnitude), 1, sizeof(int), lexWord);
temp.locations = malloc(temp.count*2*sizeof(int));
if(diagnostic != (RIVKey.RIVsize+2)){ temp.values = temp.locations+temp.count;
output.magnitude = -1; diagnostic += fread(temp.locations, temp.count, sizeof(int), lexWord);
} diagnostic += fread(temp.values, temp.count, sizeof(int), lexWord);
addS2D(output.values, temp);
*(output.frequency) = temp.frequency;
output.magnitude = temp.magnitude;
free(temp.locations);
output.cached = 0; output.cached = 0;
return output; return output;
...@@ -381,7 +392,7 @@ void signalSecure(int signum, siginfo_t *si, void* arg){ ...@@ -381,7 +392,7 @@ void signalSecure(int signum, siginfo_t *si, void* arg){
int cacheDump(){ int cacheDump(){
int flag = 0; int flag = 0;
denseRIV* cache_slider = RIVKey.RIVCache; denseRIV* cache_slider = RIVKey.RIVCache;
denseRIV* cache_stop = RIVKey.RIVCache+RIVKey.cacheSize; denseRIV* cache_stop = RIVKey.RIVCache+CACHESIZE;
while(cache_slider<cache_stop){ while(cache_slider<cache_stop){
if((*cache_slider).cached){ if((*cache_slider).cached){
flag += fLexPush(*cache_slider); flag += fLexPush(*cache_slider);
...@@ -394,9 +405,9 @@ int cacheDump(){ ...@@ -394,9 +405,9 @@ int cacheDump(){
denseRIV denseAllocate(){ denseRIV denseAllocate(){
/* allocates a 0 vector */ /* allocates a 0 vector */
denseRIV output; denseRIV output;
output.values = calloc(RIVKey.RIVsize+1, sizeof(int)); output.values = calloc(RIVSIZE+1, sizeof(int));
/* for compact memory use, frequency is placed immediately after values */ /* for compact memory use, frequency is placed immediately after values */
output.frequency = output.values+RIVKey.RIVsize; output.frequency = output.values+RIVSIZE;
output.magnitude = 0; output.magnitude = 0;
output.cached = 0; output.cached = 0;
return output; return output;
......
No preview for this file type
...@@ -27,9 +27,9 @@ ...@@ -27,9 +27,9 @@
#define CACHESIZE 20 #define CACHESIZE 20
#endif #endif
#define CACHED = 0x01 #define CACHED 0x02
#define SPARSE = 0x02 #define SPARSE 0x01
#define AVAILABLE = 0x04 #define AVAILABLE 0x04
typedef struct{ typedef struct{
char name[100]; char name[100];
...@@ -38,7 +38,9 @@ typedef struct{ ...@@ -38,7 +38,9 @@ typedef struct{
size_t count; size_t count;
unsigned int* frequency; unsigned int* frequency;
float magnitude; float magnitude;
char flags; int cached;
int boolean;
int flags;
}RIV; }RIV;
...@@ -152,7 +154,7 @@ int* addS2D(int* destination, sparseRIV input){// #TODO fix destination paramete ...@@ -152,7 +154,7 @@ int* addS2D(int* destination, sparseRIV input){// #TODO fix destination paramete
int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
int *destination = (int*)calloc(RIVKey.RIVsize,sizeof(int)); int *destination = (int*)calloc(RIVSIZE,sizeof(int));
int *locations_slider = locations; int *locations_slider = locations;
int *locations_stop = locations_slider+valueCount; int *locations_stop = locations_slider+valueCount;
...@@ -199,8 +201,8 @@ sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){ ...@@ -199,8 +201,8 @@ sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){
} }
sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){ sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
sparseRIV sparseOut; sparseRIV sparseOut;
int *locationsTemp = RIVKey.h_tempBlock+RIVKey.RIVsize; int *locationsTemp = RIVKey.h_tempBlock+RIVSIZE;
int *valuesTemp = RIVKey.h_tempBlock+2*RIVKey.RIVsize; int *valuesTemp = RIVKey.h_tempBlock+2*RIVSIZE;
sparseOut.count = 0; sparseOut.count = 0;
int add = 1; int add = 1;
int found; int found;
...@@ -224,17 +226,18 @@ sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){ ...@@ -224,17 +226,18 @@ sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
sparseOut.locations = malloc(2*sparseOut.count*sizeof(int)); sparseOut.locations = malloc(2*sparseOut.count*sizeof(int));
sparseOut.values = sparseOut.locations+sparseOut.count; sparseOut.values = sparseOut.locations+sparseOut.count;
memcpy(sparseOut.locations, locationsTemp, 2*sparseOut.count*sizeof(int)); memcpy(sparseOut.locations, locationsTemp, 2*sparseOut.count*sizeof(int));
sparseOut.flags |= SPARSE;
return sparseOut; return sparseOut;
} }
sparseRIV consolidateD2S(int *denseInput){ sparseRIV consolidateD2S(int *denseInput){
sparseRIV output; sparseRIV output;
output.count = 0; output.count = 0;
/* key/value pairs will be loaded to a worst-case sized temporary slot */ /* key/value pairs will be loaded to a worst-case sized temporary slot */
int* locations = RIVKey.h_tempBlock+RIVKey.RIVsize; int* locations = RIVKey.h_tempBlock+RIVSIZE;
int* values = locations+RIVKey.RIVsize; int* values = locations+RIVSIZE;
int* locations_slider = locations; int* locations_slider = locations;
int* values_slider = values; int* values_slider = values;
for(int i=0; i<RIVKey.RIVsize; i++){ for(int i=0; i<RIVSIZE; i++){
/* act only on non-zeros */ /* act only on non-zeros */
if(denseInput[i]){ if(denseInput[i]){
...@@ -261,20 +264,14 @@ sparseRIV consolidateD2S(int *denseInput){ ...@@ -261,20 +264,14 @@ sparseRIV consolidateD2S(int *denseInput){
/* copy values into opened slot */ /* copy values into opened slot */
memcpy(output.values, values, output.count*sizeof(int)); memcpy(output.values, values, output.count*sizeof(int));
output.flags |= SPARSE;
return output; return output;
} }
void RIVInit(){ void RIVInit(){
RIVKey.RIVsize = RIVSIZE; //#TODO decide about macros vs global variables
RIVKey.nonZeros = NONZEROS;
RIVKey.I2SThreshold = sqrt(RIVSIZE); RIVKey.I2SThreshold = sqrt(RIVSIZE);
if(RIVKey.nonZeros%2){
printf("your NONZEROS value must be an even number");
RIVKey.nonZeros++;
printf(", changed to %d", RIVKey.nonZeros);
}
/* open a slot at least large enough for worst case handling of /* open a slot at least large enough for worst case handling of
* sparse to dense conversion. may be enlarged by fileToL2 functions */ * sparse to dense conversion. may be enlarged by fileToL2 functions */
...@@ -285,8 +282,8 @@ void RIVInit(){ ...@@ -285,8 +282,8 @@ void RIVInit(){
sigaction(11,&action,NULL); sigaction(11,&action,NULL);
//} //}
RIVKey.h_tempBlock = (int*)malloc(3*RIVKey.RIVsize*sizeof(int)); RIVKey.h_tempBlock = (int*)malloc(3*RIVSIZE*sizeof(int));
RIVKey.tempSize = 3*RIVKey.RIVsize; RIVKey.tempSize = 3*RIVSIZE;
RIVKey.thing = 0; RIVKey.thing = 0;
RIVKey.cacheSize = CACHESIZE; RIVKey.cacheSize = CACHESIZE;
/* open a slot for a cache of dense RIVs, optimized for frequent accesses */ /* open a slot for a cache of dense RIVs, optimized for frequent accesses */
...@@ -319,12 +316,12 @@ int wordtoSeed(unsigned char* word){ ...@@ -319,12 +316,12 @@ int wordtoSeed(unsigned char* word){
void makeSparseLocations(unsigned char* word, int *locations, size_t count){ void makeSparseLocations(unsigned char* word, int *locations, size_t count){
locations+=count; locations+=count;
srand(wordtoSeed(word)); srand(wordtoSeed(word));
int *locations_stop = locations+RIVKey.nonZeros; int *locations_stop = locations+NONZEROS;
while(locations<locations_stop){ while(locations<locations_stop){
/* unrolled for speed, guaranteed to be an even number of steps */ /* unrolled for speed, guaranteed to be an even number of steps */
*locations = rand()%RIVKey.RIVsize; *locations = rand()%RIVSIZE;
locations++; locations++;
*locations = rand()%RIVKey.RIVsize; *locations = rand()%RIVSIZE;
locations++; locations++;
} }
return; return;
...@@ -345,7 +342,7 @@ int fLexPush(denseRIV RIVout){ ...@@ -345,7 +342,7 @@ int fLexPush(denseRIV RIVout){
fwrite(RIVout.frequency, 1, 4, lexWord); fwrite(RIVout.frequency, 1, 4, lexWord);
fwrite(&RIVout.magnitude, 1, 4, lexWord); fwrite(&RIVout.magnitude, 1, 4, lexWord);
fwrite(RIVout.values, RIVKey.RIVsize, 4, lexWord); fwrite(RIVout.values, RIVSIZE, 4, lexWord);
fclose(lexWord); fclose(lexWord);
free(RIVout.values); free(RIVout.values);
...@@ -354,17 +351,17 @@ int fLexPush(denseRIV RIVout){ ...@@ -354,17 +351,17 @@ int fLexPush(denseRIV RIVout){
denseRIV fLexPull(FILE* lexWord){ denseRIV fLexPull(FILE* lexWord){
denseRIV output; denseRIV output;
output.values = malloc( (RIVKey.RIVsize+1) *sizeof(int)); output.values = malloc( (RIVSIZE+1) *sizeof(int));
output.frequency = output.values+RIVKey.RIVsize; output.frequency = (unsigned int*)(output.values+RIVSIZE);
int diagnostic = 0; int diagnostic = 0;
diagnostic += fread(output.frequency, 1, sizeof(int), lexWord); diagnostic += fread(output.frequency, 1, sizeof(int), lexWord);
diagnostic += fread(&(output.magnitude), 1, sizeof(int), lexWord); diagnostic += fread(&(output.magnitude), 1, sizeof(int), lexWord);
diagnostic += fread(output.values, RIVKey.RIVsize, sizeof(int), lexWord); diagnostic += fread(output.values, RIVSIZE, sizeof(int), lexWord);
if(diagnostic != (RIVKey.RIVsize+2)){ if(diagnostic != (RIVSIZE+2)){
output.magnitude = -1; output.magnitude = -1;
} }
output.flags = 0; output.cached = 0;
return output; return output;
} }
...@@ -395,9 +392,9 @@ int cacheDump(){ ...@@ -395,9 +392,9 @@ int cacheDump(){
denseRIV denseAllocate(){ denseRIV denseAllocate(){
/* allocates a 0 vector */ /* allocates a 0 vector */
denseRIV output; denseRIV output;
output.values = calloc(RIVKey.RIVsize+1, sizeof(int)); output.values = calloc(RIVSIZE+1, sizeof(int));
/* for compact memory use, frequency is placed immediately after values */ /* for compact memory use, frequency is placed immediately after values */
output.frequency = output.values+RIVKey.RIVsize; output.frequency = (unsigned int*)(output.values+RIVSIZE);
output.magnitude = 0; output.magnitude = 0;
output.cached = 0; output.cached = 0;
return output; return output;
......
No preview for this file type
No preview for this file type
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
RIVKey.thing++; \ RIVKey.thing++; \
}\ }\
}while(0) }while(0)
#include "RIVtoolsCPUlinux.h" #include "RIVtoolsMorphic.h"
void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount); void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
...@@ -48,13 +48,13 @@ int main(int argc, char *argv[]){ ...@@ -48,13 +48,13 @@ int main(int argc, char *argv[]){
float minmag; float minmag;
float maxmag; float maxmag;
denseRIV baseDense; denseRIV baseDense;
baseDense.values = malloc(RIVKey.RIVsize*sizeof(int)); baseDense.values = malloc(RIVSIZE*sizeof(int));
fileRIVs_slider = fileRIVs; fileRIVs_slider = fileRIVs;
sparseRIV* comparators_slider; sparseRIV* comparators_slider;
while(fileRIVs_slider<fileRIVs_stop){ while(fileRIVs_slider<fileRIVs_stop){
comparators_slider = fileRIVs; comparators_slider = fileRIVs;
memset(baseDense.values, 0, RIVKey.RIVsize*sizeof(int)); memset(baseDense.values, 0, RIVSIZE*sizeof(int));
baseDense.values = addS2D(baseDense.values, *fileRIVs_slider); baseDense.values = addS2D(baseDense.values, *fileRIVs_slider);
baseDense.magnitude = (*fileRIVs_slider).magnitude; baseDense.magnitude = (*fileRIVs_slider).magnitude;
minmag = baseDense.magnitude*.85; minmag = baseDense.magnitude*.85;
......
No preview for this file type
No preview for this file type
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <time.h> #include <time.h>
#define CACHESIZE 10000 #define CACHESIZE 100000
#include "RIVtoolsCPUlinux.h" #include "RIVtoolsCPUlinux.h"
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
...@@ -84,7 +84,7 @@ void directoryGrind(char *rootString){ ...@@ -84,7 +84,7 @@ void directoryGrind(char *rootString){
} }
strcpy(pathString, rootString); strcpy(pathString, rootString);
strcat(pathString, files->d_name); strcat(pathString, files->d_name);
//printf("%s\n", pathString); printf("%s\n", pathString);
FILE *input = fopen(pathString, "r+"); FILE *input = fopen(pathString, "r+");
if(input){ if(input){
fileGrind(input); fileGrind(input);
......
File added
...@@ -52,23 +52,23 @@ sparseRIV text2L2(char *text){ ...@@ -52,23 +52,23 @@ sparseRIV text2L2(char *text){
break; break;
} }
blockSize = locationCount+RIVKey.nonZeros; blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */ /* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){ if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int)); RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock; locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros; RIVKey.tempSize+=NONZEROS;
} }
/* add word's L1 RIV to the accumulating implicit RIV */ /* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations((unsigned char*)word, locations, locationCount); makeSparseLocations((unsigned char*)word, locations, locationCount);
locationCount+= RIVKey.nonZeros; locationCount+= NONZEROS;
} }
sparseRIV output = consolidateI2S(locations, locationCount); sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */ /* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros; output.frequency = locationCount/NONZEROS;
output.boolean = 1; output.boolean = 1;
return output; return output;
} }
...@@ -92,24 +92,24 @@ sparseRIV fileToL2(FILE *data){ ...@@ -92,24 +92,24 @@ sparseRIV fileToL2(FILE *data){
break; break;
} }
blockSize = locationCount+RIVKey.nonZeros; blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */ /* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){ if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int)); RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock; locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros; RIVKey.tempSize+=NONZEROS;
} }
/* add word's L1 RIV to the accumulating implicit RIV */ /* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations(word, locations, locationCount); makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros; locationCount+= NONZEROS;
} }
sparseRIV output = consolidateI2S(locations, locationCount); sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */ /* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros; output.frequency = locationCount/NONZEROS;
output.boolean = 1; output.boolean = 1;
return output; return output;
...@@ -137,22 +137,22 @@ sparseRIV fileToL2Clean(FILE *data){ ...@@ -137,22 +137,22 @@ sparseRIV fileToL2Clean(FILE *data){
if(!isWordClean((char*)word)){ if(!isWordClean((char*)word)){
continue; continue;
} }
blockSize = locationCount+RIVKey.nonZeros; blockSize = locationCount+NONZEROS;
if(blockSize>RIVKey.tempSize){ if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int)); RIVKey.h_tempBlock = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock; locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros; RIVKey.tempSize+=NONZEROS;
} }
makeSparseLocations(word, locations, locationCount); makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros; locationCount+= NONZEROS;
} }
sparseRIV output = consolidateI2S(locations, locationCount); sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */ /* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros; output.frequency = locationCount/NONZEROS;
output.boolean = 1; output.boolean = 1;
return output; return output;
} }
...@@ -169,10 +169,10 @@ void aggregateWord2D(denseRIV destination, char* word){ ...@@ -169,10 +169,10 @@ void aggregateWord2D(denseRIV destination, char* word){
//makeSparseLocations((unsigned char*)word, locationSlot, 0); //makeSparseLocations((unsigned char*)word, locationSlot, 0);
srand(wordtoSeed((unsigned char*)word)); srand(wordtoSeed((unsigned char*)word));
for(int i=0; i<RIVKey.nonZeros; i++){ for(int i=0; i<NONZEROS; i++){
destination.values[(rand()%RIVKey.RIVsize)] +=1; destination.values[(rand()%RIVSIZE)] +=1;
destination.values[(rand()%RIVKey.RIVsize)] -= 1; destination.values[(rand()%RIVSIZE)] -= 1;
} }
} }
...@@ -214,7 +214,7 @@ denseRIV lexPull(char* word){ ...@@ -214,7 +214,7 @@ denseRIV lexPull(char* word){
/* if there is a cache, first check if the word is cached */ /* if there is a cache, first check if the word is cached */
srand(wordtoSeed((unsigned char*)word)); srand(wordtoSeed((unsigned char*)word));
int hash = rand()%RIVKey.cacheSize; int hash = rand()%CACHESIZE;
if(!strcmp(word, RIVKey.RIVCache[hash].name)){ if(!strcmp(word, RIVKey.RIVCache[hash].name)){
/* if word is cached, pull from cache and exit */ /* if word is cached, pull from cache and exit */
...@@ -259,7 +259,7 @@ int lexPush(denseRIV RIVout){ ...@@ -259,7 +259,7 @@ int lexPush(denseRIV RIVout){
} }
srand(wordtoSeed((unsigned char*)RIVout.name)); srand(wordtoSeed((unsigned char*)RIVout.name));
int hash = rand()%RIVKey.cacheSize; int hash = rand()%CACHESIZE;
if(!RIVKey.RIVCache[hash].cached){ if(!RIVKey.RIVCache[hash].cached){
RIVKey.RIVCache[hash] = RIVout; RIVKey.RIVCache[hash] = RIVout;
......
No preview for this file type
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <math.h> #include <math.h>
#include "RIVLower.h" #include "RIVLowerMorphic.h"
#include "RIVaccessories.h" #include "RIVaccessories.h"
/* lexPush writes a denseRIV to a file for permanent storage */ /* lexPush writes a denseRIV to a file for permanent storage */
...@@ -39,7 +39,7 @@ sparseRIV text2L2(char *text){ ...@@ -39,7 +39,7 @@ sparseRIV text2L2(char *text){
/* locations (implicit RIV) are temp stored in temp block, and moved /* locations (implicit RIV) are temp stored in temp block, and moved
* to permanent home in consolidation */ * to permanent home in consolidation */
int *locations = RIVKey.h_tempBlock; int *locations = RIVKey.h_tempBlock;
int locationCount = 0; unsigned int locationCount = 0;
int displacement; int displacement;
while(sscanf(text, "%99s%n", word, &displacement)){ while(sscanf(text, "%99s%n", word, &displacement)){
...@@ -52,23 +52,23 @@ sparseRIV text2L2(char *text){ ...@@ -52,23 +52,23 @@ sparseRIV text2L2(char *text){
break; break;
} }
blockSize = locationCount+RIVKey.nonZeros; blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */ /* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){ if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int)); RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock; locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros; RIVKey.tempSize+=NONZEROS;
} }
/* add word's L1 RIV to the accumulating implicit RIV */ /* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations((unsigned char*)word, locations, locationCount); makeSparseLocations((unsigned char*)word, locations, locationCount);
locationCount+= RIVKey.nonZeros; locationCount+= NONZEROS;
} }
sparseRIV output = consolidateI2S(locations, locationCount); sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */ /* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros; *(output.frequency) = locationCount/NONZEROS;
output.boolean = 1; output.boolean = 1;
return output; return output;
} }
...@@ -92,26 +92,25 @@ sparseRIV fileToL2(FILE *data){ ...@@ -92,26 +92,25 @@ sparseRIV fileToL2(FILE *data){
break; break;
} }
blockSize = locationCount+RIVKey.nonZeros; blockSize = locationCount+NONZEROS;
/* if this word would overflow the locations block, grow it */ /* if this word would overflow the locations block, grow it */
if(blockSize>RIVKey.tempSize){ if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int)); RIVKey.h_tempBlock = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock; locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros; RIVKey.tempSize+=NONZEROS;
} }
/* add word's L1 RIV to the accumulating implicit RIV */ /* add word's L1 RIV to the accumulating implicit RIV */
makeSparseLocations(word, locations, locationCount); makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros; locationCount+= NONZEROS;
} }
sparseRIV output = consolidateI2S(locations, locationCount); sparseRIV output = consolidateI2S(locations, locationCount);
output.frequency = malloc(1*sizeof(int));
/* frequency records the number of words in this file */ /* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros; *(output.frequency) = locationCount/NONZEROS;
output.boolean = 1; output.boolean = 1;
return output; return output;
} }
...@@ -137,22 +136,22 @@ sparseRIV fileToL2Clean(FILE *data){ ...@@ -137,22 +136,22 @@ sparseRIV fileToL2Clean(FILE *data){
if(!isWordClean((char*)word)){ if(!isWordClean((char*)word)){
continue; continue;
} }
blockSize = locationCount+RIVKey.nonZeros; blockSize = locationCount+NONZEROS;
if(blockSize>RIVKey.tempSize){ if(blockSize>RIVKey.tempSize){
RIVKey.h_tempBlock = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int)); RIVKey.h_tempBlock = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
locations = RIVKey.h_tempBlock; locations = RIVKey.h_tempBlock;
RIVKey.tempSize+=RIVKey.nonZeros; RIVKey.tempSize+=NONZEROS;
} }
makeSparseLocations(word, locations, locationCount); makeSparseLocations(word, locations, locationCount);
locationCount+= RIVKey.nonZeros; locationCount+= NONZEROS;
} }
sparseRIV output = consolidateI2S(locations, locationCount); sparseRIV output = consolidateI2S(locations, locationCount);
/* frequency records the number of words in this file */ /* frequency records the number of words in this file */
output.frequency = locationCount/RIVKey.nonZeros; *(output.frequency) = locationCount/NONZEROS;
output.boolean = 1; output.boolean = 1;
return output; return output;
} }
...@@ -169,10 +168,10 @@ void aggregateWord2D(denseRIV destination, char* word){ ...@@ -169,10 +168,10 @@ void aggregateWord2D(denseRIV destination, char* word){
//makeSparseLocations((unsigned char*)word, locationSlot, 0); //makeSparseLocations((unsigned char*)word, locationSlot, 0);
srand(wordtoSeed((unsigned char*)word)); srand(wordtoSeed((unsigned char*)word));
for(int i=0; i<RIVKey.nonZeros; i++){ for(int i=0; i<NONZEROS; i++){
destination.values[(rand()%RIVKey.RIVsize)] +=1; destination.values[(rand()%RIVSIZE)] +=1;
destination.values[(rand()%RIVKey.RIVsize)] -= 1; destination.values[(rand()%RIVSIZE)] -= 1;
} }
} }
...@@ -195,10 +194,17 @@ float cosCompare(denseRIV baseRIV, sparseRIV comparator){ ...@@ -195,10 +194,17 @@ float cosCompare(denseRIV baseRIV, sparseRIV comparator){
return cosine; return cosine;
} }
float getMagnitudeSparse(sparseRIV input){ float getMagnitudeSparse(RIV input){
size_t count;
if(input.flags & SPARSE){
count = input.count;
}else{
count = RIVSIZE;
}
unsigned long long int temp = 0; unsigned long long int temp = 0;
int *values = input.values; int *values = input.values;
int *values_stop = values+input.count; int *values_stop = values+count;
while(values<values_stop){ while(values<values_stop){
temp += (*values)*(*values); temp += (*values)*(*values);
values++; values++;
...@@ -309,7 +315,7 @@ sparseRIV fileToL2direct(FILE *data){; ...@@ -309,7 +315,7 @@ sparseRIV fileToL2direct(FILE *data){;
sparseRIV output = consolidateD2S(denseTemp.values); sparseRIV output = consolidateD2S(denseTemp.values);
// frequency records the number of words in this file // frequency records the number of words in this file
output.frequency = count; *(output.frequency) = count;
output.boolean = 1; output.boolean = 1;
return output; return output;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment