Commit f258c284 by simetk

begining GPU integration

parent f2876a83
...@@ -145,5 +145,316 @@ denseRIV denseAllocate(); ...@@ -145,5 +145,316 @@ denseRIV denseAllocate();
void signalSecure(int signum, siginfo_t *si, void* arg); void signalSecure(int signum, siginfo_t *si, void* arg);
/* begin definitions */ /* begin definitions */
/* addS2D accumulates a sparse RIV into a dense value array.
 * destination: dense array; every entry of input.locations is used
 *              directly as an index into it
 * input: sparse RIV whose location/value pairs are added in order
 * returns destination
 */
int* addS2D(int* destination, sparseRIV input){// #TODO fix destination parameter vs calloc of destination
	const int *location = input.locations;
	const int *const locationsEnd = location+input.count;
	const int *value = input.values;
	/* walk the paired arrays together, summing each value into its slot */
	for(; location<locationsEnd; location++, value++){
		destination[*location] += *value;
	}
	return destination;
}
/* mapI2D builds a fresh dense vector from an "implicit RIV", i.e. an
 * ordered list of locations: entries at even positions of the list add +1,
 * entries at odd positions add -1.
 * locations: array of valueCount indices, each used directly as an index
 *            into a RIVSIZE-long array
 * valueCount: number of entries in locations
 * returns a calloc'd array of RIVSIZE ints that the caller must free,
 *         or NULL if the allocation failed
 */
int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
	int *destination = (int*)calloc(RIVSIZE,sizeof(int));
	if(!destination){
		return NULL;
	}
	/* one entry per step, so an odd valueCount never reads past the list */
	for(size_t i=0; i<valueCount; i++){
		destination[locations[i]] += (i%2 == 0) ? 1 : -1;
	}
	return destination;
}
/* addI2D adds an "implicit RIV" (an ordered list of locations) onto an
 * existing dense array: entries at even positions of the list add +1,
 * entries at odd positions add -1.
 * destination: dense array indexed directly by each location
 * locations: array of valueCount indices
 * valueCount: number of entries in locations
 * returns destination
 */
int* addI2D(int* destination, int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
	/* one entry per step, so an odd valueCount never reads past the list */
	for(size_t i=0; i<valueCount; i++){
		destination[locations[i]] += (i%2 == 0) ? 1 : -1;
	}
	return destination;
}
/* consolidateI2SIndirect turns an implicit RIV into a sparse RIV by way of
 * a temporary dense vector: the locations are mapped to dense form, the
 * dense form is compacted, and the temporary is released.
 * implicit: list of valueCount locations
 * returns the compacted sparse RIV
 */
sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){
	int *scratchDense = mapI2D(implicit, valueCount);
	sparseRIV compacted = consolidateD2S(scratchDense);
	/* the dense copy was only a stepping stone */
	free(scratchDense);
	return compacted;
}
/* consolidateI2SDirect turns an implicit RIV (ordered list of locations)
 * straight into a sparse RIV without building a dense vector.
 * Entries of the list alternate between contributing +1 and -1; repeated
 * locations are merged into a single pair whose value is the running sum.
 * Pairs are staged in the shared temp block (locations at offset RIVSIZE,
 * values at offset 2*RIVSIZE) and then copied into one malloc'd block.
 * implicit: list of valueCount locations
 * returns a sparse RIV whose locations pointer owns the allocation
 *         (values points into the same block); on allocation failure the
 *         result has count 0 and NULL locations/values
 */
sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
	sparseRIV sparseOut;
	int *locationsTemp = RIVKey.h_tempBlock+RIVSIZE;
	int *valuesTemp = RIVKey.h_tempBlock+2*RIVSIZE;
	size_t uniqueCount = 0;
	int add = 1;
	for(size_t i=0; i<valueCount; i++){
		/* look for an existing pair holding this location */
		size_t j = 0;
		while(j<uniqueCount && locationsTemp[j] != implicit[i]){
			j++;
		}
		if(j == uniqueCount){
			/* first sighting: open a new pair */
			locationsTemp[uniqueCount] = implicit[i];
			valuesTemp[uniqueCount] = add;
			uniqueCount++;
		}else{
			/* repeat: update the value of the pair that matched */
			valuesTemp[j] += add;
		}
		/* the sign flips after every entry, matched or not */
		add = -add;
	}
	sparseOut.count = uniqueCount;
	sparseOut.locations = (int*)malloc(2*uniqueCount*sizeof(int));
	if(!sparseOut.locations){
		if(uniqueCount){
			fprintf(stderr, "memory allocation failed\n");
		}
		sparseOut.values = NULL;
		sparseOut.count = 0;
		return sparseOut;
	}
	sparseOut.values = sparseOut.locations+uniqueCount;
	memcpy(sparseOut.locations, locationsTemp, uniqueCount*sizeof(int));
	memcpy(sparseOut.values, valuesTemp, uniqueCount*sizeof(int));
	return sparseOut;
}
/* consolidateD2S compacts a dense value array into a sparse RIV holding
 * only the non-zero entries.
 * Pairs are staged in the shared temp block (locations at offset RIVSIZE,
 * values RIVSIZE further on) and then copied into one malloc'd block.
 * denseInput: array of RIVSIZE ints
 * returns a sparse RIV whose locations pointer owns the allocation
 *         (values points into the same block); on allocation failure the
 *         result has count 0 and NULL locations/values
 */
sparseRIV consolidateD2S(int *denseInput){
	sparseRIV output;
	/* key/value pairs will be loaded to a worst-case sized temporary slot */
	int* locations = RIVKey.h_tempBlock+RIVSIZE;
	int* values = locations+RIVSIZE;
	size_t found = 0;
	for(int i=0; i<RIVSIZE; i++){
		/* act only on non-zeros */
		if(denseInput[i]){
			locations[found] = i;
			values[found] = denseInput[i];
			found++;
		}
	}
	output.count = found;
	/* a slot is opened for the locations/values pair */
	output.locations = (int*) malloc(found*2*sizeof(int));
	if(!output.locations){
		if(found){
			printf("memory allocation failed"); //*TODO enable fail point knowledge
		}
		/* nothing can be copied; hand back an empty vector instead of writing through NULL */
		output.values = NULL;
		output.count = 0;
		return output;
	}
	output.values = output.locations + found;
	memcpy(output.locations, locations, found*sizeof(int));
	memcpy(output.values, values, found*sizeof(int));
	return output;
}
/* RIVInit prepares the global RIVKey state:
 *  - sets the implicit-to-sparse threshold to sqrt(RIVSIZE)
 *  - installs signalSecure as the SIGSEGV handler
 *  - allocates the shared temp block (3*RIVSIZE ints)
 *  - zeroes the dense RIV cache
 * The process exits if the temp block cannot be allocated.
 */
void RIVInit(){
	RIVKey.I2SThreshold = sqrt(RIVSIZE);
	/* the whole struct, including sa_mask, must be initialised before use */
	struct sigaction action;
	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_sigaction = signalSecure;
	action.sa_flags = SA_SIGINFO;
	if(sigaction(SIGSEGV,&action,NULL)){
		perror("RIVInit: could not install SIGSEGV handler");
	}
	/* open a slot at least large enough for worst case handling of
	 * sparse to dense conversion. may be enlarged by filetoL2 functions */
	RIVKey.h_tempBlock = (int*)malloc(3*RIVSIZE*sizeof(int));
	if(!RIVKey.h_tempBlock){
		perror("RIVInit: temp block allocation failed");
		exit(EXIT_FAILURE);
	}
	RIVKey.tempSize = 3*RIVSIZE;
	RIVKey.thing = 0;
	/* open a slot for a cache of dense RIVs, optimized for frequent accesses */
	memset(RIVKey.RIVCache, 0, sizeof(denseRIV)*CACHESIZE);
}
/* RIVCleanup flushes the cache through cacheDump, reports if any push
 * failed, and releases the shared temp block.
 */
void RIVCleanup(){
	const int failedPushes = cacheDump();
	if(failedPushes != 0){
		puts("cache dump failed, some lexicon data was lost");
	}
	free(RIVKey.h_tempBlock);
}
/* wordtoSeed derives an integer seed from the characters of a word.
 * Each character is shifted left by 5 bits more than the previous one and
 * the results are summed, so short words are read as numbers in base 32.
 * The shift amount is reduced modulo the bit width of unsigned int and the
 * sum is kept in unsigned arithmetic, so words of any length have a
 * well-defined seed (shifting by the full width or more is undefined).
 * word: NUL-terminated string
 * returns the seed; sums above INT_MAX are converted back to int
 */
int wordtoSeed(unsigned char* word){
	const unsigned int width = (unsigned int)(sizeof(unsigned int)*8);
	unsigned int seed = 0;
	unsigned int shift = 0;
	for(; *word; word++){
		seed += (unsigned int)(*word) << shift;
		shift = (shift+5) % width;
	}
	return (int)seed;
}
/* makeSparseLocations writes the pseudo-random locations for one word into
 * the locations array, starting at index count.
 * The generator is seeded from the word, so the same word always yields
 * the same locations. Values are written two per step until NONZEROS
 * slots are covered.
 * word: NUL-terminated word used to seed rand()
 * locations: destination array
 * count: number of slots already filled
 */
void makeSparseLocations(unsigned char* word, int *locations, size_t count){
	int *cursor = locations+count;
	int *const end = cursor+NONZEROS;
	srand(wordtoSeed(word));
	/* filled in pairs, guaranteed to be an even number of steps */
	for(; cursor<end; cursor+=2){
		cursor[0] = rand()%RIVSIZE;
		cursor[1] = rand()%RIVSIZE;
	}
}
/* fLexPush writes a dense RIV to the file "lexicon/<name>".
 * File layout: an int header (pair count if stored sparse, 0 if stored
 * dense), the frequency, the magnitude, then either the locations followed
 * by the values (sparse) or all RIVSIZE values (dense). The header is
 * written as an int because that is the width fLexPull reads back.
 * RIVout.values is released only when the push succeeds; on failure the
 * buffer is left with the caller.
 * NOTE(review): an all-zero vector is stored sparse with a header of 0,
 * which the reader treats as "dense" — confirm this is intended.
 * returns 0 on success, 1 if the file could not be opened or written
 */
int fLexPush(denseRIV RIVout){
	char pathString[200] = {0};
	/* word data will be placed in a (new?) file under the lexicon directory
	 * in a file named after the word itself */
	snprintf(pathString, sizeof(pathString), "lexicon/%s", RIVout.name);
	FILE *lexWord = fopen(pathString, "wb");
	if(!lexWord){
		printf("lexicon push has failed for word: %s\nconsider cleaning inputs", pathString);
		return 1;
	}
	sparseRIV temp = consolidateD2S(RIVout.values);
	int failed = 0;
	if(temp.count<(RIVSIZE/2)){
		/* smaller stored as sparse vector */
		int storedCount = (int)temp.count;
		failed |= (fwrite(&storedCount, sizeof(int), 1, lexWord) != 1);
		failed |= (fwrite(RIVout.frequency, sizeof(int), 1, lexWord) != 1);
		failed |= (fwrite(&RIVout.magnitude, sizeof(float), 1, lexWord) != 1);
		failed |= (fwrite(temp.locations, sizeof(int), temp.count, lexWord) != temp.count);
		failed |= (fwrite(temp.values, sizeof(int), temp.count, lexWord) != temp.count);
	}else{
		/* saturation is too high, better to store dense */
		int denseMarker = 0;
		failed |= (fwrite(&denseMarker, sizeof(int), 1, lexWord) != 1);
		failed |= (fwrite(RIVout.frequency, sizeof(int), 1, lexWord) != 1);
		failed |= (fwrite(&RIVout.magnitude, sizeof(float), 1, lexWord) != 1);
		failed |= (fwrite(RIVout.values, sizeof(int), RIVSIZE, lexWord) != RIVSIZE);
	}
	/* fclose flushes, so a failure here also means lost data */
	if(fclose(lexWord)){
		failed = 1;
	}
	free(temp.locations);
	if(failed){
		return 1;
	}
	free(RIVout.values);
	return 0;
}
/* fLexPull reads one lexicon file into a freshly allocated dense RIV.
 * File layout: an int header (pair count if stored sparse, 0 if stored
 * dense), the frequency, the magnitude, then either the locations followed
 * by the values (sparse) or RIVSIZE values (dense).
 * Data that cannot be read or is out of range is ignored, leaving the
 * corresponding entries at the zeros denseAllocate provides.
 * lexWord: open stream positioned at the start of the file
 * returns the dense RIV; values is NULL only if denseAllocate failed
 */
denseRIV fLexPull(FILE* lexWord){
	denseRIV output = denseAllocate();
	output.cached = 0;
	if(!output.values){
		return output;
	}
	int typeCheck = 0;
	/* get metadata for vector */
	if(fread(&typeCheck, sizeof(int), 1, lexWord) != 1
	|| fread(output.frequency, sizeof(int), 1, lexWord) != 1
	|| fread(&(output.magnitude), sizeof(float), 1, lexWord) != 1){
		/* truncated header: nothing more can be trusted */
		return output;
	}
	/* first value stored is the value count if sparse, and 0 if dense */
	if(typeCheck){
		if(typeCheck < 0 || typeCheck > RIVSIZE){
			/* a count outside 1..RIVSIZE cannot describe a valid vector */
			return output;
		}
		size_t pairCount = (size_t)typeCheck;
		/* locations and values are stored back to back, so one read gets both */
		int *pairs = (int*)malloc(pairCount*2*sizeof(int));
		if(!pairs){
			return output;
		}
		if(fread(pairs, sizeof(int), pairCount*2, lexWord) == pairCount*2){
			const int *locations = pairs;
			const int *values = pairs+pairCount;
			for(size_t k=0; k<pairCount; k++){
				/* skip locations that would index outside the vector */
				if(locations[k] >= 0 && locations[k] < RIVSIZE){
					output.values[locations[k]] += values[k];
				}
			}
		}
		free(pairs);
	}else{
		/* typecheck is thrown away, just a flag in this case;
		 * a short read leaves the remaining entries at zero */
		if(fread(output.values, sizeof(int), RIVSIZE, lexWord) != RIVSIZE){
			/* nothing further to do: the vector keeps whatever was read */
		}
	}
	return output;
}
/* signalSecure is the signal handler installed by RIVInit.
 * It flushes the cache through cacheDump, reports the outcome, restores
 * the default disposition for the signal and re-sends the signal to this
 * process.
 * NOTE(review): cacheDump and puts are called from signal context —
 * confirm that this best-effort behaviour is acceptable.
 * signum: the signal being handled; si and arg are not used
 */
void signalSecure(int signum, siginfo_t *si, void* arg){
	(void)si;
	(void)arg;
	const char *report = cacheDump()
		? "cache dump failed, some lexicon data lost"
		: "cache dumped successfully";
	puts(report);
	signal(signum, SIG_DFL);
	kill(getpid(), signum);
}
int cacheDump(){
int i=0;
int j=0;
int flag = 0;
denseRIV* cache_slider = RIVKey.RIVCache;
denseRIV* cache_stop = RIVKey.RIVCache+CACHESIZE;
while(cache_slider<cache_stop){
if((*cache_slider).cached){
j++;
flag += fLexPush(*cache_slider);
}
else{
i++;
}
cache_slider++;
}
printf("%d cacheslots unused\n%d, cacheslots used", i, j);
return flag;
}
/* denseAllocate returns a dense RIV whose values are all zero.
 * One block of RIVSIZE+1 ints is allocated: the first RIVSIZE hold the
 * values and the last one holds the frequency.
 * returns the new RIV; values and frequency are both NULL if the
 *         allocation failed
 */
denseRIV denseAllocate(){
	denseRIV output;
	output.values = (int*)calloc(RIVSIZE+1, sizeof(int));
	/* for compact memory use, frequency is placed immediately after values;
	 * no pointer arithmetic is done on a failed allocation */
	output.frequency = output.values ? output.values+RIVSIZE : NULL;
	output.magnitude = 0;
	output.cached = 0;
	return output;
}
/*TODO add a simplified free function*/
#endif #endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <math.h>
/* RIVSIZE macro defines the dimensionality of the RIVs we will use
 * 25000 is the standard, but can be redefined specifically
 */
#ifndef RIVSIZE
#define RIVSIZE 25000
#endif
/* NONZEROS macro defines the number of non-zero values that will be generated
 * for any level one (barcode) RIV. 2 is simple and lightweight to begin
 */
#ifndef NONZEROS
#define NONZEROS 2
#endif
/* CACHESIZE macro defines the number of RIVs the system will cache.
* a larger cache means more memory consumption, but will also be significantly
* faster in aggregation and reading applications. doesn't affect systems
* that do not use lexpull/push
*/
#ifndef CACHESIZE
#define CACHESIZE 20
#endif
#define CACHED 0x02
#define SPARSE 0x01
#define AVAILABLE 0x04
/* RIV is the single record type behind both sparseRIV and denseRIV. */
typedef struct{
/* word (or file path) this vector belongs to; fLexPush uses it as the file name under lexicon/ */
char name[100];
/* dense form: RIVSIZE values; sparse form: count values stored right after locations */
int *values;
/* sparse form only: count indices; also the start of the sparse allocation */
int *locations;
/* sparse form: number of location/value pairs */
size_t count;
/* dense form: points at the extra int allocated just past values */
unsigned int* frequency;
/* vector magnitude; fLexPull sets it to -1 when a read fails */
float magnitude;
/* non-zero while the RIV is held in RIVKey.RIVCache (set by lexPush) */
int cached;
/* set to 1 by the file/text to L2 functions — purpose not shown here, TODO confirm */
int boolean;
/* bit flags; SPARSE is the only one set or cleared in this file */
int flags;
}RIV;
/* the sparseRIV is a RIV form optimized for RIVs that will be mostly 0s
* as this is often an ideal case, it is adviseable as the default
* unless we are doing long term RIV aggregation.
* specifically, a sparseRIV contains a pair of arrays,
* containing locations and values, where pairs are found in like array
* indices.
*/
typedef RIV sparseRIV;
/* the denseRIV is a RIV form optimized for overwhelmingly non-0 vectors
* this is rarely the case, but its primary use is for performing vector
* math, as comparisons and arithmetic between vectors are ideally
* performed between sparse and dense (hetero-arithmetic)
*/
typedef RIV denseRIV;
/* RIVKey holds globally important data that should not be changed partway through.
 * the first function call in the program should always be:
 * RIVInit();
 * this will set these variables, check for incompatible choices, and open up
 * memory blocks which the system will use in the background
 */
/* RIVData is the type of the file-scope RIVKey object that the functions
 * below share. */
struct RIVData{
/* not assigned anywhere in this file — presumably mirrors RIVSIZE, TODO confirm */
size_t RIVsize;
/* not assigned anywhere in this file — presumably mirrors NONZEROS, TODO confirm */
int nonZeros;
/* set to sqrt(RIVSIZE) by RIVInit; consolidateI2S compares the value count against it */
int I2SThreshold;
/* shared scratch block; RIVInit allocates 3*RIVSIZE ints */
int *h_tempBlock;
/* size of h_tempBlock, counted in ints */
int tempSize;
/* zeroed by RIVInit; purpose not shown in this file */
int thing;
/* array of cacheSize dense RIVs, allocated zeroed by RIVInit */
denseRIV* RIVCache;
/* number of slots in RIVCache; RIVInit sets it to CACHESIZE */
int cacheSize;
}static RIVKey;
/* RIVInit should be the first function called in any usage of this library.
 * it sets global variables that practically all functions will reference,
 * it checks that your base parameters are valid, and allocates memory for
 * the functions to use, so that we can move fast with rare allocations.
 */
void RIVInit();
/* RIVCleanup should always be called to close a RIV program. it frees
 * blocks allocated by RIVInit, and dumps the cached data to appropriate lexicon files
 */
void RIVCleanup();
/*consolidateD2S takes a denseRIV value-set input, and returns a sparse RIV with
* all 0s removed. it does not automatically carry metadata, which must be assigned
* to a denseRIV after the fact. often denseRIVs are only temporary, and don't
* need to carry metadata
*/
sparseRIV consolidateD2S(int *denseInput); //#TODO fix int*/denseRIV confusion
/* mapS2D expands a sparseRIV out to denseRIV values, filling array locations
* based on location-value pairs
*/
/* makeSparseLocations must be called repeatedly in the processing of a
 * file to produce a series of locations from the words of the file
 * this produces an "implicit" RIV which can be used with the mapI2D function
 * to create a denseRIV.
 * word: word used to seed the generator
 * locations: array receiving the generated locations
 * count: number of slots of locations that are already filled
 */
void makeSparseLocations(unsigned char* word, int *locations, size_t count);
/* fLexPush pushes the data contained in a denseRIV out to a lexicon file,
* saving it for long-term aggregation. function is called by "lexpush",
* which is what users should actually use. lexPush, unlike fLexPush,
* has cache logic under the hood for speed and harddrive optimization
*/
int fLexPush(denseRIV RIVout);
denseRIV fLexPull(FILE* lexWord);
/* creates a standard seed from the characters in a word, hopefully unique */
int wordtoSeed(unsigned char* word);
/* mapI2D maps an "implicit RIV" that is, an array of index values,
* arranged by chronological order of generation (as per makesparseLocations)
* it assigns, in the process of mapping, values according to ordering
*/
int* mapI2D(int *locations, size_t seedCount);
sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount);
sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount);
int cacheDump();
int* addI2D(int* destination, int* locations, size_t seedCount);
denseRIV denseAllocate();
void signalSecure(int signum, siginfo_t *si, void* arg);
/* begin definitions */
/* addS2D accumulates a sparse RIV into a dense value array.
 * destination: dense array; every entry of input.locations is used
 *              directly as an index into it
 * input: sparse RIV whose location/value pairs are added in order
 * returns destination
 */
int* addS2D(int* destination, sparseRIV input){// #TODO fix destination parameter vs calloc of destination
	const int *location = input.locations;
	const int *const locationsEnd = location+input.count;
	const int *value = input.values;
	/* walk the paired arrays together, summing each value into its slot */
	for(; location<locationsEnd; location++, value++){
		destination[*location] += *value;
	}
	return destination;
}
/* mapI2D builds a fresh dense vector from an "implicit RIV", i.e. an
 * ordered list of locations: entries at even positions of the list add +1,
 * entries at odd positions add -1.
 * locations: array of valueCount indices, each used directly as an index
 *            into a RIVSIZE-long array
 * valueCount: number of entries in locations
 * returns a calloc'd array of RIVSIZE ints that the caller must free,
 *         or NULL if the allocation failed
 */
int* mapI2D(int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
	int *destination = (int*)calloc(RIVSIZE,sizeof(int));
	if(!destination){
		return NULL;
	}
	/* one entry per step, so an odd valueCount never reads past the list */
	for(size_t i=0; i<valueCount; i++){
		destination[locations[i]] += (i%2 == 0) ? 1 : -1;
	}
	return destination;
}
/* addI2D adds an "implicit RIV" (an ordered list of locations) onto an
 * existing dense array: entries at even positions of the list add +1,
 * entries at odd positions add -1.
 * destination: dense array indexed directly by each location
 * locations: array of valueCount indices
 * valueCount: number of entries in locations
 * returns destination
 */
int* addI2D(int* destination, int *locations, size_t valueCount){// #TODO fix destination parameter vs calloc of destination
	/* one entry per step, so an odd valueCount never reads past the list */
	for(size_t i=0; i<valueCount; i++){
		destination[locations[i]] += (i%2 == 0) ? 1 : -1;
	}
	return destination;
}
/* consolidateI2SIndirect turns an implicit RIV into a sparse RIV by way of
 * a temporary dense vector: the locations are mapped to dense form, the
 * dense form is compacted, and the temporary is released.
 * implicit: list of valueCount locations
 * returns the compacted sparse RIV
 */
sparseRIV consolidateI2SIndirect(int *implicit, size_t valueCount){
	int *scratchDense = mapI2D(implicit, valueCount);
	/* the consolidate step is what flags the result as sparse */
	sparseRIV compacted = consolidateD2S(scratchDense);
	free(scratchDense);
	return compacted;
}
/* consolidateI2SDirect turns an implicit RIV (ordered list of locations)
 * straight into a sparse RIV without building a dense vector.
 * Entries of the list alternate between contributing +1 and -1; repeated
 * locations are merged into a single pair whose value is the running sum.
 * Pairs are staged in the shared temp block (locations at offset RIVSIZE,
 * values at offset 2*RIVSIZE) and then copied into one malloc'd block.
 * implicit: list of valueCount locations
 * returns a sparse RIV flagged SPARSE whose locations pointer owns the
 *         allocation (values points into the same block); on allocation
 *         failure the result has count 0 and NULL locations/values
 */
sparseRIV consolidateI2SDirect(int *implicit, size_t valueCount){
	/* start from all zeros so the flags can be OR-ed safely */
	sparseRIV sparseOut = {0};
	int *locationsTemp = RIVKey.h_tempBlock+RIVSIZE;
	int *valuesTemp = RIVKey.h_tempBlock+2*RIVSIZE;
	size_t uniqueCount = 0;
	int add = 1;
	for(size_t i=0; i<valueCount; i++){
		/* look for an existing pair holding this location */
		size_t j = 0;
		while(j<uniqueCount && locationsTemp[j] != implicit[i]){
			j++;
		}
		if(j == uniqueCount){
			/* first sighting: open a new pair */
			locationsTemp[uniqueCount] = implicit[i];
			valuesTemp[uniqueCount] = add;
			uniqueCount++;
		}else{
			/* repeat: update the value of the pair that matched */
			valuesTemp[j] += add;
		}
		/* the sign flips after every entry, matched or not */
		add = -add;
	}
	sparseOut.flags |= SPARSE;
	/* cast kept so the header also builds when included from the CUDA source */
	sparseOut.locations = (int*)malloc(2*uniqueCount*sizeof(int));
	if(!sparseOut.locations){
		if(uniqueCount){
			fprintf(stderr, "memory allocation failed\n");
		}
		return sparseOut;
	}
	sparseOut.count = uniqueCount;
	sparseOut.values = sparseOut.locations+uniqueCount;
	/* the two staging areas are RIVSIZE apart, so each needs its own copy */
	memcpy(sparseOut.locations, locationsTemp, uniqueCount*sizeof(int));
	memcpy(sparseOut.values, valuesTemp, uniqueCount*sizeof(int));
	return sparseOut;
}
/* consolidateD2S compacts a dense value array into a sparse RIV holding
 * only the non-zero entries.
 * Pairs are staged in the shared temp block (locations at offset RIVSIZE,
 * values RIVSIZE further on) and then copied into one malloc'd block.
 * denseInput: array of RIVSIZE ints
 * returns a sparse RIV flagged SPARSE whose locations pointer owns the
 *         allocation (values points into the same block); on allocation
 *         failure the result has count 0 and NULL locations/values
 */
sparseRIV consolidateD2S(int *denseInput){
	/* start from all zeros so the flags can be OR-ed safely */
	sparseRIV output = {0};
	/* key/value pairs will be loaded to a worst-case sized temporary slot */
	int* locations = RIVKey.h_tempBlock+RIVSIZE;
	int* values = locations+RIVSIZE;
	size_t found = 0;
	for(int i=0; i<RIVSIZE; i++){
		/* act only on non-zeros */
		if(denseInput[i]){
			locations[found] = i;
			values[found] = denseInput[i];
			found++;
		}
	}
	output.flags |= SPARSE;
	/* a slot is opened for the locations/values pair */
	output.locations = (int*) malloc(found*2*sizeof(int));
	if(!output.locations){
		if(found){
			printf("memory allocation failed"); //*TODO enable fail point knowledge
		}
		/* nothing can be copied; hand back an empty vector instead of writing through NULL */
		return output;
	}
	output.count = found;
	output.values = output.locations + found;
	memcpy(output.locations, locations, found*sizeof(int));
	memcpy(output.values, values, found*sizeof(int));
	return output;
}
/* RIVInit prepares the global RIVKey state:
 *  - sets the implicit-to-sparse threshold to sqrt(RIVSIZE)
 *  - installs signalSecure as the SIGSEGV handler
 *  - allocates the shared temp block (3*RIVSIZE ints)
 *  - allocates a zeroed cache of CACHESIZE dense RIVs
 * The process exits if a required allocation fails.
 */
void RIVInit(){
	RIVKey.I2SThreshold = sqrt(RIVSIZE);
	/* the whole struct, including sa_mask, must be initialised before use */
	struct sigaction action;
	memset(&action, 0, sizeof(action));
	sigemptyset(&action.sa_mask);
	action.sa_sigaction = signalSecure;
	action.sa_flags = SA_SIGINFO;
	if(sigaction(SIGSEGV,&action,NULL)){
		perror("RIVInit: could not install SIGSEGV handler");
	}
	/* open a slot at least large enough for worst case handling of
	 * sparse to dense conversion. may be enlarged by filetoL2 functions */
	RIVKey.h_tempBlock = (int*)malloc(3*RIVSIZE*sizeof(int));
	if(!RIVKey.h_tempBlock){
		perror("RIVInit: temp block allocation failed");
		exit(EXIT_FAILURE);
	}
	RIVKey.tempSize = 3*RIVSIZE;
	RIVKey.thing = 0;
	RIVKey.cacheSize = CACHESIZE;
	/* open a slot for a cache of dense RIVs, optimized for frequent accesses */
	RIVKey.RIVCache = (denseRIV*)calloc(RIVKey.cacheSize,sizeof(denseRIV));
	/* a NULL result is only an error when a cache was actually requested */
	if(!RIVKey.RIVCache && RIVKey.cacheSize > 0){
		perror("RIVInit: cache allocation failed");
		exit(EXIT_FAILURE);
	}
}
/* RIVCleanup flushes the cache through cacheDump, reports if any push
 * failed, and releases the blocks allocated by RIVInit.
 * The pointers and sizes in RIVKey are reset afterwards, so a later
 * cacheDump (for example from the signal handler) finds an empty cache
 * instead of walking freed memory.
 */
void RIVCleanup(){
	if(cacheDump()){
		puts("cache dump failed, some lexicon data was lost");
	}
	/* free(NULL) is harmless, so no guard on CACHESIZE is needed */
	free(RIVKey.RIVCache);
	RIVKey.RIVCache = NULL;
	RIVKey.cacheSize = 0;
	free(RIVKey.h_tempBlock);
	RIVKey.h_tempBlock = NULL;
	RIVKey.tempSize = 0;
}
/* wordtoSeed derives an integer seed from the characters of a word.
 * Each character is shifted left by 5 bits more than the previous one and
 * the results are summed. The shift amount is reduced modulo the bit width
 * of unsigned int and the sum is kept in unsigned arithmetic, so words of
 * any length have a well-defined seed (shifting by the full width or more
 * is undefined).
 * word: NUL-terminated string
 * returns the seed; sums above INT_MAX are converted back to int
 */
int wordtoSeed(unsigned char* word){
	const unsigned int width = (unsigned int)(sizeof(unsigned int)*8);
	unsigned int seed = 0;
	unsigned int shift = 0;
	for(; *word; word++){
		/* left-shift 5 more each time *should* make seeds unique to words */
		seed += (unsigned int)(*word) << shift;
		shift = (shift+5) % width;
	}
	return (int)seed;
}
/* makeSparseLocations writes the pseudo-random locations for one word into
 * the locations array, starting at index count.
 * The generator is seeded from the word, so the same word always yields
 * the same locations. Values are written two per step until NONZEROS
 * slots are covered.
 * word: NUL-terminated word used to seed rand()
 * locations: destination array
 * count: number of slots already filled
 */
void makeSparseLocations(unsigned char* word, int *locations, size_t count){
	int *cursor = locations+count;
	int *const end = cursor+NONZEROS;
	srand(wordtoSeed(word));
	/* filled in pairs, guaranteed to be an even number of steps */
	for(; cursor<end; cursor+=2){
		cursor[0] = rand()%RIVSIZE;
		cursor[1] = rand()%RIVSIZE;
	}
}
/* fLexPush writes a dense RIV to the file "lexicon/<name>".
 * File layout: the frequency, the magnitude, then all RIVSIZE values.
 * RIVout.values is released only when the push succeeds; on failure the
 * buffer is left with the caller.
 * returns 0 on success, 1 if the file could not be opened or fully written
 */
int fLexPush(denseRIV RIVout){
	char pathString[200] = {0};
	/* word data will be placed in a (new?) file under the lexicon directory
	 * in a file named after the word itself */
	snprintf(pathString, sizeof(pathString), "lexicon/%s", RIVout.name);
	FILE *lexWord = fopen(pathString, "wb");
	if(!lexWord){
		printf("lexicon push has failed for word: %s\nconsider cleaning inputs", pathString);
		return 1;
	}
	int failed = 0;
	/* element sizes come from the fields themselves rather than a literal 4 */
	failed |= (fwrite(RIVout.frequency, sizeof(*RIVout.frequency), 1, lexWord) != 1);
	failed |= (fwrite(&RIVout.magnitude, sizeof(RIVout.magnitude), 1, lexWord) != 1);
	failed |= (fwrite(RIVout.values, sizeof(*RIVout.values), RIVSIZE, lexWord) != RIVSIZE);
	/* fclose flushes, so a failure here also means lost data */
	if(fclose(lexWord)){
		failed = 1;
	}
	if(failed){
		return 1;
	}
	free(RIVout.values);
	return 0;
}
/* fLexPull reads one lexicon file (frequency, magnitude, RIVSIZE values)
 * into a freshly allocated dense RIV.
 * The block holds RIVSIZE+1 ints: the values followed by the frequency.
 * lexWord: open stream positioned at the start of the file
 * returns the dense RIV; magnitude is set to -1 when the allocation fails
 *         or the file does not contain every expected item (values is
 *         NULL in the allocation-failure case)
 */
denseRIV fLexPull(FILE* lexWord){
	/* start from all zeros so name and flags hold defined contents */
	denseRIV output = {0};
	/* zeroed so a short read never exposes uninitialised memory */
	output.values = (int*)calloc(RIVSIZE+1, sizeof(int));
	if(!output.values){
		output.magnitude = -1;
		return output;
	}
	output.frequency = (unsigned int*)(output.values+RIVSIZE);
	/* fread returns the number of items read, so size comes first and the
	 * item count second; the three calls then add up to RIVSIZE+2 */
	size_t diagnostic = 0;
	diagnostic += fread(output.frequency, sizeof(*output.frequency), 1, lexWord);
	diagnostic += fread(&(output.magnitude), sizeof(output.magnitude), 1, lexWord);
	diagnostic += fread(output.values, sizeof(*output.values), RIVSIZE, lexWord);
	if(diagnostic != (RIVSIZE+2)){
		output.magnitude = -1;
	}
	output.cached = 0;
	output.flags &= ~SPARSE;
	return output;
}
/* signalSecure is the signal handler installed by RIVInit.
 * It flushes the cache through cacheDump, reports the outcome, restores
 * the default disposition for the signal and re-sends the signal to this
 * process.
 * NOTE(review): cacheDump and puts are called from signal context —
 * confirm that this best-effort behaviour is acceptable.
 * signum: the signal being handled; si and arg are not used
 */
void signalSecure(int signum, siginfo_t *si, void* arg){
	(void)si;
	(void)arg;
	const char *report = cacheDump()
		? "cache dump failed, some lexicon data lost"
		: "cache dumped successfully";
	puts(report);
	signal(signum, SIG_DFL);
	kill(getpid(), signum);
}
int cacheDump(){
int flag = 0;
denseRIV* cache_slider = RIVKey.RIVCache;
denseRIV* cache_stop = RIVKey.RIVCache+RIVKey.cacheSize;
while(cache_slider<cache_stop){
if((*cache_slider).cached){
flag += fLexPush(*cache_slider);
}
cache_slider++;
}
return flag;
}
/* denseAllocate returns a dense RIV whose values are all zero.
 * One block of RIVSIZE+1 ints is allocated: the first RIVSIZE hold the
 * values and the last one holds the frequency.
 * returns the new RIV; values and frequency are both NULL if the
 *         allocation failed
 */
denseRIV denseAllocate(){
	/* start from all zeros so name and flags hold defined contents */
	denseRIV output = {0};
	/* cast kept so the header also builds when included from the CUDA source */
	output.values = (int*)calloc(RIVSIZE+1, sizeof(int));
	/* for compact memory use, frequency is placed immediately after values;
	 * no pointer arithmetic is done on a failed allocation */
	output.frequency = output.values ? (unsigned int*)(output.values+RIVSIZE) : NULL;
	output.magnitude = 0;
	output.cached = 0;
	output.flags &= ~SPARSE;
	return output;
}
/*TODO add a simplified free function*/
...@@ -7,4 +7,24 @@ int isWordClean(char* word); ...@@ -7,4 +7,24 @@ int isWordClean(char* word);
/* used by wordClean */ /* used by wordClean */
int isLetter(char c); int isLetter(char c);
/* isLetter reports whether c is acceptable inside a clean word:
 * a lowercase ASCII letter, a space, or an underscore.
 * returns 1 if acceptable, 0 otherwise
 */
int isLetter(char c){
	const int isLowercase = (c >= 'a') && (c <= 'z');
	return isLowercase || c == ' ' || c == '_';
}
/* isWordClean checks the first 99 characters of word (or fewer, if the
 * terminator comes sooner) with isLetter.
 * returns 1 if every examined character passes, 0 at the first that fails
 */
int isWordClean(char* word){
	for(int position=0; position<99; position++){
		const char current = word[position];
		if(current == '\0'){
			/* reached the end without meeting a bad character */
			return 1;
		}
		if(!isLetter(current)){
			return 0;
		}
	}
	return 1;
}
#endif #endif
File deleted
File added
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <time.h>
#define RIVSIZE 25000
#define CACHESIZE 0
#define NONZEROS 2
#define THRESHOLD 0.70
#include "RIVtools.h"
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
/* HandleError backs the HANDLE_ERROR macro: when err is not cudaSuccess it
 * prints the CUDA error string with the file and line supplied by the
 * macro and terminates the program.
 */
static void HandleError(cudaError_t err, const char *file, int line){
	if(err == cudaSuccess){
		return;
	}
	printf("%s in %s at line %d\n", cudaGetErrorString(err), file, line);
	exit(EXIT_FAILURE);
}
/* d_mapS2D scatters one sparse RIV into a dense slot, one pair per thread.
 * d_denseSlot: dense array indexed by location
 * d_sparseSlot: count locations immediately followed by count values
 * count: number of location/value pairs
 */
__global__ void d_mapS2D(int *d_denseSlot, int *d_sparseSlot, int count){
	int id = blockIdx.x*blockDim.x + threadIdx.x;
	/* threads past the last pair have nothing to do
	 * (the former test "!id<count" compared (!id) with count) */
	if(id >= count) return;
	int location = d_sparseSlot[id];
	d_denseSlot[location] = d_sparseSlot[id+count];
}
/* cosines computes, one thread per sparse RIV, the dot product of that RIV
 * with the dense base vector and stores it in output[id].
 * d_denseBase: dense vector indexed by location
 * d_sparseBlock: one record per RIV, RIVSIZE ints apart; each record is a
 *                count, then count locations, then count values
 * output: one int per RIV
 * RIVcount: number of records
 * NOTE(review): cosineMatrix currently packs the records back to back
 * rather than RIVSIZE apart — the two layouts need to be reconciled.
 */
__global__ void cosines(int* d_denseBase, int* d_sparseBlock, int* output, int RIVcount){
	int id =blockIdx.x*blockDim.x + threadIdx.x;
	if(id>=RIVcount) return;
	/* the record must be addressed in the device block itself; the address
	 * of a local copy of the count points at nothing useful */
	int *record = d_sparseBlock+(size_t)RIVSIZE*id;
	int count = record[0];
	int *locations = record+1;
	int *values = locations+count;
	int dot = 0;
	for(int pair=count-1; pair>=0; pair--){
		dot += values[pair]*d_denseBase[locations[pair]];
	}
	output[id] = dot;
}
void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount);
float** cosineMatrix(sparseRIV* RIVs, int RIVcount);
/* main reads every file under the directory given as argv[1] into a sparse
 * RIV, computes each magnitude, runs cosineMatrix over the set and prints
 * timing figures.
 * returns 0 on success, 1 when no usable directory argument is given or
 *         the initial allocation fails
 */
int main(int argc, char *argv[]){
	clock_t begintotal = clock();
	if(argc <2){
		printf("give me a directory");
		return 1;
	}
	/* build "<dir>/" with a bound so a long argument cannot overrun the buffer */
	char rootString[2000];
	int rootLength = snprintf(rootString, sizeof(rootString), "%s/", argv[1]);
	if(rootLength < 0 || (size_t)rootLength >= sizeof(rootString)){
		printf("directory path is too long\n");
		return 1;
	}
	int fileCount = 0;
	RIVInit();
	sparseRIV *fileRIVs = (sparseRIV*) malloc(1*sizeof(sparseRIV));
	if(!fileRIVs){
		printf("memory allocation failed\n");
		RIVCleanup();
		return 1;
	}
	directoryToL2s(rootString, &fileRIVs, &fileCount);
	printf("fileCount: %d\n", fileCount);
	for(int i=0; i<fileCount; i++){
		fileRIVs[i].magnitude = getMagnitudeSparse(fileRIVs[i]);
	}
	clock_t beginnsquared = clock();
	/* nothing increments this yet; it is still printed below */
	int count = 0;
	cosineMatrix(fileRIVs, fileCount);
	clock_t endnsquared = clock();
	double time = (double)(endnsquared - beginnsquared) / CLOCKS_PER_SEC;
	printf("\nnsquared time:%lf\n\n", time);
	printf("\ncosines: %d \n", count);
	printf("\nsims: %d \n", RIVKey.thing);
	clock_t endtotal = clock();
	double time_spent = (double)(endtotal - begintotal) / CLOCKS_PER_SEC;
	printf("total time:%lf\n\n", time_spent);
	/* each sparse RIV owns one block, reachable through its locations pointer */
	for(int i=0; i<fileCount; i++){
		free(fileRIVs[i].locations);
	}
	free(fileRIVs);
	RIVCleanup();
	return 0;
}
/* directoryToL2s walks rootString recursively and appends one sparse RIV
 * per regular entry to the growing array *fileRIVs.
 * Entries whose name starts with '.' are skipped.
 * rootString: directory path ending in '/'
 * fileRIVs: address of the heap array of RIVs; it is reallocated here
 * fileCount: address of the number of RIVs stored so far
 * NOTE(review): directories are recognised through d_type only — confirm
 * the target filesystems always fill it in.
 */
void directoryToL2s(char *rootString, sparseRIV** fileRIVs, int *fileCount){
	char pathString[2000];
	DIR *directory = opendir(rootString);
	if(!directory){
		printf("location not found, %s\n", rootString);
		return;
	}
	struct dirent *files = 0;
	while((files=readdir(directory))){
		if(*(files->d_name) == '.') continue;
		int isDirectory = (files->d_type == DT_DIR);
		int pathLength = snprintf(pathString, sizeof(pathString), "%s%s%s",
			rootString, files->d_name, isDirectory ? "/" : "");
		if(pathLength < 0 || (size_t)pathLength >= sizeof(pathString)){
			printf("path too long, skipping %s%s\n", rootString, files->d_name);
			continue;
		}
		if(isDirectory){
			/* descend, then move on: a directory is not itself a data file */
			directoryToL2s(pathString, fileRIVs, fileCount);
			continue;
		}
		FILE *input = fopen(pathString, "r");
		if(!input){
			printf("file %s doesn't seem to exist, breaking out of loop", pathString);
			break;
		}
		/* keep the old array reachable if the reallocation fails */
		sparseRIV *grown = (sparseRIV*)realloc((*fileRIVs), ((*fileCount)+1)*sizeof(sparseRIV));
		if(!grown){
			printf("memory allocation failed\n");
			fclose(input);
			break;
		}
		(*fileRIVs) = grown;
		grown[(*fileCount)] = fileToL2(input);
		/* name is far smaller than pathString, so the copy is bounded */
		snprintf(grown[(*fileCount)].name, sizeof(grown[(*fileCount)].name), "%s", pathString);
		fclose(input);
		(*fileCount)++;
	}
	closedir(directory);
}
/* cosineMatrix stages the sparse RIVs in one large device block laid out as
 * [output slot | dense slot | packed sparse records], each record being a
 * count followed by that RIV's locations and values.
 * The comparison itself is not implemented yet, so the block is released
 * again and NULL is returned.
 * RIVs: host array of RIVcount sparse RIVs whose values sit immediately
 *       after their locations in memory
 * returns NULL (no result matrix is produced yet)
 */
float** cosineMatrix(sparseRIV* RIVs, int RIVcount){
	const size_t blockInts = 100000000;
	const size_t outputInts = ((size_t)RIVcount*(size_t)RIVcount)/2;
	/* make sure everything that will be uploaded fits in the block */
	size_t neededInts = outputInts+RIVSIZE;
	for(int i=0; i<RIVcount; i++){
		neededInts += 1+2*(size_t)RIVs[i].count;
	}
	if(neededInts > blockInts){
		printf("cosineMatrix: %lu ints needed, device block holds %lu\n",
			(unsigned long)neededInts, (unsigned long)blockInts);
		return NULL;
	}
	int *d_massiveBlock = NULL;
	HANDLE_ERROR (cudaMalloc((void**)&d_massiveBlock, blockInts*sizeof(int)));
	int *d_outputSlot = d_massiveBlock;
	int *d_denseSlot = d_outputSlot+outputInts;
	int *d_sparseSection =d_denseSlot+RIVSIZE;
	int *d_sparse_slider = d_sparseSection;
	for(int i=0; i<RIVcount; i++){
		/* count is a size_t on the host; the device record stores an int */
		int pairCount = (int)RIVs[i].count;
		HANDLE_ERROR (cudaMemcpy (d_sparse_slider++, &pairCount, sizeof(int), cudaMemcpyHostToDevice));
		HANDLE_ERROR (cudaMemcpy (d_sparse_slider, RIVs[i].locations, (size_t)pairCount*2*sizeof(int), cudaMemcpyHostToDevice));
		d_sparse_slider+=pairCount*2;
	}
	/* nothing consumes the staged data yet, so do not leak the block */
	HANDLE_ERROR (cudaFree(d_massiveBlock));
	return NULL;
}
File deleted
File deleted
...@@ -47,4 +47,289 @@ sparseRIV consolidateI2S(int *implicit, size_t valueCount); ...@@ -47,4 +47,289 @@ sparseRIV consolidateI2S(int *implicit, size_t valueCount);
sparseRIV text2L2(char *text); sparseRIV text2L2(char *text);
float getMagnitudeSparse(sparseRIV input); float getMagnitudeSparse(sparseRIV input);
/* text2L2 builds a sparse RIV from a block of text: each
 * whitespace-separated word (at most 99 characters at a time) contributes
 * NONZEROS locations, and the accumulated locations are consolidated.
 * text: NUL-terminated string
 * returns the consolidated sparse RIV with boolean set to 1
 * NOTE(review): frequency receives an integer word count here while the
 * RIV struct in this commit declares it as a pointer — needs reconciling.
 */
sparseRIV text2L2(char *text){
	char word[100] = {0};
	/* locations (implicit RIV) are temp stored in temp block, and moved
	 * to permanent home in consolidation */
	int *locations = RIVKey.h_tempBlock;
	int locationCount = 0;
	int displacement = 0;
	/* sscanf yields 1 only when a word was read; EOF (-1) must end the loop */
	while(sscanf(text, "%99s%n", word, &displacement) == 1){
		/* advance by exactly what was consumed, never past the terminator */
		text += displacement;
		unsigned int blockSize = locationCount+NONZEROS;
		/* if this word would overflow the locations block, grow it */
		if(blockSize>RIVKey.tempSize){
			int *grown = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
			if(!grown){
				/* keep what has been gathered so far */
				printf("memory allocation failed\n");
				break;
			}
			RIVKey.h_tempBlock = grown;
			locations = grown;
			RIVKey.tempSize = blockSize;
		}
		/* add word's L1 RIV to the accumulating implicit RIV */
		makeSparseLocations((unsigned char*)word, locations, locationCount);
		locationCount+= NONZEROS;
	}
	sparseRIV output = consolidateI2S(locations, locationCount);
	/* frequency records the number of words in this text, until frequency
	 * is needed to hold some more useful data point */
	output.frequency = locationCount/NONZEROS;
	output.boolean = 1;
	return output;
}
/* fileToL2 builds a sparse RIV from a text stream: each
 * whitespace-separated word (at most 99 characters at a time) contributes
 * NONZEROS locations, and the accumulated locations are consolidated.
 * data: open stream, read until no further word can be scanned
 * returns the consolidated sparse RIV with boolean set to 1
 * NOTE(review): frequency receives an integer word count here while the
 * RIV struct in this commit declares it as a pointer — needs reconciling.
 */
sparseRIV fileToL2(FILE *data){
	unsigned char word[100] = {0};
	/* locations (implicit RIV) are temp stored in temp block, and moved
	 * to permanent home in consolidation */
	int *locations = RIVKey.h_tempBlock;
	int locationCount = 0;
	/* fscanf yields 1 for every word, including a final word that ends
	 * right at end-of-file, and something else on EOF or a read error */
	while(fscanf(data, "%99s", word) == 1){
		unsigned int blockSize = locationCount+NONZEROS;
		/* if this word would overflow the locations block, grow it */
		if(blockSize>RIVKey.tempSize){
			int *grown = (int*) realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
			if(!grown){
				/* keep what has been gathered so far */
				printf("memory allocation failed\n");
				break;
			}
			RIVKey.h_tempBlock = grown;
			locations = grown;
			RIVKey.tempSize = blockSize;
		}
		/* add word's L1 RIV to the accumulating implicit RIV */
		makeSparseLocations(word, locations, locationCount);
		locationCount+= NONZEROS;
	}
	sparseRIV output = consolidateI2S(locations, locationCount);
	/* frequency records the number of words in this file */
	output.frequency = locationCount/NONZEROS;
	output.boolean = 1;
	return output;
}
/* fileToL2Clean works like fileToL2 but ignores every word that
 * isWordClean rejects.
 * data: open stream, read until no further word can be scanned
 * returns the consolidated sparse RIV with boolean set to 1
 * NOTE(review): frequency receives an integer word count here while the
 * RIV struct in this commit declares it as a pointer — needs reconciling.
 */
sparseRIV fileToL2Clean(FILE *data){
	unsigned char word[100] = {0};
	int *locations = RIVKey.h_tempBlock;
	int locationCount = 0;
	/* fscanf yields 1 for every word, including a final word that ends
	 * right at end-of-file, and something else on EOF or a read error */
	while(fscanf(data, "%99s", word) == 1){
		/* if the word is not clean, skip it */
		if(!isWordClean((char*)word)){
			continue;
		}
		unsigned int blockSize = locationCount+NONZEROS;
		/* if this word would overflow the locations block, grow it */
		if(blockSize>RIVKey.tempSize){
			int *grown = (int*)realloc(RIVKey.h_tempBlock, blockSize*sizeof(int));
			if(!grown){
				/* keep what has been gathered so far */
				printf("memory allocation failed\n");
				break;
			}
			RIVKey.h_tempBlock = grown;
			locations = grown;
			RIVKey.tempSize = blockSize;
		}
		makeSparseLocations(word, locations, locationCount);
		locationCount+= NONZEROS;
	}
	sparseRIV output = consolidateI2S(locations, locationCount);
	/* frequency records the number of clean words in this file */
	output.frequency = locationCount/NONZEROS;
	output.boolean = 1;
	return output;
}
/* consolidateI2S turns an implicit RIV into a sparse RIV, choosing the
 * strategy by size: lists shorter than RIVKey.I2SThreshold use the direct
 * method, everything else goes through a temporary dense vector.
 * implicit: list of valueCount locations
 * returns the consolidated sparse RIV
 */
sparseRIV consolidateI2S(int *implicit, size_t valueCount){
	/* direct is faster on small inputs but scales badly on large ones */
	const int useDirect = valueCount<RIVKey.I2SThreshold;
	return useDirect
		? consolidateI2SDirect(implicit, valueCount)
		: consolidateI2SIndirect(implicit, valueCount);
}
/* aggregateWord2D adds one word's level-one RIV directly onto a dense RIV.
 * The generator is seeded from the word and NONZEROS locations are drawn,
 * alternately adding +1 and -1. This is the same number of locations, in
 * the same order, as makeSparseLocations followed by mapI2D would apply
 * (the loop used to draw twice as many).
 * destination: dense RIV whose values array is updated in place
 * word: NUL-terminated word
 */
void aggregateWord2D(denseRIV destination, char* word){
	srand(wordtoSeed((unsigned char*)word));
	/* two locations per step, so step by two to stop at NONZEROS */
	for(int i=0; i<NONZEROS; i+=2){
		destination.values[(rand()%RIVSIZE)] +=1;
		destination.values[(rand()%RIVSIZE)] -= 1;
	}
}
/* cosCompare returns the cosine between a dense RIV and a sparse RIV:
 * the dot product over the sparse RIV's pairs divided by the product of
 * the two stored magnitudes.
 * baseRIV: dense RIV, indexed by the comparator's locations
 * comparator: sparse RIV supplying the location/value pairs
 */
float cosCompare(denseRIV baseRIV, sparseRIV comparator){
	int dot = 0;
	/* pairs are visited from last to first */
	for(int remaining = comparator.count; remaining != 0; ){
		const int pair = --remaining;
		const int location = comparator.locations[pair];
		dot += comparator.values[pair] * baseRIV.values[location];
	}
	/* dot divided by product of magnitudes */
	return dot/(baseRIV.magnitude*comparator.magnitude);
}
/* getMagnitudeSparse returns the Euclidean length of a sparse RIV: the
 * square root of the sum of its squared values.
 * Each value is widened before squaring so the product cannot overflow int.
 * input is passed by value, so the result is only returned; the caller
 * stores it where it is needed.
 */
float getMagnitudeSparse(sparseRIV input){
	unsigned long long int sumOfSquares = 0;
	const int *value = input.values;
	const int *const valuesEnd = value+input.count;
	for(; value<valuesEnd; value++){
		const long long widened = *value;
		sumOfSquares += (unsigned long long)(widened*widened);
	}
	return (float)sqrt((double)sumOfSquares);
}
/* lexPull fetches the dense RIV for a word.
 * With a cache (CACHESIZE > 0) the word's slot is checked first and
 * returned on a name match. Otherwise the vector is read from
 * "lexicon/<word>", or a zero vector is allocated when no such file can be
 * opened (including when the word is too long to form a path).
 * word: NUL-terminated word
 * returns the dense RIV with its name set to word (cut to fit the name
 *         field if necessary)
 */
denseRIV lexPull(char* word){
#if CACHESIZE > 0
	/* if there is a cache, first check if the word is cached */
	srand(wordtoSeed((unsigned char*)word));
	int hash = rand()%CACHESIZE;
	if(!strcmp(word, RIVKey.RIVCache[hash].name)){
		/* if word is cached, pull from cache and exit */
		return RIVKey.RIVCache[hash];
	}
#endif /* CACHESIZE > 0 */
	/* if not, attempt to pull the word data from lexicon file */
	denseRIV output;
	char pathString[200];
	FILE *lexWord = NULL;
	int pathLength = snprintf(pathString, sizeof(pathString), "lexicon/%s", word);
	/* a truncated path would name the wrong file, so only open complete ones */
	if(pathLength > 0 && (size_t)pathLength < sizeof(pathString)){
		lexWord = fopen(pathString, "rb");
	}
	if(lexWord){
		/* the lexicon file already exists: pull data from file */
		output = fLexPull(lexWord);
		fclose(lexWord);
	}else{
		/*if file does not exist, return a 0 vector (word is new to the lexicon */ //#TODO enable NO-NEW features to protect mature lexicons?
		output = denseAllocate();
	}
	/* bounded copy: word may be longer than the name field */
	snprintf(output.name, sizeof(output.name), "%s", word);
	return output;
}
/* lexPush stores a dense RIV, going through the cache when there is one.
 * Without a cache (CACHESIZE == 0) the RIV is written straight to file.
 * With a cache: a RIV that is already cached needs nothing; an empty slot
 * takes the RIV; an occupied slot is replaced only if the new RIV has the
 * higher frequency (the evicted entry is written to file); otherwise the
 * new RIV itself is written to file.
 * returns 0 on success, or the non-zero result of the fLexPush call that
 *         failed
 */
int lexPush(denseRIV RIVout){
#if CACHESIZE == 0
	/* if there is no cache, simply push to file */
	return fLexPush(RIVout);
#else /* CACHESIZE != 0 */
	/* if our RIV was cached, there are two options (hopefully)
	 * either the RIV is still cached, and the data has been updated
	 * to the cache or the RIV was pushed out from under it,
	 * in which case it has already been pushed! move on*/
	if(RIVout.cached){
		return 0;
	}
	srand(wordtoSeed((unsigned char*)RIVout.name));
	int hash = rand()%CACHESIZE;
	denseRIV *slot = &RIVKey.RIVCache[hash];
	if(!slot->cached){
		/* if there is no word in this cache slot, push to cache instead of file */
		*slot = RIVout;
		slot->cached = 1;
		return 0;
	}
	/*if the current RIV is more frequent than the RIV holding its slot */
	if(*(RIVout.frequency) > *(slot->frequency) ){
		/* push the current cache entry to a file */
		int diag = fLexPush(*slot);
		/* push the current RIV to cache */
		*slot = RIVout;
		slot->cached = 1;
		return diag;
	}
	/* push current RIV to file */
	return fLexPush(RIVout);
#endif /* CACHESIZE == 0 */
}
/* fileToL2direct builds a sparse RIV from a text stream by aggregating
 * every word straight into a dense vector held at the start of the shared
 * temp block, then compacting that vector.
 * data: open stream, read until no further word can be scanned
 * returns the consolidated sparse RIV with boolean set to 1
 * NOTE(review): frequency receives an integer word count here while the
 * RIV struct in this commit declares it as a pointer — needs reconciling.
 */
sparseRIV fileToL2direct(FILE *data){
	unsigned char word[100] = {0};
	denseRIV denseTemp;
	// a temporary dense RIV is stored in the tempBlock
	denseTemp.values = RIVKey.h_tempBlock;
	memset(RIVKey.h_tempBlock, 0, RIVSIZE*sizeof(int));
	int count = 0;
	/* fscanf yields 1 for every word, including a final word that ends
	 * right at end-of-file, so count matches the words aggregated */
	while(fscanf(data, "%99s", word) == 1){
		count++;
		// add word's L1 RIV to the accumulating dense RIV
		aggregateWord2D(denseTemp, (char*)word);
	}
	sparseRIV output = consolidateD2S(denseTemp.values);
	// frequency records the number of words in this file
	output.frequency = count;
	output.boolean = 1;
	return output;
}
#endif #endif
#include <stdio.h>
#include <stdlib.h>
#include <strsafe.h>
#define SEEDMASK 25214903917
/* RIVData is the type of the global RIVKeyData object shared by the
 * functions of this file. Fields described as "presumably" are not
 * referenced in the visible part of the file. */
struct RIVData{
/* vector length; mapS2D clears this many ints of its destination */
int RIVsize;
/* presumably the number of non-zero entries generated per word — TODO confirm */
int nonZeros;
/* presumably bit masks used when deriving seeds — TODO confirm */
long long int *masks;
/* scratch block: seed buffer in FileToL2, dense base vector in cosineCompare */
int *h_tempBlock;
/* presumably a host staging buffer with its cursor and end — TODO confirm */
int *h_stagingBlock;
int *h_staging_slider;
int *h_staging_stop;
/* presumably offsets of staged vectors — TODO confirm */
int *h_displacements;
/* presumably bounds of free device memory — TODO confirm */
int *d_OpenSlot;
int *d_SlotEnd;
/* presumably per-vector magnitudes held on the device — TODO confirm */
float *d_magnitudes;
/* counter incremented once per FileToL2 call */
int thing;
}RIVKeyData;
/* sparseRIV stores a vector as parallel arrays of locations and values. */
typedef struct{
/* label printed by cosineCompare when a match is reported */
char name[100];
/* count values, paired by index with locations */
int *values;
/* count indices into the dense form */
int *locations;
/* number of location/value pairs */
int count;
/* not referenced in the visible part of this file */
int frequency;
/* vector length, filled in by getMagnitudes */
float magnitude;
/* set to 1 by FileToL2; cosineCompare skips entries where it is 0 and
 * clears it once an entry has matched */
int boolean;
}sparseRIV;
sparseRIV FileToL2(FILE *data);
void consolidateD2S(sparseRIV *destination, int *denseInput);
void setKeyData(int RIVsize, int nonZeros, int blockSize);
int* mapS2D(int * destination, sparseRIV input);
int* makeSparseLocations(int *seeds, int seedCount);
void makeSeeds(unsigned char* word, int **seeds, int *seedCount);
float* cosineCompare(sparseRIV baseRIV, sparseRIV *multipliers, int multiplierCount, float threshold);
void getMagnitudes(sparseRIV *inputs, int RIVCount);
int *mapI2D(int *locations, int seedCount);
sparseRIV text2L2(unsigned char *text);
unsigned char *sscanAdvance(unsigned char **string, unsigned char *word);
/* FileToL2 builds a sparse RIV from a text stream: seeds are derived from
 * every word, turned into locations, mapped to a dense vector and
 * compacted.
 * data: open stream, read until no further word can be scanned
 * returns the sparse RIV with boolean set to 1; RIVKeyData.thing is
 *         incremented once per call
 */
sparseRIV FileToL2(FILE *data){
	/* fixed buffer: nothing to leak, and the scan width keeps words inside it */
	unsigned char word[2000] = {0};
	int *seeds = RIVKeyData.h_tempBlock;
	int seedCount = 0;
	/* fscanf yields 1 for every word, including a final word that ends
	 * right at end-of-file, and something else on EOF or a read error */
	while(fscanf(data, "%1999s", word) == 1){
		makeSeeds(word, &seeds, &seedCount);
		/* the buffer is handed to makeSeeds fully zeroed beyond the word */
		memset(word, 0, sizeof(word));
	}
	int *locations = makeSparseLocations(seeds, seedCount);
	int *L2dense = mapI2D(locations, seedCount);
	sparseRIV output;
	consolidateD2S( &output, L2dense);
	free(L2dense);
	output.boolean = 1;
	RIVKeyData.thing++;
	return output;
}
/* cosineCompare: cosine similarity of baseRIV against each candidate in multipliers.
 *
 * baseRIV:         reference vector; its magnitude must already be set
 * multipliers:     candidates; entries whose boolean is 0 are skipped. An entry
 *                  scoring >= threshold is printed and has its boolean cleared.
 * multiplierCount: number of entries in multipliers
 * threshold:       minimum cosine that counts as a match
 * returns:         heap array of multiplierCount floats (caller frees) holding the
 *                  cosine per candidate, 0 for skipped candidates; NULL if the
 *                  array could not be allocated
 *
 * The base vector is expanded into RIVKeyData.h_tempBlock, overwriting whatever
 * that scratch block held.
 */
float* cosineCompare(sparseRIV baseRIV, sparseRIV *multipliers, int multiplierCount, float threshold){
    int *baseDenseRIV = RIVKeyData.h_tempBlock;
    mapS2D(baseDenseRIV, baseRIV);

    /* calloc: slots of skipped candidates read as 0 instead of indeterminate */
    float *outputs = (float*)calloc(multiplierCount, sizeof(float));
    if(outputs == NULL){
        return NULL;
    }

    for(int i=0; i<multiplierCount; i++){
        sparseRIV *candidate = &multipliers[i];
        if(!candidate->boolean){
            continue;
        }

        /* 64-bit accumulator: products of counts can exceed the int range */
        long long dot = 0;
        for(int j=0; j<candidate->count; j++){
            dot += (long long)candidate->values[j] * baseDenseRIV[candidate->locations[j]];
        }

        outputs[i] = dot/((baseRIV.magnitude)*(candidate->magnitude));
        if(outputs[i]>=threshold){
            printf("%s\t%s\n%f\n", candidate->name, baseRIV.name, outputs[i]);
            /* a matched candidate is not compared again by later calls */
            candidate->boolean = 0;
        }
    }
    return outputs;
}
/* getMagnitudes: stores the Euclidean length of each sparse RIV in its magnitude field.
 *
 * inputs:   array of RIVCount sparse RIVs; only .magnitude is written
 * RIVCount: number of entries in inputs
 */
void getMagnitudes(sparseRIV *inputs, int RIVCount){
    for(int i=0; i<RIVCount; i++){
        const int *values = inputs[i].values;
        /* 64-bit accumulator: a sum of squares can exceed the int range */
        long long sumOfSquares = 0;
        for(int j=0; j<inputs[i].count; j++){
            sumOfSquares += (long long)values[j]*values[j];
        }
        inputs[i].magnitude = (float)sqrt((double)sumOfSquares);
    }
}
/* mapS2D: expands a sparse RIV into a caller-supplied dense array.
 *
 * destination: array of at least RIVKeyData.RIVsize ints; fully overwritten
 * input:       sparse RIV whose values are scattered to their locations
 * returns:     destination
 */
int* mapS2D(int* destination, sparseRIV input){
    /* every slot that is not named by the sparse RIV must read as zero */
    memset(destination, 0, RIVKeyData.RIVsize*sizeof(int));

    for(int i=0; i<input.count; i++){
        destination[input.locations[i]] = input.values[i];
    }
    return destination;
}
/* mapI2D: expands an implicit RIV (a bare list of locations) into a new dense array.
 *
 * locations:  valueCount dense indices; entries at even positions add 1 to their
 *             slot, entries at odd positions subtract 1
 * valueCount: number of entries in locations
 * returns:    calloc'd array of RIVKeyData.RIVsize ints; the caller frees it
 */
int* mapI2D(int *locations, int valueCount){
    int *dense = (int*)calloc(RIVKeyData.RIVsize,sizeof(int));

    for(int i=0; i<valueCount; i++){
        if(i%2 == 0){
            dense[locations[i]] += 1;
        }else{
            dense[locations[i]] += -1;
        }
    }
    return dense;
}
/* consolidateD2S: compresses a dense RIV into the sparse arrays of *destination.
 *
 * destination: receives count plus freshly allocated locations/values arrays
 *              (caller frees both); other fields are left untouched. When the
 *              dense vector has no non-zero slot, or memory cannot be obtained,
 *              count is 0 and both pointers are NULL.
 * denseInput:  array of RIVKeyData.RIVsize ints
 */
void consolidateD2S(sparseRIV *destination, int *denseInput){
    destination->count = 0;
    destination->locations = NULL;
    destination->values = NULL;

    /* first pass: count the non-zero slots so the arrays can be sized exactly;
     * this avoids the oversized allocation and the realloc-to-zero-bytes case */
    int nonZeroCount = 0;
    for(int i=0; i<RIVKeyData.RIVsize; i++){
        if(denseInput[i]){
            nonZeroCount++;
        }
    }
    if(nonZeroCount == 0){
        return;
    }

    int *locations = (int*)malloc(nonZeroCount*sizeof(int));
    int *values = (int*)malloc(nonZeroCount*sizeof(int));
    if(locations == NULL || values == NULL){
        fprintf(stderr, "consolidateD2S: out of memory for %d entries\n", nonZeroCount);
        free(locations);
        free(values);
        return;
    }

    /* second pass: record index and value of every non-zero slot, in index order */
    int filled = 0;
    for(int i=0; i<RIVKeyData.RIVsize; i++){
        if(denseInput[i]){
            locations[filled] = i;
            values[filled] = denseInput[i];
            filled++;
        }
    }

    destination->locations = locations;
    destination->values = values;
    destination->count = filled;
}
/* setKeyData: initialises the global RIVKeyData for host-only use.
 *
 * RIVsize:   number of ints in a dense RIV
 * nonZeros:  seeds generated per word; an odd value is bumped to the next even
 *            number (a notice is printed)
 * blockSize: number of ints to allocate for the shared scratch block h_tempBlock
 *
 * Exits the program if either buffer cannot be allocated. Buffers from an
 * earlier call are not released.
 */
void setKeyData(int RIVsize, int nonZeros, int blockSize){
    RIVKeyData.RIVsize = RIVsize;

    if(nonZeros%2){
        printf("your nonZeros must be an even number");
        nonZeros++;
        printf(", changed to %d", nonZeros);
    }
    RIVKeyData.nonZeros = nonZeros;

    RIVKeyData.masks = (long long int*)malloc(nonZeros*sizeof(long long int));
    RIVKeyData.h_tempBlock = (int*)malloc(blockSize*sizeof(int));
    if((nonZeros > 0 && RIVKeyData.masks == NULL) ||
       (blockSize > 0 && RIVKeyData.h_tempBlock == NULL)){
        fprintf(stderr, "setKeyData: could not allocate key data buffers\n");
        exit(EXIT_FAILURE);
    }

    for(int i = 0; i<nonZeros; i++){
        /* shifting a 64-bit value by 64 or more is undefined; the mathematical
         * result of such a shift is 0, so store that explicitly */
        int shift = 5*i;
        RIVKeyData.masks[i] = (shift < 64) ? (SEEDMASK>>shift) : 0;
    }

    RIVKeyData.thing = 0;
}
/* makeSeeds: appends RIVKeyData.nonZeros seeds derived from one word.
 *
 * word:      NUL-terminated word to hash
 * seeds:     address of the seed array; seeds are written starting at
 *            (*seeds)[*seedCount]. The caller guarantees room for nonZeros
 *            more ints.
 * seedCount: in/out count of seeds already stored; grows by nonZeros
 */
void makeSeeds(unsigned char* word, int **seeds, int *seedCount){
    /* Fold the characters into a 32-bit base, shifting the i-th one by 5*i bits.
     * The original did this in signed int, which is undefined as soon as the
     * shift reaches 32 or a bit is pushed into the sign (words longer than about
     * five characters). Unsigned arithmetic with the shift count reduced
     * modulo 32 is well defined.
     * NOTE(review): this matches what x86 shift instructions produce for the
     * original expression; confirm seeds stay compatible with stored RIVs. */
    unsigned int base = 0;
    for(int i=0; word[i]; i++){
        base += (unsigned int)word[i] << ((i*5) & 31);
    }
    int seedbase = (int)base;

    /* derive nonZeros variants of the base, appended after the existing seeds */
    int *seedTrack = (*seeds)+*seedCount;
    for(int i=0; i<RIVKeyData.nonZeros; i++){
        seedTrack[i] = (seedbase>>i)+(3*i);
    }
    *seedCount+=RIVKeyData.nonZeros;
}
/* makeSparseLocations: converts seeds into dense-vector indices.
 *
 * seeds:     seedCount seed values; may be RIVKeyData.h_tempBlock itself, since
 *            each seed is read before its slot is overwritten
 * seedCount: number of seeds
 * returns:   RIVKeyData.h_tempBlock, now holding seedCount locations, each in
 *            the range [0, RIVsize)
 *
 * Seed i is XORed with mask (i mod nonZeros), the sign bit is cleared and the
 * result is reduced modulo RIVsize.
 */
int* makeSparseLocations(int* seeds, int seedCount){
    int *locations = RIVKeyData.h_tempBlock;
    long long int *masks = RIVKeyData.masks;
    int maskIndex = 0;

    for(int i=0; i<seedCount; i++){
        long long int mixed = seeds[i] ^ masks[maskIndex];
        locations[i] = (mixed & 2147483647) % (RIVKeyData.RIVsize);

        /* cycle through the masks */
        maskIndex++;
        if(maskIndex >= RIVKeyData.nonZeros){
            maskIndex -= RIVKeyData.nonZeros;
        }
    }
    return locations;
}
/* sscanAdvance: copies the next space-delimited token out of a string.
 *
 * string:  address of the read cursor; on return it points just past the
 *          delimiting space, or at the terminating NUL if none was found
 * word:    output buffer; receives the token, NUL-terminated. The caller must
 *          make it large enough - no bound is enforced here.
 * returns: word
 *
 * Only ' ' delimits; a cursor that starts on a space yields an empty token.
 */
unsigned char *sscanAdvance(unsigned char **string, unsigned char *word){
    unsigned char *src = *string;
    unsigned char *dst = word;

    for(; *src != 0; src++){
        if(*src == ' '){
            /* consume the delimiter itself, then stop */
            src++;
            break;
        }
        *dst++ = *src;
    }

    *dst = 0;
    *string = src;
    return word;
}
sparseRIV text2L2(unsigned char *text){
unsigned char *word = (unsigned char*)calloc(2000, 1);
int *seeds = ( int*)malloc(RIVKeyData.nonZeros*sizeof( int));
unsigned char *text_slider = text;
int seedCount = 0;
while(*text_slider){
sscanAdvance(&text_slider, word);
makeSeeds(word, &seeds, &seedCount);
memset(word, 0, 2000);
}
int *locations = makeSparseLocations(seeds, seedCount);
int *L2dense;
L2dense = mapI2D(locations, seedCount);
free(locations);
sparseRIV output;
consolidateD2S(&output, L2dense);
free(seeds);
return output;
}
#include <stdio.h>
#include <stdlib.h>
#include <strsafe.h>
#define SEEDMASK 25214903917
/* HANDLE_ERROR(err): checks a CUDA runtime status, tagging a failure with the
 * call site's file and line. */
#define HANDLE_ERROR(err) (HandleError(err, __FILE__, __LINE__))
/* HandleError: returns silently on cudaSuccess; otherwise prints the CUDA error
 * text with its origin and terminates the program. */
static void HandleError(cudaError_t err, const char *file, int line){
    if(err == cudaSuccess){
        return;
    }
    printf("%s in %s at line %d\n", cudaGetErrorString(err), file, line);
    exit(EXIT_FAILURE);
}
/* squirt: replaces each of the first N entries of d_magnitudes with its square
 * root; each thread handles the element at its global index. */
__global__ void squirt(float *d_magnitudes, int N){
    int element = (blockIdx.x*blockDim.x + threadIdx.x);
    /* threads whose index falls beyond the array do nothing */
    if(element < N){
        d_magnitudes[element] = sqrt(d_magnitudes[element]);
    }
}
/* generateLocations: turns seeds into dense indices for one "team".
 *
 * Each thread handles the single slot nonZeros*threadIndex + team, so one
 * launch covers the seeds that share the same mask; slots at or beyond
 * seedCount are ignored. The seed is XORed with mask, its sign bit cleared and
 * the result reduced modulo RIVsize.
 */
__global__ void generateLocations(int *d_seeds, long long int mask, int *d_locations, int RIVsize, int team, int seedCount, int nonZeros){
    int slot =nonZeros*(blockIdx.x*blockDim.x + threadIdx.x)+team;
    if(slot < seedCount){
        d_locations[slot] = ((d_seeds[slot]^mask) & 2147483647) %(RIVsize);
    }
}
/* D2S: dense-to-sparse compaction.
 *
 * Each thread inspects one dense slot; a non-zero slot reserves the next output
 * position by atomically incrementing *d_NZCount and stores its value and index
 * there. Output order therefore depends on thread scheduling, but values and
 * locations stay paired. *d_NZCount must be 0 before the launch.
 */
__global__ void D2S( int* d_DenseRIV, int* d_SparseValues, int* d_SparseLocations, int *d_NZCount, int d_DenseSize){
    int denseIndex =(blockIdx.x*blockDim.x + threadIdx.x);
    if(denseIndex < d_DenseSize){
        int value = d_DenseRIV[denseIndex];
        if(value){
            int outIndex = atomicAdd(d_NZCount, 1);
            d_SparseValues[outIndex] = value;
            d_SparseLocations[outIndex] = denseIndex;
        }
    }
}
/* S2D: sparse-to-dense scatter-add. Each thread adds one sparse value into the
 * dense slot named by its paired location; atomicAdd keeps repeated locations
 * from losing updates. */
__global__ void S2D(int *d_locations, int *d_values, int *d_OpenSlot, int numberOfValues){
    int entry = blockIdx.x*blockDim.x + threadIdx.x;
    if(entry < numberOfValues){
        atomicAdd(&d_OpenSlot[d_locations[entry]], d_values[entry]);
    }
}
/* I2D: implicit-to-dense scatter. Each thread handles one location: entries at
 * even positions add 1 to their dense slot, entries at odd positions subtract 1. */
__global__ void I2D(int *d_locations, int *d_OpenSlot, int numberOfValues){
    int entry = blockIdx.x*blockDim.x + threadIdx.x;
    if(entry < numberOfValues){
        int delta = (entry%2) ? -1: 1;
        atomicAdd(&d_OpenSlot[d_locations[entry]], delta);
    }
}
void consolidateD2SStaged(sparseRIV *destination, int *denseInput);
void consolidateD2S_d(sparseRIV *destination, int *denseInput);
void setKeyData_d(int RIVsize, int nonZeros, int blockSize);
int* mapS2D_d(int * destination, sparseRIV input);
float *getMagnitudes_d(sparseRIV *inputs, int RIVCount);
int *mapI2D_d(int *locations, int seedCount);
int* makeSparseLocations_d(int* seeds, int seedCount);
/* getMagnitudes_d: computes the Euclidean length of each sparse RIV, taking the
 * square roots on the GPU.
 *
 * inputs:   array of RIVCount sparse RIVs; .magnitude of each is written
 * RIVCount: number of entries in inputs
 * returns:  host array of RIVCount magnitudes obtained from cudaMallocHost;
 *           presumably the caller releases it with cudaFreeHost - confirm
 *
 * NOTE(review): RIVKeyData.d_magnitudes is allocated on every call and never
 * freed here; confirm who owns it before adding a cudaFree.
 */
float *getMagnitudes_d(sparseRIV *inputs, int RIVCount){
    float *magnitudes;
    HANDLE_ERROR (cudaMallocHost((float**)&magnitudes,RIVCount*sizeof(float)));

    /* sums of squares are computed on the host */
    for(int i=0; i<RIVCount; i++){
        const int *values = inputs[i].values;
        /* 64-bit accumulator: a sum of squares can exceed the int range */
        long long sumOfSquares = 0;
        for(int j=0; j<inputs[i].count; j++){
            sumOfSquares += (long long)values[j]*values[j];
        }
        magnitudes[i] = (float)sumOfSquares;
    }

    HANDLE_ERROR (cudaMalloc((void**)&RIVKeyData.d_magnitudes, RIVCount*sizeof(float)));
    HANDLE_ERROR (cudaMemcpy (RIVKeyData.d_magnitudes, magnitudes, RIVCount*sizeof(float), cudaMemcpyHostToDevice));

    int blockSize;
    int minGridSize = 0;
    /* checked: on failure blockSize would be left undefined and used as a divisor */
    HANDLE_ERROR (cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, squirt));
    /* the trailing +1 keeps the grid non-empty when RIVCount is 0; surplus
     * threads return immediately inside the kernel */
    int gridSize = ((RIVCount + blockSize -1) / blockSize)+1;
    squirt<<<gridSize,blockSize >>> (RIVKeyData.d_magnitudes, RIVCount);
    HANDLE_ERROR (cudaGetLastError());

    HANDLE_ERROR (cudaMemcpy (magnitudes, RIVKeyData.d_magnitudes, RIVCount*sizeof(float), cudaMemcpyDeviceToHost));
    for(int i=0; i<RIVCount; i++){
        inputs[i].magnitude = magnitudes[i];
    }
    return magnitudes;
}
/* mapS2D_d: expands a sparse RIV into a dense array using the S2D kernel.
 *
 * destination: host array of at least RIVKeyData.RIVsize ints; overwritten
 * input:       sparse RIV to expand
 * returns:     destination
 *
 * Device layout inside the block at d_OpenSlot: RIVsize dense slots, then
 * input.count locations, then input.count values.
 */
int *mapS2D_d(int* destination, sparseRIV input){
    int *d_dense = RIVKeyData.d_OpenSlot;
    int *d_locations = d_dense+RIVKeyData.RIVsize;
    int *d_values = d_locations+input.count;

    /* clear the dense region, then upload the sparse pairs behind it */
    HANDLE_ERROR (cudaMemset (d_dense, 0, RIVKeyData.RIVsize*sizeof(int)));
    HANDLE_ERROR (cudaMemcpy (d_locations, input.locations, input.count*sizeof(int), cudaMemcpyHostToDevice));
    HANDLE_ERROR (cudaMemcpy (d_values, input.values, input.count*sizeof(int), cudaMemcpyHostToDevice));

    int threadsPerBlock;
    int minGridSize = 0;
    cudaOccupancyMaxPotentialBlockSize( &minGridSize, &threadsPerBlock, S2D);
    int blocks = ((input.count + threadsPerBlock -1) / threadsPerBlock)+1;
    S2D <<<blocks,threadsPerBlock>>> (d_locations, d_values, d_dense, input.count);

    HANDLE_ERROR (cudaMemcpy (destination, d_dense, RIVKeyData.RIVsize*sizeof(int), cudaMemcpyDeviceToHost));
    return destination;
}
/* mapI2D_d: expands an implicit RIV (a list of locations) into a dense array
 * using the I2D kernel.
 *
 * locations:  host array of valueCount dense indices
 * valueCount: number of entries in locations
 * returns:    RIVKeyData.h_tempBlock, holding the RIVsize dense values; this is
 *             shared scratch space, not a fresh allocation
 *
 * Device layout inside the block at d_OpenSlot: RIVsize dense slots followed
 * by the uploaded locations.
 */
int* mapI2D_d(int *locations, int valueCount){
    int *d_dense = RIVKeyData.d_OpenSlot;
    int *d_locations = d_dense+RIVKeyData.RIVsize;

    HANDLE_ERROR (cudaMemset (d_dense, 0, RIVKeyData.RIVsize*sizeof(int)));
    HANDLE_ERROR (cudaMemcpy (d_locations, locations, valueCount*sizeof(int), cudaMemcpyHostToDevice));

    int threadsPerBlock;
    int minGridSize = 0;
    cudaOccupancyMaxPotentialBlockSize( &minGridSize, &threadsPerBlock, I2D);
    int blocks = ((valueCount + threadsPerBlock -1) / threadsPerBlock)+1;
    I2D <<<blocks,threadsPerBlock>>> (d_locations, d_dense, valueCount);

    int *h_dense = RIVKeyData.h_tempBlock;
    HANDLE_ERROR (cudaMemcpy (h_dense, d_dense, RIVKeyData.RIVsize*sizeof(int), cudaMemcpyDeviceToHost));
    return h_dense;
}
/* consolidateD2SStaged: compresses a dense RIV and appends it to the host
 * staging block.
 *
 * destination: locations/values are pointed at the copies inside the staging
 *              block (not separately allocated) and count is set
 * denseInput:  array of RIVKeyData.RIVsize ints; may be RIVKeyData.h_tempBlock
 *
 * Records are pushed downwards from h_staging_slider. After the call the slider
 * points at: count, then count values, then count locations. The offset of that
 * record from h_stagingBlock is appended at h_displacements, which grows
 * upwards. Exits the program if the two cursors would collide.
 */
void consolidateD2SStaged(sparseRIV *destination, int *denseInput){
    /* gather the non-zero slots into the two halves of the scratch block */
    int count = 0;
    int *locations = RIVKeyData.h_tempBlock;
    int *values = RIVKeyData.h_tempBlock + RIVKeyData.RIVsize;
    for(int i=0; i<RIVKeyData.RIVsize; i++){
        /* read before writing: when denseInput is h_tempBlock, locations[count]
         * can be the very slot being inspected (count <= i always holds) */
        int value = denseInput[i];
        if(value){
            locations[count] = i;
            values[count] = value;
            count++;
        }
    }

    /* needs 2*count+1 ints below the slider plus one displacement entry */
    if((long long)(RIVKeyData.h_staging_slider - RIVKeyData.h_displacements) < 2LL*count + 2){
        fprintf(stderr, "consolidateD2SStaged: staging block is full\n");
        exit(EXIT_FAILURE);
    }

    /* copy exactly count locations, last to first, so they end up in ascending
     * order (the original loop ran once too often and read one element before
     * the start of the source array) */
    for(int i=count-1; i>=0; i--){
        RIVKeyData.h_staging_slider--;
        *RIVKeyData.h_staging_slider = locations[i];
    }
    destination->locations = RIVKeyData.h_staging_slider;

    for(int i=count-1; i>=0; i--){
        RIVKeyData.h_staging_slider--;
        *RIVKeyData.h_staging_slider = values[i];
    }
    destination->values = RIVKeyData.h_staging_slider;
    /* consistent with consolidateD2S and consolidateD2S_d */
    destination->count = count;

    /* record header: the element count sits directly below the values */
    RIVKeyData.h_staging_slider--;
    *RIVKeyData.h_staging_slider = count;

    *RIVKeyData.h_displacements = RIVKeyData.h_staging_slider -RIVKeyData.h_stagingBlock;
    RIVKeyData.h_displacements++;
}
/* consolidateD2S_d: compresses a dense RIV on the GPU with the D2S kernel.
 *
 * destination: count is set; locations and values are pointed into the host
 *              staging block at h_staging_slider, which is then advanced by
 *              2*count ints
 * denseInput:  host array of RIVKeyData.RIVsize ints
 *
 * Device layout inside the block at d_OpenSlot: dense input, then RIVsize
 * output values, then RIVsize output locations. Entry order follows the
 * kernel's atomic counter, so it is not guaranteed to be ascending.
 *
 * NOTE(review): this advances h_staging_slider upwards, whereas setKeyData_d
 * starts it at h_staging_stop and consolidateD2SStaged moves it downwards -
 * confirm which direction is intended before mixing the two.
 */
void consolidateD2S_d(sparseRIV *destination, int *denseInput){
    /* device-side counter of non-zero entries, starting at zero */
    int *d_nonZeroCount;
    HANDLE_ERROR (cudaMalloc((void**)&d_nonZeroCount, sizeof(int)));
    HANDLE_ERROR(cudaMemset(d_nonZeroCount, 0, sizeof(int)));

    int *d_dense = RIVKeyData.d_OpenSlot;
    HANDLE_ERROR (cudaMemcpy (d_dense, denseInput, RIVKeyData.RIVsize*sizeof(int), cudaMemcpyHostToDevice));
    int *d_outValues = d_dense+RIVKeyData.RIVsize;
    int *d_outLocations = d_outValues+RIVKeyData.RIVsize;

    int threadsPerBlock;
    int minGridSize = 0;
    cudaOccupancyMaxPotentialBlockSize( &minGridSize, &threadsPerBlock, D2S);
    int blocks = ((RIVKeyData.RIVsize + threadsPerBlock -1) / threadsPerBlock)+1;
    D2S <<<blocks,threadsPerBlock>>> (d_dense, d_outValues, d_outLocations, d_nonZeroCount, RIVKeyData.RIVsize);
    cudaDeviceSynchronize();

    HANDLE_ERROR (cudaMemcpy (&destination->count, d_nonZeroCount, sizeof(int), cudaMemcpyDeviceToHost));

    /* carve the two output arrays out of the staging block: locations, then values */
    destination->locations = RIVKeyData.h_staging_slider;
    RIVKeyData.h_staging_slider += destination->count;
    destination->values = RIVKeyData.h_staging_slider;
    RIVKeyData.h_staging_slider += destination->count;

    HANDLE_ERROR (cudaMemcpy (destination->values, d_outValues, (destination->count)*sizeof(int), cudaMemcpyDeviceToHost));
    HANDLE_ERROR (cudaMemcpy (destination->locations, d_outLocations, (destination->count)*sizeof(int), cudaMemcpyDeviceToHost));
    cudaFree(d_nonZeroCount);
}
/* setKeyData_d: initialises the global RIVKeyData for GPU use.
 *
 * RIVsize:   number of ints in a dense RIV
 * nonZeros:  seeds generated per word; an odd value is bumped to the next even
 *            number (a notice is printed)
 * blockSize: number of ints in each of the three work blocks allocated here:
 *            host scratch (h_tempBlock), host staging (h_stagingBlock) and
 *            device (d_OpenSlot)
 *
 * The staging slider starts at the END of the staging block and the
 * displacement cursor at its beginning. Exits the program on any allocation
 * failure.
 */
void setKeyData_d(int RIVsize, int nonZeros, int blockSize){
    RIVKeyData.RIVsize = RIVsize;

    if(nonZeros%2){
        printf("your nonZeros must be an even number");
        nonZeros++;
        printf(", changed to %d", nonZeros);
    }
    RIVKeyData.nonZeros = nonZeros;

    RIVKeyData.masks = (long long int*)malloc(nonZeros*sizeof(long long int));
    if(nonZeros > 0 && RIVKeyData.masks == NULL){
        printf("setKeyData_d: could not allocate %d masks\n", nonZeros);
        exit(EXIT_FAILURE);
    }
    for(int i = 0; i<nonZeros; i++){
        /* shifting a 64-bit value by 64 or more is undefined; the mathematical
         * result of such a shift is 0, so store that explicitly */
        int shift = 5*i;
        RIVKeyData.masks[i] = (shift < 64) ? (SEEDMASK>>shift) : 0;
    }

    HANDLE_ERROR (cudaMallocHost((void**)&RIVKeyData.h_tempBlock, blockSize*sizeof(int)));
    HANDLE_ERROR (cudaMallocHost((void**)&RIVKeyData.h_stagingBlock, blockSize*sizeof(int)));
    RIVKeyData.h_staging_stop = RIVKeyData.h_stagingBlock + blockSize;
    RIVKeyData.h_staging_slider = RIVKeyData.h_staging_stop;
    RIVKeyData.h_displacements = RIVKeyData.h_stagingBlock;

    HANDLE_ERROR (cudaMalloc((void**)&RIVKeyData.d_OpenSlot, blockSize*sizeof(int)));
    RIVKeyData.d_SlotEnd = RIVKeyData.d_OpenSlot+blockSize;

    RIVKeyData.thing = 0;
}
/* makeSparseLocations_d: converts seeds into dense-vector indices on the GPU.
 *
 * seeds:     host array of seedCount seeds
 * seedCount: number of seeds
 * returns:   RIVKeyData.h_tempBlock, holding seedCount locations; this is
 *            shared scratch space, not a fresh allocation
 *
 * Device layout inside the block at d_OpenSlot: seedCount locations followed by
 * seedCount seeds. One kernel launch is issued per mask ("team"); launch `team`
 * handles seeds team, team+nonZeros, team+2*nonZeros, ...
 *
 * NOTE(review): nothing checks that 2*seedCount ints fit before d_SlotEnd or
 * that seedCount ints fit in h_tempBlock - confirm against the block size.
 */
int* makeSparseLocations_d(int* seeds, int seedCount){
    int *d_locations = RIVKeyData.d_OpenSlot;
    int *d_seeds = d_locations+seedCount;
    HANDLE_ERROR (cudaMemcpy(d_seeds, seeds, seedCount*sizeof(int), cudaMemcpyHostToDevice));

    int blockSize;
    int minGridSize = 0;
    /* checked: on failure blockSize would be left undefined and used as a divisor */
    HANDLE_ERROR (cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, generateLocations));
    /* each launch only needs about seedCount/nonZeros threads */
    int gridSize = ((seedCount + blockSize -1) / (RIVKeyData.nonZeros*blockSize))+1;

    long long int *mask = RIVKeyData.masks;
    for(int team=0; team<RIVKeyData.nonZeros; team++){
        /* The third launch-configuration argument is the dynamic shared-memory
         * size in bytes; the original passed `team` there. The kernel uses no
         * shared memory, so the argument is dropped. */
        generateLocations <<<gridSize,blockSize>>> (d_seeds, *mask, d_locations, RIVKeyData.RIVsize, team, seedCount, RIVKeyData.nonZeros);
        mask++;
    }
    cudaDeviceSynchronize();

    int *locations = RIVKeyData.h_tempBlock;
    HANDLE_ERROR (cudaMemcpy(locations, d_locations, seedCount*sizeof(int), cudaMemcpyDeviceToHost));
    return locations;
}
/* addS2DsBlocked: uploads one sparse RIV, launches the S2Ds kernel over the
 * device block at d_OpenSlot, and copies RIVCount*RIVsize ints from that block
 * back into denseBlock.
 *
 * denseBlock: host output of RIVCount*RIVsize ints. It is only written here,
 *             never uploaded, so the kernel works on whatever d_OpenSlot holds.
 * additive:   sparse RIV whose locations/values are placed on the device
 *             directly behind the RIVCount dense vectors
 * RIVCount:   number of dense vectors in the block
 *
 * NOTE(review): S2Ds is not defined in this part of the file, so what it does
 * with these arguments cannot be confirmed here.
 */
void addS2DsBlocked(int *denseBlock, sparseRIV additive, int RIVCount){
int *d_locations= RIVKeyData.d_OpenSlot+RIVCount*RIVKeyData.RIVsize;
int *d_values = d_locations+additive.count;
HANDLE_ERROR (cudaMemcpy (d_locations, additive.locations, additive.count*sizeof(int), cudaMemcpyHostToDevice));
HANDLE_ERROR (cudaMemcpy (d_values, additive.values, additive.count*sizeof(int), cudaMemcpyHostToDevice));
int blockSize;
int minGridSize = 0;
int gridSize;
cudaOccupancyMaxPotentialBlockSize( &minGridSize, &blockSize, S2Ds);
gridSize = ((additive.count + blockSize -1) / blockSize)+1;
/* NOTE(review): gridSize and blockSize are computed above but not used; the
 * launch is additive.count blocks of one thread each - confirm this is what
 * S2Ds expects. */
S2Ds<<<additive.count,1>>>(RIVKeyData.d_OpenSlot, d_locations, d_values, additive.count, RIVCount, RIVKeyData.RIVsize);
HANDLE_ERROR (cudaMemcpy (denseBlock, RIVKeyData.d_OpenSlot, RIVCount*RIVKeyData.RIVsize*sizeof(int), cudaMemcpyDeviceToHost));
}
File deleted
makefile : RIVcullGPU.cu RIVtools.c RIVLower.c RIVaccessories.c
nvcc RIVcullGPU.cu -o RIVcullG RIVtools.o RIVLower.o RIVaccessories.o -lm -O3
fileCount: 1000
../backup/0/doj_civ_fraud_enron4_0039ca34-8a9d-4327-ba3a-dffeef8297f8.txt ../backup/0/doj_civ_fraud_enron4_0030b5f1-4923-4ac5-8070-d0d459a55887.txt
0.763783
../backup/0/doj_civ_fraud_enron4_0068bc6a-ed41-47d8-afdb-3c98ca2c8a7e.txt ../backup/0/doj_civ_fraud_enron4_00682721-2d07-4a71-874e-d4f7d550fb68.txt
1.000000
../backup/0/doj_civ_fraud_enron4_0069a141-76bb-4f47-bbaa-1af8211bc783.txt ../backup/0/doj_civ_fraud_enron4_000fc924-6a76-4073-ae68-03feb4dfea7f.txt
1.000000
../backup/0/doj_civ_fraud_enron4_0071ed60-8e18-4dd8-8a14-69efa97991e6.txt ../backup/0/doj_civ_fraud_enron4_00286f4a-ffe5-4865-b6c8-22e8e6470a17.txt
0.765256
../backup/0/doj_civ_fraud_enron4_0076a278-13dd-4570-bd1b-a8667ff5a1e5.txt ../backup/0/doj_civ_fraud_enron4_001a9f7f-901d-4239-a6ce-045a75cacbad.txt
0.723166
../backup/0/doj_civ_fraud_enron4_007705c9-8a68-433b-b190-b596e0af5a44.txt ../backup/0/doj_civ_fraud_enron4_00564538-9215-4be1-9e5e-9c48c4891692.txt
0.711199
../backup/0/doj_civ_fraud_enron4_007e7a46-6d1f-4809-8bcd-8616a14a5ed2.txt ../backup/0/doj_civ_fraud_enron4_001ba443-8aa1-4901-99fa-769c921b7c97.txt
1.000000
../backup/0/doj_civ_fraud_enron4_0083815e-7e27-4e27-952a-beabf0eb2870.txt ../backup/0/doj_civ_fraud_enron4_007d7b1b-20f6-46e0-b68a-f808c72a68ee.txt
0.999466
../backup/0/doj_civ_fraud_enron4_00a6e533-5b54-4899-9247-04cd51eb7b06.txt ../backup/0/doj_civ_fraud_enron4_00537b5e-e8d2-4e7d-865c-d778fdf2388c.txt
0.742052
../backup/0/doj_civ_fraud_enron4_00ad70b2-0b8f-4f26-ad9b-294e9f80240f.txt ../backup/0/doj_civ_fraud_enron4_0071ed60-8e18-4dd8-8a14-69efa97991e6.txt
0.821186
../backup/0/doj_civ_fraud_enron4_00b2b937-21c5-452f-9540-f3d27e2caa91.txt ../backup/0/doj_civ_fraud_enron4_00aac34b-b402-4fb8-9f87-7e7ae7deff7d.txt
0.709276
../backup/0/doj_civ_fraud_enron4_00b3f2e0-cc84-4b30-8adb-15cee6374b80.txt ../backup/0/doj_civ_fraud_enron4_00713878-1dec-4366-b9ec-01effd4d4dab.txt
0.703661
../backup/0/doj_civ_fraud_enron4_00bfe59d-cb76-447b-b537-6f8beaecc4c3.txt ../backup/0/doj_civ_fraud_enron4_0049e8fe-5195-4331-9a2b-278ee42678a0.txt
0.936382
../backup/0/doj_civ_fraud_enron4_00c574de-834d-4f72-87b6-08fdbc608357.txt ../backup/0/doj_civ_fraud_enron4_000f0eea-05ef-490c-b52e-6030e5e9dc72.txt
1.000000
../backup/0/doj_civ_fraud_enron4_00c5f62a-92c7-4292-aa24-6c400328903a.txt ../backup/0/doj_civ_fraud_enron4_00ad19db-267d-495b-b45f-21090d0ef147.txt
0.962357
../backup/0/doj_civ_fraud_enron4_00ce86f8-a931-4be3-ac2d-cc69edfd781a.txt ../backup/0/doj_civ_fraud_enron4_003d83a6-923c-48f1-ac72-b74367fd120d.txt
0.714172
../backup/0/doj_civ_fraud_enron4_00ce86f8-a931-4be3-ac2d-cc69edfd781a.txt ../backup/0/doj_civ_fraud_enron4_0045c52f-e8d2-430b-a2fd-42876223b9f0.txt
0.723129
../backup/0/doj_civ_fraud_enron4_00db9dff-0ba0-4e70-93de-a751e5e34cd7.txt ../backup/0/doj_civ_fraud_enron4_00c7da83-8dc2-4418-a448-f574fc86f61a.txt
0.722726
../backup/0/doj_civ_fraud_enron4_00ddb089-dedb-4cc4-a8ba-f89de10d8e5c.txt ../backup/0/doj_civ_fraud_enron4_00a6e533-5b54-4899-9247-04cd51eb7b06.txt
0.718547
../backup/0/doj_civ_fraud_enron4_00ddb089-dedb-4cc4-a8ba-f89de10d8e5c.txt ../backup/0/doj_civ_fraud_enron4_00c113f9-ac53-4492-a743-1b72236407aa.txt
0.770782
../backup/0/doj_civ_fraud_enron4_00e27fb1-0685-4e33-b4cc-3cad999b45e5.txt ../backup/0/doj_civ_fraud_enron4_00d76bea-9be0-4a6b-8b1d-fc8677ea8827.txt
1.000000
../backup/0/doj_civ_fraud_enron4_00f080b9-460c-4eb1-a47a-0372acb6a555.txt ../backup/0/doj_civ_fraud_enron4_002290b5-791a-4f5c-9527-c929869d64c4.txt
0.722857
../backup/0/doj_civ_fraud_enron4_00ff57e6-64fe-40d1-b97f-61c1f2917be3.txt ../backup/0/doj_civ_fraud_enron4_00ddb089-dedb-4cc4-a8ba-f89de10d8e5c.txt
0.759524
nsquared time:0.008044
23 <total time:0.285595
File deleted
File deleted
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment