#include "Setting.h"
#include "Random.h"
#include "Reader.h"
#include "Corrupt.h"
#include "Test.h"
#include <cstdlib>
#include <pthread.h>

/* Exported setters/getters implemented elsewhere in the project. */
extern "C" __declspec(dllexport) void setInPath(char *path);
extern "C" __declspec(dllexport) void setTrainPath(char *path);
extern "C" __declspec(dllexport) void setValidPath(char *path);
extern "C" __declspec(dllexport) void setTestPath(char *path);
extern "C" __declspec(dllexport) void setEntPath(char *path);
extern "C" __declspec(dllexport) void setRelPath(char *path);
extern "C" __declspec(dllexport) void setOutPath(char *path);
extern "C" __declspec(dllexport) void setWorkThreads(INT threads);
extern "C" __declspec(dllexport) void setBern(INT con);
extern "C" __declspec(dllexport) INT getWorkThreads();
extern "C" __declspec(dllexport) INT getEntityTotal();
extern "C" __declspec(dllexport) INT getRelationTotal();
extern "C" __declspec(dllexport) INT getTripleTotal();
extern "C" __declspec(dllexport) INT getTrainTotal();
extern "C" __declspec(dllexport) INT getTestTotal();
extern "C" __declspec(dllexport) INT getValidTotal();
extern "C" __declspec(dllexport) void randReset();
extern "C" __declspec(dllexport) void importTrainFiles();

/*
 * Per-thread argument bundle for getBatch. The batch_* arrays are shared
 * output buffers; each thread writes only its own disjoint [lef, rig)
 * slice (plus the negative-sample slots offset by multiples of batchSize),
 * so no synchronization is needed.
 */
struct Parameter {
	INT id;            // worker thread index, 0..workThreads-1
	INT *batch_h;      // output: head entity ids
	INT *batch_t;      // output: tail entity ids
	INT *batch_r;      // output: relation ids
	REAL *batch_y;     // output: labels (+1 positive, -1 negative)
	INT batchSize;     // number of positive triples per batch
	INT negRate;       // negative entity samples per positive triple
	INT negRelRate;    // negative relation samples per positive triple
	bool p;            // forwarded to corrupt_rel
	bool val_loss;     // false: sample training negatives; true: copy validation triples
	INT mode;          // 0: corrupt head or tail; -1: corrupt head only; 1: corrupt tail only
	bool filter_flag;  // NOTE(review): stored but never read in getBatch — confirm intent
};

/*
 * Thread worker: fills this thread's slice of the batch buffers.
 *
 * Layout of the output arrays: slots [0, batchSize) hold positive triples;
 * each subsequent chunk of batchSize slots holds one round of negatives,
 * addressed as batch + last where last advances by batchSize per round.
 *
 * Always terminates via pthread_exit(NULL).
 */
void* getBatch(void* con) {
	// Unpack the per-thread parameter struct into locals.
	Parameter *para = (Parameter *)(con);
	INT id = para -> id;
	INT *batch_h = para -> batch_h;
	INT *batch_t = para -> batch_t;
	INT *batch_r = para -> batch_r;
	REAL *batch_y = para -> batch_y;
	INT batchSize = para -> batchSize;
	INT negRate = para -> negRate;
	INT negRelRate = para -> negRelRate;
	bool p = para -> p;
	bool val_loss = para -> val_loss;
	INT mode = para -> mode;
	bool filter_flag = para -> filter_flag;  // currently unused below
	// Partition the batch across worker threads: this thread handles [lef, rig).
	INT lef, rig;
	if (batchSize % workThreads == 0) {
		lef = id * (batchSize / workThreads);
		rig = (id + 1) * (batchSize / workThreads);
	} else {
		lef = id * (batchSize / workThreads + 1);
		rig = (id + 1) * (batchSize / workThreads + 1);
		if (rig > batchSize) rig = batchSize;
	}
	// Threshold (out of 1000) deciding head- vs tail-corruption; 500 means 50/50.
	REAL prob = 500;
	if (val_loss == false) {
		// Training mode: sample positives and generate negatives.
		for (INT batch = lef; batch < rig; batch++) {
			// rand_max is seeded per-thread (by id), so each worker draws
			// its own independent random stream.
			INT i = rand_max(id, trainTotal);
			batch_h[batch] = trainList[i].h;
			batch_t[batch] = trainList[i].t;
			batch_r[batch] = trainList[i].r;
			batch_y[batch] = 1;
			// Negative slots live at batch + k*batchSize for k = 1, 2, ...
			INT last = batchSize;
			// Entity corruption, negRate rounds.
			for (INT times = 0; times < negRate; times ++) {
				if (mode == 0) {
					// Standard sampling: corrupt head or tail per prob.
					// With bernFlag set, bias by the per-relation tph/hpt
					// statistics (Bernoulli trick from TransH).
					if (bernFlag)
						prob = 1000 * right_mean[trainList[i].r] / (right_mean[trainList[i].r] + left_mean[trainList[i].r]);
					if (randd(id) % 1000 < prob) {
						// Keep head, corrupt tail.
						batch_h[batch + last] = trainList[i].h;
						batch_t[batch + last] = corrupt_head(id, trainList[i].h, trainList[i].r);
						batch_r[batch + last] = trainList[i].r;
					} else {
						// Keep tail, corrupt head.
						batch_h[batch + last] = corrupt_tail(id, trainList[i].t, trainList[i].r);
						batch_t[batch + last] = trainList[i].t;
						batch_r[batch + last] = trainList[i].r;
					}
					batch_y[batch + last] = -1;
					last += batchSize;
				} else {
					if (mode == -1) {
						// Head-corruption-only mode.
						batch_h[batch + last] = corrupt_tail(id, trainList[i].t, trainList[i].r);
						batch_t[batch + last] = trainList[i].t;
						batch_r[batch + last] = trainList[i].r;
					} else {
						// Tail-corruption-only mode.
						batch_h[batch + last] = trainList[i].h;
						batch_t[batch + last] = corrupt_head(id, trainList[i].h, trainList[i].r);
						batch_r[batch + last] = trainList[i].r;
					}
					batch_y[batch + last] = -1;
					last += batchSize;
				}
			}
			// Relation corruption, negRelRate rounds.
			for (INT times = 0; times < negRelRate; times++) {
				batch_h[batch + last] = trainList[i].h;
				batch_t[batch + last] = trainList[i].t;
				batch_r[batch + last] = corrupt_rel(id, trainList[i].h, trainList[i].t, trainList[i].r, p);
				batch_y[batch + last] = -1;
				last += batchSize;
			}
		}
	} else {
		// Validation mode: copy validation triples verbatim, all labeled +1.
		for (INT batch = lef; batch < rig; batch++) {
			batch_h[batch] = validList[batch].h;
			batch_t[batch] = validList[batch].t;
			batch_r[batch] = validList[batch].r;
			batch_y[batch] = 1;
		}
	}
	pthread_exit(NULL);
	return ((void*)0);  // unreachable; silences missing-return warnings
}

/*
 * Exported entry point: fills the batch buffers using workThreads pthreads.
 *
 * batch_h/batch_t/batch_r/batch_y must each hold at least
 * batchSize * (1 + negRate + negRelRate) elements.
 *
 * mode:  0 = corrupt head or tail, -1 = corrupt head only, 1 = corrupt tail only.
 * val_loss: true copies validation triples instead of sampling negatives.
 */
extern "C" __declspec(dllexport)
void sampling(
		INT *batch_h,
		INT *batch_t,
		INT *batch_r,
		REAL *batch_y,
		INT batchSize,
		INT negRate = 1,
		INT negRelRate = 0,
		INT mode = 0,
		bool filter_flag = true,
		bool p = false,
		bool val_loss = false) {
	// One pthread handle and one parameter struct per worker.
	pthread_t *pt = (pthread_t *)malloc(workThreads * sizeof(pthread_t));
	Parameter *para = (Parameter *)malloc(workThreads * sizeof(Parameter));
	// Fix: the original used both allocations unchecked (CERT MEM32-C);
	// bail out cleanly if either fails.
	if (pt == NULL || para == NULL) {
		free(pt);
		free(para);
		return;
	}
	for (INT threads = 0; threads < workThreads; threads++) {
		// All workers share the output buffers; only id differs.
		para[threads].id = threads;
		para[threads].batch_h = batch_h;
		para[threads].batch_t = batch_t;
		para[threads].batch_r = batch_r;
		para[threads].batch_y = batch_y;
		para[threads].batchSize = batchSize;
		para[threads].negRate = negRate;
		para[threads].negRelRate = negRelRate;
		para[threads].p = p;
		para[threads].val_loss = val_loss;
		para[threads].mode = mode;
		para[threads].filter_flag = filter_flag;
		pthread_create(&pt[threads], NULL, getBatch, (void*)(para + threads));
	}
	// Wait for all workers before the caller may read the buffers.
	for (INT threads = 0; threads < workThreads; threads++)
		pthread_join(pt[threads], NULL);
	free(pt);
	free(para);
}

int main() {
	importTrainFiles();
	return 0;
}