-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen.cpp
95 lines (76 loc) · 2.01 KB
/
gen.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <fcntl.h>
#include <unistd.h>
#include "sys/mman.h"
#include "sys/stat.h"
#include <assert.h>
#include <time.h>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
extern "C" {
#include "compiled/llama2.h"
}
// Element count of the output buffer that main() hands to net().
// NOTE(review): presumably the model's vocabulary size -- confirm against compiled/llama2.h.
#define VOCAB_SIZE 32000
// Upper bound that main() checks its number of generated tokens against.
#define MAX_GEN_LEN 1024
/// Samples an index from `probs` by walking its running sum.
///
/// A uniform value r in [0, 1) is drawn with rand(); the first index whose
/// cumulative weight exceeds r is returned.
///
/// @param probs  Weights to sample from. They are expected to be non-negative
///               and to sum to roughly 1; this is not checked.
/// @return The sampled index. If the running sum never exceeds r (the weights
///         sum to slightly less than 1 because of float rounding), the last
///         index is returned instead of silently biasing the draw toward
///         index 0. An empty `probs` yields 0.
int random_choice(const std::vector<float>& probs) {
    if (probs.empty()) {
        return 0;
    }

    // Divide by RAND_MAX + 1 in double precision so r is strictly below 1.
    // The float form rand() / RAND_MAX can round to exactly 1.0, in which
    // case no bucket of a properly normalised distribution would match.
    const double r = static_cast<double>(std::rand()) /
                     (static_cast<double>(RAND_MAX) + 1.0);

    float cumulative = 0.0f;
    for (std::size_t idx = 0; idx < probs.size(); ++idx) {
        cumulative += probs[idx];
        if (r < cumulative) {
            return static_cast<int>(idx);
        }
    }

    // Rounding left the total just short of r: attribute the draw to the
    // final bucket rather than to index 0.
    return static_cast<int>(probs.size() - 1);
}
int main(int argc, char* argv[]) {
srand(time(NULL));
std::string model_path;
for (int i=1; i<argc; ++i) {
std::string arg = argv[i];
if (arg == "--model-path" && i + 1 < argc) {
model_path = argv[++i];
} else {
std::cerr << "Unknown argument: " << arg << std::endl;
return 1;
}
}
if (model_path.empty()) {
std::cerr << "Usage: " << argv[0] << " --model-path <path_to_model>" << std::endl;
return 1;
}
std::cout << "Model path provided: " << model_path << std::endl;
std::vector<float> input0{1, 15043, 29892, 590, 1024, 338};
std::vector<float> outputs(VOCAB_SIZE);
std::cout << "loading weights\n";
int fd = -1;
if ((fd = open(model_path.c_str(), O_RDONLY)) == -1) {
std::cerr << "failed to open\n";
return EXIT_FAILURE;
}
struct stat fesb;
fstat(fd, &fesb);
void *weights = mmap(NULL, fesb.st_size, PROT_READ, MAP_SHARED, fd, 0);
assert(weights != MAP_FAILED);
model_t *llama = (model_t *)malloc(sizeof(model_t));
llama->weights = weights;
std::cout << "weights loaded: " << llama->weights << "\n";
init();
std::cout << "allocated scratch bufs\n";
// run
int toks_to_gen = 50;
assert (MAX_GEN_LEN >= toks_to_gen);
unsigned int i = 0;
while (1) {
net(input0.data(), outputs.data(), llama);
int tok = random_choice(outputs);
input0.push_back(tok);
printf("%i,", tok);
fflush(stdout);
++i;
if (i >= toks_to_gen)
break;
}
// cleanup
deinit();
munmap(llama->weights, fesb.st_size);
free(llama);
}