Skip to content

Commit

Permalink
Reclaim mapped memory
Browse files Browse the repository at this point in the history
  • Loading branch information
jart committed May 24, 2024
1 parent 8b2f8d8 commit 4451c6d
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 81 deletions.
3 changes: 2 additions & 1 deletion llama.cpp/llama-bench/llama-bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ static std::string get_cpu_info() {
if (f) {
char buf[1024];
while (fgets(buf, sizeof(buf), f)) {
if (strncmp(buf, "model name", 10) == 0) {
if (strncmp(buf, "model name", 10) == 0 ||
startswith(buf, "Model :")) {
char * p = strchr(buf, ':');
if (p) {
p++;
Expand Down
88 changes: 12 additions & 76 deletions llama.cpp/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1217,28 +1217,28 @@ struct llama_mmap {
void * addr;
size_t size;
bool is_owned;
llamafile * lfile;

llama_mmap(const llama_mmap &) = delete;

static constexpr bool SUPPORTED = true;

// list of mapped fragments (first_offset, last_offset)
std::vector<std::pair<size_t, size_t>> mapped_fragments;

llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
size = llamafile_size(file->file);
if (!llamafile_fp(file->file)) {
lfile = file->file;
size = llamafile_size(lfile);
if (!llamafile_fp(lfile)) {
// file is an uncompressed zip asset
// therefore it's already mapped
is_owned = false;
addr = llamafile_content(file->file);
llamafile_ref(lfile);
addr = llamafile_content(lfile);
if (!llamafile_has_gpu()) {
llamafile_schlep(addr, size);
}
return;
}
is_owned = true;
int fd = fileno(llamafile_fp(file->file));
int fd = fileno(llamafile_fp(lfile));
// advise the kernel to read the file sequentially (increases readahead)
errno_t err;
if ((err = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL)) && err != ENOSYS) {
Expand Down Expand Up @@ -1273,83 +1273,19 @@ struct llama_mmap {
if (!llamafile_has_gpu()) {
llamafile_schlep(addr, size);
}

// initialize list of mapped_fragments
mapped_fragments.emplace_back(0, size);
}

// Shrinks the half-open byte range [*first, *last) inward so that both ends
// sit on page boundaries: *first is rounded up to the next multiple of
// page_size and *last is rounded down to the previous one. If the rounded
// range would be empty or inverted, *last is set equal to *first.
// The bit masking below only rounds correctly when page_size is a power of two.
static void align_range(size_t * first, size_t * last, size_t page_size) {
    const size_t page_mask = page_size - 1;

    // round the start up to a page boundary (no-op when already aligned)
    const size_t misalignment = *first & page_mask;
    if (misalignment != 0) {
        *first += page_size - misalignment;
    }

    // round the end down to a page boundary
    *last &= ~page_mask;

    // collapse to an empty range when nothing page-sized remains
    if (*last <= *first) {
        *last = *first;
    }
}

// partially unmap the file in the range [first, last)
void unmap_fragment(size_t first, size_t last) {
// note: this function must not be called multiple times with overlapping ranges
// otherwise, there is a risk of invalidating addresses that have been repurposed for other mappings
int page_size = sysconf(_SC_PAGESIZE);
align_range(&first, &last, page_size);
size_t len = last - first;

if (len == 0) {
return;
}

GGML_ASSERT(first % page_size == 0);
GGML_ASSERT(last % page_size == 0);
GGML_ASSERT(last > first);

void * next_page_start = (uint8_t *) addr + first;

// unmap the range
#if 0
// TODO(jart): make this safe
if (munmap(next_page_start, len)) {
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
}
#endif

// update the list of mapped fragments to avoid unmapping the same range again in the destructor
std::vector<std::pair<size_t, size_t>> new_mapped_fragments;
for (const auto & frag : mapped_fragments) {
if (frag.first < first && frag.second > last) {
// the range is in the middle of the fragment, split it
new_mapped_fragments.emplace_back(frag.first, first);
new_mapped_fragments.emplace_back(last, frag.second);
} else if (frag.first < first && frag.second > first) {
// the range starts in the middle of the fragment
new_mapped_fragments.emplace_back(frag.first, first);
} else if (frag.first < last && frag.second > last) {
// the range ends in the middle of the fragment
new_mapped_fragments.emplace_back(last, frag.second);
} else if (frag.first >= first && frag.second <= last) {
// the range covers the entire fragment
} else {
// the range is outside the fragment
new_mapped_fragments.push_back(frag);
}
}
mapped_fragments = std::move(new_mapped_fragments);
// [jart] nah
}

~llama_mmap() {
#if 0
// TODO(jart): make this safe
for (const auto & frag : mapped_fragments) {
if (munmap((char *) addr + frag.first, frag.second - frag.first)) {
LLAMA_LOG_WARN("warning: munmap failed: %s\n", strerror(errno));
}
if (is_owned) {
munmap(addr, size);
} else {
llamafile_unref(lfile);
}
#endif
}
};
using llama_mmaps = std::vector<std::unique_ptr<llama_mmap>>;
Expand Down
23 changes: 19 additions & 4 deletions llamafile/llamafile.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
Expand Down Expand Up @@ -54,6 +55,7 @@ struct llamafile {
void *mapping;
size_t mapsize;
char fname[PATH_MAX];
atomic_int refs;
};

static struct llamafile *llamafile_open_zip(const char *prog, const char *fname, const char *mode) {
Expand Down Expand Up @@ -214,7 +216,7 @@ static struct llamafile *llamafile_open_zip(const char *prog, const char *fname,
off_t mapoff = off & -pagesz;
long skew = off - mapoff;
file->mapsize = skew + file->size;
file->mapping = mmap(0, file->mapsize, PROT_READ, MAP_SHARED | MAP_POPULATE, fd, mapoff);
file->mapping = mmap(0, file->mapsize, PROT_READ, MAP_SHARED, fd, mapoff);
if (file->mapping == MAP_FAILED) {
fprintf(stderr, "%s: warning: failed to map zip file: %s\n", file->fname, strerror(errno));
goto Failure;
Expand Down Expand Up @@ -377,12 +379,25 @@ long llamafile_write(struct llamafile *file, const void *ptr, size_t len) {
return len;
}

void llamafile_close(struct llamafile *file) {
// Tears down `file` unconditionally: closes its stdio stream when one is
// open, unmaps file->mapping (file->mapsize bytes) when a valid mapping is
// present, and finally frees the struct itself. `file` must not be used
// after this returns.
static void llamafile_close_impl(struct llamafile *file) {
if (file->fp)
fclose(file->fp);
if (file->mapping && file->mapping != MAP_FAILED) {
// TODO(jart): reference count this mapping w/ llama_mmap
// munmap(file->mapping, file->mapsize);
munmap(file->mapping, file->mapsize);
}
free(file);
}

// Takes an additional reference on `file` by atomically incrementing
// file->refs. Each call is expected to be paired with a later
// llamafile_unref().
void llamafile_ref(struct llamafile *file) {
atomic_fetch_add(&file->refs, 1);
}

// Releases one reference on `file`. The counter is decremented atomically,
// and when its value *before* the decrement was zero the file is torn down
// through llamafile_close_impl().
// NOTE(review): this presumably relies on file->refs starting at zero, so
// that the opener's handle acts as the implicit first reference — confirm
// against the allocation site.
void llamafile_unref(struct llamafile *file) {
    // atomic_fetch_sub returns the value held prior to the subtraction
    int refs_before = atomic_fetch_sub(&file->refs, 1);
    if (refs_before == 0) {
        llamafile_close_impl(file);
    }
}

// Public close entry point: gives up the caller's hold on `file` by
// forwarding to llamafile_unref(), which decides whether the file is
// actually torn down.
void llamafile_close(struct llamafile *file) {
llamafile_unref(file);
}
2 changes: 2 additions & 0 deletions llamafile/llamafile.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ void *llamafile_content(struct llamafile *);
size_t llamafile_tell(struct llamafile *);
size_t llamafile_size(struct llamafile *);
FILE *llamafile_fp(struct llamafile *);
void llamafile_ref(struct llamafile *);
void llamafile_unref(struct llamafile *);

void llamafile_govern(void);
void llamafile_check_cpu(void);
Expand Down

0 comments on commit 4451c6d

Please sign in to comment.