From 1c30ad014b03a2a313590d954cf65ea004fe311f Mon Sep 17 00:00:00 2001 From: Waldemar Kozaczuk Date: Mon, 18 Mar 2024 00:29:26 -0400 Subject: [PATCH] ext4: add initial implementation This commit adds an initial implementation of the ext4 filesystem driver based on the lwext4 project (https://github.com/gkostka/lwext4). It provides a light weight read-write alternative to ZFS filesystem. Please note this implementation is NOT thread-safe and will need to be enhanced to be so in future. However it is functional enough to support the test cases examined by modules/libext/test.sh. One can build the OSv like so: ./scripts/manifest_from_host.sh -w /usr/bin/find && ./scripts/build fs=rofs image=libext,native-example -j$(nproc) --append-manifest Then create an ext4 filesystem: mkdir -p ext_images dd if=/dev/zero of=ext_images/ext4 bs=1M count=128 sudo mkfs.ext4 ext_images/ext4 Add some files to it if needed: sudo losetup -o 0 -f --show ext_images/ext4 sudo mount /dev/loop0 ext_images/image .. update content sudo umount ext_images/image sudo losetup -d /dev/loop0 qemu-img convert -f raw -O qcow2 ext_images/ext4 ext_images/ext4.img And then run it: ./scripts/run.py --execute='--mount-fs=ext,/dev/vblk1,/data /hello' --second-disk-image ./ext_images/ext4.img or using the test.sh ./modules/libext/test.sh '/find /data/ -ls' Fixes #1179 Signed-off-by: Waldemar Kozaczuk --- Makefile | 1 + fs/ext/ext_null_vfsops.cc | 43 ++ fs/vfs/vfs_conf.cc | 3 + modules/libext/.gitignore | 1 + modules/libext/Makefile | 33 + modules/libext/README.md | 50 ++ modules/libext/ext_vfsops.cc | 218 ++++++ modules/libext/ext_vnops.cc | 1290 ++++++++++++++++++++++++++++++++++ modules/libext/module.py | 3 + modules/libext/test.sh | 103 +++ modules/lwext4/.gitignore | 1 + modules/lwext4/Makefile | 36 + modules/lwext4/usr.manifest | 9 + 13 files changed, 1791 insertions(+) create mode 100644 fs/ext/ext_null_vfsops.cc create mode 100644 modules/libext/.gitignore create mode 100644 modules/libext/Makefile 
create mode 100644 modules/libext/README.md create mode 100644 modules/libext/ext_vfsops.cc create mode 100644 modules/libext/ext_vnops.cc create mode 100644 modules/libext/module.py create mode 100755 modules/libext/test.sh create mode 100644 modules/lwext4/.gitignore create mode 100644 modules/lwext4/Makefile create mode 100644 modules/lwext4/usr.manifest diff --git a/Makefile b/Makefile index 864fc4b4b4..c0e4a500ea 100644 --- a/Makefile +++ b/Makefile @@ -2075,6 +2075,7 @@ endif boost-libs := $(boost-lib-dir)/libboost_system$(boost-mt).a objects += fs/nfs/nfs_null_vfsops.o +objects += fs/ext/ext_null_vfsops.o # The OSv kernel is linked into an ordinary, non-PIE, executable, so there is no point in compiling # with -fPIC or -fpie and objects that can be linked into a PIE. On the contrary, PIE-compatible objects diff --git a/fs/ext/ext_null_vfsops.cc b/fs/ext/ext_null_vfsops.cc new file mode 100644 index 0000000000..455599f585 --- /dev/null +++ b/fs/ext/ext_null_vfsops.cc @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2024 Waldemar Kozaczuk + * + * Based on ramfs code Copyright (c) 2006-2007, Kohsuke Ohtani + * + * This work is open source software, licensed under the terms of the + * BSD license as described in the LICENSE file in the top-level directory. + */ + +#include + +#define ext_mount ((vfsop_mount_t)vfs_nullop) +#define ext_umount ((vfsop_umount_t)vfs_nullop) +#define ext_sync ((vfsop_sync_t)vfs_nullop) +#define ext_vget ((vfsop_vget_t)vfs_nullop) +#define ext_statfs ((vfsop_statfs_t)vfs_nullop) + +static int ext_noop_mount(struct mount *mp, const char *dev, int flags, + const void *data) +{ + printf("The ext module is in-active!. Please add ext module to the image.\n"); + return -1; +} + +/* + * File system operations + * + * This deactivates the EXT file system when libext.so is not loaded. 
+ * + */ +struct vfsops ext_vfsops = { + ext_noop_mount, /* mount */ + ext_umount, /* umount */ + ext_sync, /* sync */ + ext_vget, /* vget */ + ext_statfs, /* statfs */ + nullptr, /* vnops */ +}; + +extern "C" int ext_init(void) +{ + return 0; +} diff --git a/fs/vfs/vfs_conf.cc b/fs/vfs/vfs_conf.cc index 48211e7789..d4c4c29c9e 100644 --- a/fs/vfs/vfs_conf.cc +++ b/fs/vfs/vfs_conf.cc @@ -56,6 +56,7 @@ extern struct vfsops zfs_vfsops; #if CONF_drivers_virtio_fs extern struct vfsops virtiofs_vfsops; #endif +extern struct vfsops ext_vfsops; extern int ramfs_init(void); extern int rofs_init(void); @@ -67,6 +68,7 @@ extern int nfs_init(void); extern int procfs_init(void); extern int sysfs_init(void); extern "C" int zfs_init(void); +extern "C" int ext_init(void); /* * VFS switch table @@ -82,5 +84,6 @@ const struct vfssw vfssw[] = { #if CONF_drivers_virtio_fs {"virtiofs", virtiofs_init, &virtiofs_vfsops}, #endif + {"ext", ext_init, &ext_vfsops}, {nullptr, fs_noop, nullptr}, }; diff --git a/modules/libext/.gitignore b/modules/libext/.gitignore new file mode 100644 index 0000000000..f9235a6be1 --- /dev/null +++ b/modules/libext/.gitignore @@ -0,0 +1 @@ +usr.manifest diff --git a/modules/libext/Makefile b/modules/libext/Makefile new file mode 100644 index 0000000000..f225eb8bfe --- /dev/null +++ b/modules/libext/Makefile @@ -0,0 +1,33 @@ +arch=x64 +include ../common.gmk + +module_out := $(out)/modules/libext + +CXXFLAGS = -fPIC -std=gnu++11 $(INCLUDES) -I../lwext4/upstream/lwext4/include -I../lwext4/upstream/lwext4/build_lib_only/include \ + -D_KERNEL -D_GNU_SOURCE -Wall -fno-exceptions -fno-rtti + +# the build target executable: +TARGET = libext +CPP_FILES := ext_vfsops.cc ext_vnops.cc +OBJ_FILES := $(addprefix $(module_out)/,$(CPP_FILES:.cc=.o)) +DEPS := $(OBJ_FILES:.o=.d) + +LIBS = -L../lwext4/upstream/lwext4/build_lib_only/src/ -llwext4 + +$(module_out)/$(TARGET).so: $(OBJ_FILES) + $(call quiet, $(CXX) $(CXXFLAGS) $(LDFLAGS) -static-libstdc++ -shared -o 
$(module_out)/$(TARGET).so $^ $(LIBS), LINK $@) + +$(module_out)/%.o: %.cc + $(call quiet, $(CXX) $(CXXFLAGS) -c -o $@ $<, CXX $@) + +init: + @echo " MKDIRS" + $(call very-quiet, mkdir -p $(module_out)) +.PHONY: init + +module: init $(module_out)/$(TARGET).so + echo '/usr/lib/fs/libext.so: ./modules/libext/libext.so' > usr.manifest + +clean: + rm -f $(TARGET)*.so usr.manifest + $(call very-quiet, $(RM) -rf $(module_out)) diff --git a/modules/libext/README.md b/modules/libext/README.md new file mode 100644 index 0000000000..dc6b6ca483 --- /dev/null +++ b/modules/libext/README.md @@ -0,0 +1,50 @@ +## Building Image with Ext4 Support + +```bash +./scripts/build fs=rofs image=libext,native-example -j$(nproc) +``` + +## Creating Disk with Ext4 Filesystem + +### Create Empty Disk +```bash +mkdir -p ext_images +dd if=/dev/zero of=ext_images/ext4 bs=1M count=128 +sudo mkfs.ext4 ext_images/ext4 +``` + +### Mounting Disk as Loop Device +```bash +sudo losetup -o 0 -f --show ext_images/ext4 +sudo mount /dev/loop0 ext_images/image + +#Copy sample files from the host +cp -rf fs ext_images/image + +#Unmount +sudo umount ext_images/image +sudo losetup -d /dev/loop0 + +qemu-img convert -f raw -O qcow2 ext_images/ext4 ext_images/ext4.img +``` + +## Running with Ext4 Disk + +```bash +./scripts/run.py --execute='--mount-fs=ext,/dev/vblk1,/data /hello' --second-disk-image ./ext_images/ext4.img +``` + +or use the `test.sh`: + +```bash +./modules/libext/test.sh '/find /data/ -ls' +``` + +## Checking the Disk + +* Mount the disk as described above +* Run fsck + +```bash +sudo fsck -n /dev/loop0 +``` diff --git a/modules/libext/ext_vfsops.cc b/modules/libext/ext_vfsops.cc new file mode 100644 index 0000000000..3ce183daeb --- /dev/null +++ b/modules/libext/ext_vfsops.cc @@ -0,0 +1,218 @@ +/* + * Copyright (C) 2024 Waldemar Kozaczuk + * + * This work is open source software, licensed under the terms of the + * BSD license as described in the LICENSE file in the top-level directory. 
+ */ + +extern "C" { +#define USE_C_INTERFACE 1 +#include +#include +#include +#include +#include +#include + +void* alloc_contiguous_aligned(size_t size, size_t align); +void free_contiguous_aligned(void* p); +} + +#include +#include +#include +#include + +#include +#include +#include + +extern "C" bool is_linear_mapped(const void *addr); + +int ext_init(void) { return 0;} + +static int blockdev_open(struct ext4_blockdev *bdev) +{ + return EOK; +} + +static int blockdev_bread_or_write(struct ext4_blockdev *bdev, void *buf, uint64_t blk_id, uint32_t blk_cnt, bool read) +{ + struct bio *bio = alloc_bio(); + if (!bio) + return ENOMEM; + + bio->bio_cmd = read ? BIO_READ : BIO_WRITE; + bio->bio_dev = (struct device*)bdev->bdif->p_user; + bio->bio_offset = blk_id * bdev->bdif->ph_bsize; + bio->bio_bcount = blk_cnt * bdev->bdif->ph_bsize; + + if (!is_linear_mapped(buf)) { + bio->bio_data = alloc_contiguous_aligned(bio->bio_bcount, alignof(std::max_align_t)); + if (!read) { + memcpy(bio->bio_data, buf, bio->bio_bcount); + } + } else { + bio->bio_data = buf; + } + + bio->bio_dev->driver->devops->strategy(bio); + int error = bio_wait(bio); + + kprintf("[ext4] %s %ld bytes at offset %ld to %p with error:%d\n", read ? 
"Read" : "Wrote", + bio->bio_bcount, bio->bio_offset, bio->bio_data, error); + + if (!is_linear_mapped(buf)) { + if (read && !error) { + memcpy(buf, bio->bio_data, bio->bio_bcount); + } + free_contiguous_aligned(bio->bio_data); + } + destroy_bio(bio); + + return error; +} + +static int blockdev_bread(struct ext4_blockdev *bdev, void *buf, uint64_t blk_id, uint32_t blk_cnt) +{ + return blockdev_bread_or_write(bdev, buf, blk_id, blk_cnt, true); +} + +static int blockdev_bwrite(struct ext4_blockdev *bdev, const void *buf, + uint64_t blk_id, uint32_t blk_cnt) +{ + return blockdev_bread_or_write(bdev, const_cast(buf), blk_id, blk_cnt, false); +} + +static int blockdev_close(struct ext4_blockdev *bdev) +{ + return EOK; +} + +EXT4_BLOCKDEV_STATIC_INSTANCE(ext_blockdev, 512, 0, blockdev_open, + blockdev_bread, blockdev_bwrite, blockdev_close, 0, 0); + +static struct ext4_fs ext_fs; +static struct ext4_bcache ext_block_cache; +extern struct vnops ext_vnops; + +static int +ext_mount(struct mount *mp, const char *dev, int flags, const void *data) +{ + struct device *device; + + const char *dev_name = dev + 5; + kprintf("[ext4] Trying to open device: [%s]\n", dev_name); + int error = device_open(dev_name, DO_RDWR, &device); + + if (error) { + kprintf("[ext4] Error opening device!\n"); + return error; + } + + ext4_dmask_set(DEBUG_ALL); + // + // Save a reference to the filesystem + mp->m_dev = device; + ext_blockdev.bdif->p_user = device; + ext_blockdev.part_offset = 0; + ext_blockdev.part_size = device->size; + ext_blockdev.bdif->ph_bcnt = ext_blockdev.part_size / ext_blockdev.bdif->ph_bsize; + + kprintf("[ext4] Trying to mount ext4 on device: [%s] with size:%ld\n", dev_name, device->size); + int r = ext4_block_init(&ext_blockdev); + if (r != EOK) + return r; + + r = ext4_fs_init(&ext_fs, &ext_blockdev, false); + if (r != EOK) { + ext4_block_fini(&ext_blockdev); + return r; + } + + uint32_t bsize = ext4_sb_get_block_size(&ext_fs.sb); + ext4_block_set_lb_size(&ext_blockdev, 
bsize); + + r = ext4_bcache_init_dynamic(&ext_block_cache, CONFIG_BLOCK_DEV_CACHE_SIZE, bsize); + if (r != EOK) { + ext4_block_fini(&ext_blockdev); + return r; + } + + if (bsize != ext_block_cache.itemsize) + return ENOTSUP; + + /*Bind block cache to block device*/ + r = ext4_block_bind_bcache(&ext_blockdev, &ext_block_cache); + if (r != EOK) { + ext4_bcache_cleanup(&ext_block_cache); + ext4_block_fini(&ext_blockdev); + ext4_bcache_fini_dynamic(&ext_block_cache); + return r; + } + + ext_blockdev.fs = &ext_fs; + mp->m_data = &ext_fs; + mp->m_root->d_vnode->v_ino = EXT4_INODE_ROOT_INDEX; + + kprintf("[ext4] Mounted ext4 on device: [%s] with code:%d\n", dev_name, r); + printf("WARNING: The ext4 filesystem driver is considered alpha and is NOT thread-safe\n"); + return r; +} + +static int +ext_unmount(struct mount *mp, int flags) +{ + int r = ext4_fs_fini(&ext_fs); + if (r == EOK) { + ext4_bcache_cleanup(&ext_block_cache); + ext4_bcache_fini_dynamic(&ext_block_cache); + } + + r = ext4_block_fini(&ext_blockdev); + kprintf("[ext4] Trying to unmount ext4 (after %d)!\n", r); + return device_close((struct device*)ext_blockdev.bdif->p_user); +} + +static int +ext_sync(struct mount *mp) +{ + return EIO; +} + +static int +ext_statfs(struct mount *mp, struct statfs *statp) +{ + kprintf("[ext4] statfs\n"); + struct ext4_fs *fs = (struct ext4_fs *)mp->m_data; + statp->f_bsize = ext4_sb_get_block_size(&fs->sb); + + statp->f_blocks = ext4_sb_get_blocks_cnt(&fs->sb); + statp->f_bfree = ext4_sb_get_free_blocks_cnt(&fs->sb); + statp->f_bavail = ext4_sb_get_free_blocks_cnt(&fs->sb); + + statp->f_ffree = ext4_get32(&fs->sb, free_inodes_count); + statp->f_files = ext4_get32(&fs->sb, inodes_count); + + statp->f_namelen = EXT4_DIRECTORY_FILENAME_LEN; + statp->f_type = EXT4_SUPERBLOCK_MAGIC; + + statp->f_fsid = mp->m_fsid; /* File system identifier */ + return EOK; +} + +// We are relying on vfsops structure defined in kernel +extern struct vfsops ext_vfsops; + +// Overwrite "null" vfsops 
structure fields with "real" +// functions upon loading libext.so shared object +void __attribute__((constructor)) initialize_vfsops() { + ext_vfsops.vfs_mount = ext_mount; + ext_vfsops.vfs_unmount = ext_unmount; + ext_vfsops.vfs_sync = ext_sync; + ext_vfsops.vfs_vget = ((vfsop_vget_t)vfs_nullop); + ext_vfsops.vfs_statfs = ext_statfs; + ext_vfsops.vfs_vnops = &ext_vnops; +} + +asm(".pushsection .note.osv-mlock, \"a\"; .long 0, 0, 0; .popsection"); diff --git a/modules/libext/ext_vnops.cc b/modules/libext/ext_vnops.cc new file mode 100644 index 0000000000..df067c37cf --- /dev/null +++ b/modules/libext/ext_vnops.cc @@ -0,0 +1,1290 @@ +/* + * Copyright (C) 2024 Waldemar Kozaczuk + * + * This work is open source software, licensed under the terms of the + * BSD license as described in the LICENSE file in the top-level directory. + */ + +//Most of the code in this file is modeled after corresponding vnops +//implementation in ZFS, RoFS and RamFS filesystems but also loosely +//after the code in src/ext4.c from lwext4 library. The internal functions +//ext_internal_read() and ext_internal_write() on other hand are almost verbatim +//taken from ext4_read() and ext4_write() from the same file and slighly +//adjusted to C++. +// +//In effect, this vnops implementation bypasses the ext4.c layer of the lwext4 +//library and interacts with lower-layer functions like ext4_block_*(), ext4_dir_*(), +//ext4_fs_*() and ext4_inode_*() in a similar way the original ext4.c does. +// +//WARNING: This implementation is functional enough for all tests in the test.sh +//to pass. But it is NOT thread-safe yet. To make it so, we will need to synchronize +//access to block cache, i-node and block allocation routines as well as updating +//super block. +// +//Also, it does not implement journal (we can integrate it later and make it optional) +//nor xattr which is not even supported by OSv VFS layer. 
+ +extern "C" { +#define USE_C_INTERFACE 1 +#include +#include +#include +#include +#include +#include +#include + +void* alloc_contiguous_aligned(size_t size, size_t align); +void free_contiguous_aligned(void* p); +} + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +//Simple RAII struct to automate release of i-node reference +//when it goes out of scope. +struct auto_inode_ref { + struct ext4_inode_ref _ref; + int _r; + + auto_inode_ref(struct ext4_fs *fs, uint32_t inode_no) { + _r = ext4_fs_get_inode_ref(fs, inode_no, &_ref); + } + ~auto_inode_ref() { + if (_r == EOK) { + ext4_fs_put_inode_ref(&_ref); + } + } +}; + +//Simple RAII struct to set boundaries around ext4 function calls +//with block cache write back enabled. Effectively, when the instance +//of this struct goes out of scope, the writes are flushed to disk +//and write back disabled. +struct auto_write_back { + struct ext4_fs *_fs; + + auto_write_back(struct ext4_fs *fs) { + _fs = fs; + ext4_block_cache_write_back(_fs->bdev, 1); + } + + ~auto_write_back() { + ext4_block_cache_write_back(_fs->bdev, 0); + } +}; + +typedef struct vnode vnode_t; +typedef struct file file_t; +typedef struct uio uio_t; +typedef off_t offset_t; +typedef struct vattr vattr_t; + +//TODO: +//Ops: +// - ext_ioctl +// - ext_fsync +// +// Later: +// - ext_arc +// - ext_fallocate - Linux specific + +static int +ext_open(struct file *fp) +{ + kprintf("[ext4] Opening file\n"); + return (EOK); +} + +static int +ext_close(vnode_t *vp, file_t *fp) +{ + kprintf("[ext4] Closing file\n"); + return (EOK); +} + +static int +ext_internal_read(struct ext4_fs *fs, struct ext4_inode_ref *ref, uint64_t offset, void *buf, size_t size, size_t *rcnt) +{ + ext4_fsblk_t fblock; + ext4_fsblk_t fblock_start; + + uint8_t *u8_buf = (uint8_t *)buf; + int r; + + if (!size) + return EOK; + + struct ext4_sblock *const sb = &fs->sb; + + if (rcnt) + *rcnt = 0; + + /*Sync file size*/ + uint64_t fsize = 
ext4_inode_get_size(sb, ref->inode); + + uint32_t block_size = ext4_sb_get_block_size(sb); + size = ((uint64_t)size > (fsize - offset)) + ? ((size_t)(fsize - offset)) : size; + + uint32_t iblock_idx = (uint32_t)((offset) / block_size); + uint32_t iblock_last = (uint32_t)((offset + size) / block_size); + uint32_t unalg = (offset) % block_size; + + uint32_t fblock_count = 0; + if (unalg) { + size_t len = size; + if (size > (block_size - unalg)) + len = block_size - unalg; + + r = ext4_fs_get_inode_dblk_idx(ref, iblock_idx, &fblock, true); + if (r != EOK) + goto Finish; + + /* Do we get an unwritten range? */ + if (fblock != 0) { + uint64_t off = fblock * block_size + unalg; + r = ext4_block_readbytes(fs->bdev, off, u8_buf, len); + if (r != EOK) + goto Finish; + + } else { + /* Yes, we do. */ + memset(u8_buf, 0, len); + } + + u8_buf += len; + size -= len; + offset += len; + + if (rcnt) + *rcnt += len; + + iblock_idx++; + } + + fblock_start = 0; + while (size >= block_size) { + while (iblock_idx < iblock_last) { + r = ext4_fs_get_inode_dblk_idx(ref, iblock_idx, + &fblock, true); + if (r != EOK) + goto Finish; + + iblock_idx++; + + if (!fblock_start) + fblock_start = fblock; + + if ((fblock_start + fblock_count) != fblock) + break; + + fblock_count++; + } + + kprintf("[ext4] ext4_blocks_get_direct: block_start:%ld, block_count:%d\n", fblock_start, fblock_count); + r = ext4_blocks_get_direct(fs->bdev, u8_buf, fblock_start, + fblock_count); + if (r != EOK) + goto Finish; + + size -= block_size * fblock_count; + u8_buf += block_size * fblock_count; + offset += block_size * fblock_count; + + if (rcnt) + *rcnt += block_size * fblock_count; + + fblock_start = fblock; + fblock_count = 1; + } + + if (size) { + r = ext4_fs_get_inode_dblk_idx(ref, iblock_idx, &fblock, true); + if (r != EOK) + goto Finish; + + uint64_t off = fblock * block_size; + kprintf("[ext4] ext4_block_readbytes: off:%ld, size:%ld\n", off, size); + r = ext4_block_readbytes(fs->bdev, off, u8_buf, size); + if 
(r != EOK) + goto Finish; + + offset += size; + + if (rcnt) + *rcnt += size; + } + +Finish: + return r; +} + +static int +ext_read(vnode_t *vp, struct file *fp, uio_t *uio, int ioflag) +{ + kprintf("[ext4] Reading %ld bytes at offset:%ld from file i-node:%ld\n", uio->uio_resid, uio->uio_offset, vp->v_ino); + + /* Cant read directories */ + if (vp->v_type == VDIR) + return EISDIR; + + /* Cant read anything but reg */ + if (vp->v_type != VREG) + return EINVAL; + + /* Cant start reading before the first byte */ + if (uio->uio_offset < 0) + return EINVAL; + + /* Need to read more than 1 byte */ + if (uio->uio_resid == 0) + return 0; + + struct ext4_fs *fs = (struct ext4_fs *)vp->v_mount->m_data; + auto_inode_ref inode_ref(fs, vp->v_ino); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + // Total read amount is what they requested, or what is left + uint64_t fsize = ext4_inode_get_size(&fs->sb, inode_ref._ref.inode); + uint64_t read_amt = std::min(fsize - uio->uio_offset, (uint64_t)uio->uio_resid); + void *buf = alloc_contiguous_aligned(read_amt, alignof(std::max_align_t)); + + size_t read_count = 0; + int ret = ext_internal_read(fs, &inode_ref._ref, uio->uio_offset, buf, read_amt, &read_count); + if (ret) { + kprintf("[ext_read] Error reading data\n"); + free(buf); + return ret; + } + + ret = uiomove(buf, read_count, uio); + free_contiguous_aligned(buf); + + return ret; +} + +static int +ext_internal_write(struct ext4_fs *fs, struct ext4_inode_ref *ref, uint64_t offset, void *buf, size_t size, size_t *wcnt) +{ + kprintf("[ext4_interna_write] Writing %ld bytes at offset:%ld\n", size, offset); + ext4_fsblk_t fblock; + ext4_fsblk_t fblock_start = 0; + + uint8_t *u8_buf = (uint8_t *)buf; + int r; + + if (!size) + return EOK; + + struct ext4_sblock *const sb = &fs->sb; + + if (wcnt) + *wcnt = 0; + + /*Sync file size*/ + uint64_t fsize = ext4_inode_get_size(sb, ref->inode); + uint32_t block_size = ext4_sb_get_block_size(sb); + + uint32_t iblock_last = 
(uint32_t)((offset + size) / block_size); + uint32_t iblk_idx = (uint32_t)(offset / block_size); + uint32_t ifile_blocks = (uint32_t)((fsize + block_size - 1) / block_size); + + uint32_t unalg = (offset) % block_size; + + uint32_t fblock_count = 0; + /* Leading partial block: write only up to the next block boundary */ + if (unalg) { + size_t len = size; + uint64_t off; + if (size > (block_size - unalg)) + len = block_size - unalg; + + r = ext4_fs_init_inode_dblk_idx(ref, iblk_idx, &fblock); + if (r != EOK) + goto Finish; + + off = fblock * block_size + unalg; + r = ext4_block_writebytes(fs->bdev, off, u8_buf, len); + kprintf("[ext_internal_write] Wrote unaligned %ld bytes at %ld\n", len, off); + if (r != EOK) + goto Finish; + + u8_buf += len; + size -= len; + offset += len; + + if (wcnt) + *wcnt += len; + + iblk_idx++; + } + + /*Start write back cache mode.*/ + r = ext4_block_cache_write_back(fs->bdev, 1); + if (r != EOK) + goto Finish; + + /* rr is tested after the inner loop even when no block had to be + * appended, so it must start out as EOK. It is assigned separately + * from its declaration because the "goto Finish" statements above + * may not jump across an initialization in C++. */ + int rr; + rr = EOK; + while (size >= block_size) { + + while (iblk_idx < iblock_last) { + if (iblk_idx < ifile_blocks) { + r = ext4_fs_init_inode_dblk_idx(ref, iblk_idx, + &fblock); + if (r != EOK) + goto Finish; + } else { + rr = ext4_fs_append_inode_dblk(ref, &fblock, + &iblk_idx); + if (rr != EOK) { + /* Unable to append more blocks. But + * some block might be allocated already + * */ + break; + } + } + + iblk_idx++; + + if (!fblock_start) { + fblock_start = fblock; + } + + if ((fblock_start + fblock_count) != fblock) + break; + + fblock_count++; + } + + r = ext4_blocks_set_direct(fs->bdev, u8_buf, fblock_start, + fblock_count); + kprintf("[ext_internal_write] Wrote direct %d blocks at block %ld\n", fblock_count, fblock_start); + if (r != EOK) + break; + + size -= block_size * fblock_count; + u8_buf += block_size * fblock_count; + offset += block_size * fblock_count; + + if (wcnt) + *wcnt += block_size * fblock_count; + + fblock_start = fblock; + fblock_count = 1; + + if (rr != EOK) { + /*ext4_fs_append_inode_block has failed and no + * more blocks might be written.
But node size + * should be updated.*/ + r = rr; + goto out_fsize; + } + } + + /*Stop write back cache mode*/ + ext4_block_cache_write_back(fs->bdev, 0); + + if (r != EOK) + goto Finish; + + if (size) { + uint64_t off; + if (iblk_idx < ifile_blocks) { + r = ext4_fs_init_inode_dblk_idx(ref, iblk_idx, &fblock); + if (r != EOK) + goto Finish; + } else { + r = ext4_fs_append_inode_dblk(ref, &fblock, &iblk_idx); + if (r != EOK) + /*Node size sholud be updated.*/ + goto out_fsize; + } + + off = fblock * block_size; + r = ext4_block_writebytes(fs->bdev, off, u8_buf, size); + kprintf("[ext_internal_write] Wrote remaining %ld bytes at %ld\n", size, off); + if (r != EOK) + goto Finish; + + offset += size; + + if (wcnt) + *wcnt += size; + } + +out_fsize: + if (offset > fsize) { + ext4_inode_set_size(ref->inode, offset); + ref->dirty = true; + } + +Finish: + struct timespec now; + clock_gettime(CLOCK_REALTIME, &now); + ext4_inode_set_change_inode_time(ref->inode, now.tv_sec); + ext4_inode_set_modif_time(ref->inode, now.tv_sec); + ref->dirty = true; + + return r; +} + +static int +ext_write(vnode_t *vp, uio_t *uio, int ioflag) +{ + kprintf("[ext4] Writing %ld bytes at offset:%ld to file i-node:%ld\n", uio->uio_resid, uio->uio_offset, vp->v_ino); + + /* Cant write directories */ + if (vp->v_type == VDIR) + return EISDIR; + + /* Cant write anything but reg */ + if (vp->v_type != VREG) + return EINVAL; + + /* Cant start writing before the first byte */ + if (uio->uio_offset < 0) + return EINVAL; + + /* Need to write more than 1 byte */ + if (uio->uio_resid == 0) + return 0; + + struct ext4_fs *fs = (struct ext4_fs *)vp->v_mount->m_data; + auto_inode_ref inode_ref(fs, vp->v_ino); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + uio_t uio_copy = *uio; + if (ioflag & IO_APPEND) { + uio_copy.uio_offset = ext4_inode_get_size(&fs->sb, inode_ref._ref.inode); + } + + void *buf = alloc_contiguous_aligned(uio->uio_resid, alignof(std::max_align_t)); + int ret = uiomove(buf, 
uio->uio_resid, &uio_copy); + if (ret) { + kprintf("[ext_write] Error copying data\n"); + free(buf); + return ret; + } + + size_t write_count = 0; + ret = ext_internal_write(fs, &inode_ref._ref, uio->uio_offset, buf, uio->uio_resid, &write_count); + + uio->uio_resid -= write_count; + free_contiguous_aligned(buf); + + return ret; +} + +static int +ext_ioctl(vnode_t *vp, file_t *fp, u_long com, void *data) +{ + kprintf("[ext4] ioctl\n"); + return (EINVAL); +} + +static int +ext_fsync(vnode_t *vp, file_t *fp) +{ + kprintf("[ext4] fsync\n"); + return (EINVAL); +} + +static int +ext_readdir(struct vnode *dvp, struct file *fp, struct dirent *dir) +{ +#define EXT4_DIR_ENTRY_OFFSET_TERM (uint64_t)(-1) + struct ext4_fs *fs = (struct ext4_fs *)dvp->v_mount->m_data; + struct ext4_inode_ref inode_ref; + + if (file_offset(fp) == 1) {//EXT4_DIR_ENTRY_OFFSET_TERM) { + return ENOENT; + } + + int r = ext4_fs_get_inode_ref(fs, dvp->v_ino, &inode_ref); + if (r != EOK) { + return r; + } + + /* Check if node is directory */ + if (!ext4_inode_is_type(&fs->sb, inode_ref.inode, EXT4_INODE_MODE_DIRECTORY)) { + ext4_fs_put_inode_ref(&inode_ref); + return ENOTDIR; + } + + kprintf("[ext4] Reading directory with i-node:%ld at offset:%ld\n", dvp->v_ino, file_offset(fp)); + struct ext4_dir_iter it; + int rc = ext4_dir_iterator_init(&it, &inode_ref, file_offset(fp)); + if (rc != EOK) { + kprintf("[ext4] Reading directory with i-node:%ld at offset:%ld -> FAILED to init iterator\n", dvp->v_ino, file_offset(fp)); + ext4_fs_put_inode_ref(&inode_ref); + return rc; + } + + /* Test for non-empty directory entry */ + if (it.curr != NULL) { + if (ext4_dir_en_get_inode(it.curr) != 0) { + memset(dir->d_name, 0, sizeof(dir->d_name)); + uint16_t name_length = ext4_dir_en_get_name_len(&fs->sb, it.curr); + memcpy(dir->d_name, it.curr->name, name_length); + kprintf("[ext4] Reading directory with i-node:%ld at offset:%ld => entry name:%s\n", dvp->v_ino, file_offset(fp), dir->d_name); + + dir->d_ino = 
ext4_dir_en_get_inode(it.curr); + + uint8_t i_type = ext4_dir_en_get_inode_type(&fs->sb, it.curr); + if (i_type == EXT4_DE_DIR) { + dir->d_type = DT_DIR; + } else if (i_type == EXT4_DE_REG_FILE) { + dir->d_type = DT_REG; + } else if (i_type == EXT4_DE_SYMLINK) { + dir->d_type = DT_LNK; + } else { + // Do not leave d_type unset for the entry types not mapped above + dir->d_type = DT_UNKNOWN; + } + + ext4_dir_iterator_next(&it); + + off_t f_offset = file_offset(fp); + dir->d_fileno = f_offset; + dir->d_off = f_offset + 1; + // Remember where the next entry starts, or the terminator when there is none + file_setoffset(fp, it.curr ? it.curr_off : EXT4_DIR_ENTRY_OFFSET_TERM); + } else { + kprintf("[ext4] Reading directory with i-node:%ld at offset:%ld -> cos ni tak\n", dvp->v_ino, file_offset(fp)); + } + } else { + ext4_dir_iterator_fini(&it); + ext4_fs_put_inode_ref(&inode_ref); + kprintf("[ext4] Reading directory with i-node:%ld at offset:%ld -> ENOENT\n", dvp->v_ino, file_offset(fp)); + return ENOENT; + } + + rc = ext4_dir_iterator_fini(&it); + ext4_fs_put_inode_ref(&inode_ref); + if (rc != EOK) + return rc; + + return EOK; +} + +// Looks up the entry nm in the directory dvp. On success *vpp is the vnode +// obtained from vget() for the entry's i-node, with v_type and v_mode filled +// in from the on-disk i-node. Returns ENOTDIR if dvp is not a directory and +// ENOENT if the entry cannot be found. +static int +ext_lookup(struct vnode *dvp, char *nm, struct vnode **vpp) +{ + kprintf("[ext4] Looking up %s in directory with i-node:%ld\n", nm, dvp->v_ino); + struct ext4_fs *fs = (struct ext4_fs *)dvp->v_mount->m_data; + + auto_inode_ref inode_ref(fs, dvp->v_ino); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + /* Check if node is directory */ + if (!ext4_inode_is_type(&fs->sb, inode_ref._ref.inode, EXT4_INODE_MODE_DIRECTORY)) { + return ENOTDIR; + } + + struct ext4_dir_search_result result; + int r = ext4_dir_find_entry(&result, &inode_ref._ref, nm, strlen(nm)); + if (r == EOK) { + uint32_t inode_no = ext4_dir_en_get_inode(result.dentry); + vget(dvp->v_mount, inode_no, vpp); + + auto_inode_ref inode_ref2(fs, inode_no); + if (inode_ref2._r != EOK) { + // The search result has to be destroyed on this path as well. + // NOTE(review): the vnode obtained via vget() above is not + // released here - confirm what the VFS layer expects + ext4_dir_destroy_result(&inode_ref._ref, &result); + return inode_ref2._r; + } + + uint32_t i_type = ext4_inode_type(&fs->sb, inode_ref2._ref.inode); + if (i_type == EXT4_INODE_MODE_DIRECTORY) { + (*vpp)->v_type = VDIR; + } else if (i_type == EXT4_INODE_MODE_FILE) { + (*vpp)->v_type = VREG; + } else if (i_type ==
EXT4_INODE_MODE_SOFTLINK) { + (*vpp)->v_type = VLNK; + } + + (*vpp)->v_mode = ext4_inode_get_mode(&fs->sb, inode_ref2._ref.inode); + + kprintf("[ext4] Looked up %s %s in directory with i-node:%ld as i-node:%d\n", + (*vpp)->v_type == VDIR ? "DIR" : ((*vpp)->v_type == VREG ? "FILE" : "SYMLINK"), + nm, dvp->v_ino, inode_no); + } else { + r = ENOENT; + } + + ext4_dir_destroy_result(&inode_ref._ref, &result); + + return r; +} + +static int +ext_dir_initialize(ext4_inode_ref *parent, ext4_inode_ref *child, bool dir_index_on) +{ + int r; +#if CONFIG_DIR_INDEX_ENABLE + /* Initialize directory index if supported */ + if (dir_index_on) { + kprintf("[ext4] DIR_INDEX on initializing directory with inode no:%d\n", child->index); + r = ext4_dir_dx_init(child, parent); + if (r != EOK) + return r; + + ext4_inode_set_flag(child->inode, EXT4_INODE_FLAG_INDEX); + } else +#endif + { + r = ext4_dir_add_entry(child, ".", strlen("."), child); + if (r != EOK) { + return r; + } + + r = ext4_dir_add_entry(child, "..", strlen(".."), parent); + if (r != EOK) { + ext4_dir_remove_entry(child, ".", strlen(".")); + return r; + } + } + + /*New empty directory. Two links (. and ..) 
*/ + ext4_inode_set_links_cnt(child->inode, 2); + ext4_fs_inode_links_count_inc(parent); + parent->dirty = true; + child->dirty = true; + + return r; +} + +// Adds the entry name to the directory dvp. When inode_no is not null, the +// entry refers to that existing i-node (hard link); otherwise a new i-node +// of file_type is allocated. If inode_no_created is not null, it receives +// the index of the linked i-node on success. Returns ENOTDIR if dvp is not +// a directory, EEXIST if name is already present and EPERM for an attempt +// to hard-link a directory. +static int +ext_dir_link(struct vnode *dvp, char *name, int file_type, uint32_t *inode_no, uint32_t *inode_no_created) +{ + struct ext4_fs *fs = (struct ext4_fs *)dvp->v_mount->m_data; + auto_write_back wb(fs); + auto_inode_ref inode_ref(fs, dvp->v_ino); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + /* Check if node is directory */ + if (!ext4_inode_is_type(&fs->sb, inode_ref._ref.inode, EXT4_INODE_MODE_DIRECTORY)) { + return ENOTDIR; + } + + struct ext4_dir_search_result result; + int r = ext4_dir_find_entry(&result, &inode_ref._ref, name, strlen(name)); + ext4_dir_destroy_result(&inode_ref._ref, &result); + if (r == EOK) { + kprintf("[ext4] %s already exists under i-node %li\n", name, dvp->v_ino); + return EEXIST; + } + + struct ext4_inode_ref child_ref; + if (inode_no) { + r = ext4_fs_get_inode_ref(fs, *inode_no, &child_ref); + } else { + r = ext4_fs_alloc_inode(fs, &child_ref, file_type); + } + if (r != EOK) { + return r; + } + + if (!inode_no ) { + ext4_fs_inode_blocks_init(fs, &child_ref); + } + + r = ext4_dir_add_entry(&inode_ref._ref, name, strlen(name), &child_ref); + if (r == EOK) { + bool is_dir = ext4_inode_is_type(&fs->sb, child_ref.inode, EXT4_INODE_MODE_DIRECTORY); + if (is_dir && inode_no) { + //Cannot create hard links for directories; take back the entry + //added above so that it does not stay behind in the parent + ext4_dir_remove_entry(&inode_ref._ref, name, strlen(name)); + r = EPERM; + } else if (is_dir) { +#if CONFIG_DIR_INDEX_ENABLE + bool dir_index_on = ext4_sb_feature_com(&fs->sb, EXT4_FCOM_DIR_INDEX); +#else + bool dir_index_on = false; +#endif + kprintf("[ext4] initializing directory %s with i-node:%d\n", name, child_ref.index); + r = ext_dir_initialize(&inode_ref._ref, &child_ref, dir_index_on); + if (r != EOK) { + ext4_dir_remove_entry(&inode_ref._ref, name, strlen(name)); + } + } else { + ext4_fs_inode_links_count_inc(&child_ref); + } + } + + if (r == EOK) { + struct timespec now; +
clock_gettime(CLOCK_REALTIME, &now); + ext4_inode_set_change_inode_time(child_ref.inode, now.tv_sec); + if (!inode_no) { + ext4_inode_set_access_time(child_ref.inode, now.tv_sec); + ext4_inode_set_modif_time(child_ref.inode, now.tv_sec); + } + + ext4_inode_set_change_inode_time(inode_ref._ref.inode, now.tv_sec); + ext4_inode_set_modif_time(inode_ref._ref.inode, now.tv_sec); + + inode_ref._ref.dirty = true; + child_ref.dirty = true; + if (inode_no_created) { + *inode_no_created = child_ref.index; + } + kprintf("[ext4] created %s under i-node %li\n", name, dvp->v_ino); + } else { + if (!inode_no) { + ext4_fs_free_inode(&child_ref); + } + //We do not want to write new inode. But block has to be released. + kprintf("[ext4] failed to create %s under i-node %li due to error:%d!\n", name, dvp->v_ino, r); + child_ref.dirty = false; + } + + ext4_fs_put_inode_ref(&child_ref); + + return r; +} + +static int +ext_create(struct vnode *dvp, char *name, mode_t mode) +{ + kprintf("[ext4] create %s under i-node %li\n", name, dvp->v_ino); + + uint32_t len = strlen(name); + if (len > NAME_MAX || len > EXT4_DIRECTORY_FILENAME_LEN) { + return ENAMETOOLONG; + } + + if (!S_ISREG(mode)) + return EINVAL; + + return ext_dir_link(dvp, name, EXT4_DE_REG_FILE, nullptr, nullptr); +} + +static int +ext_trunc_inode(struct ext4_fs *fs, uint32_t index, uint64_t new_size) +{ + struct ext4_inode_ref inode_ref; + int r = ext4_fs_get_inode_ref(fs, index, &inode_ref); + if (r != EOK) + return r; + + uint64_t inode_size = ext4_inode_get_size(&fs->sb, inode_ref.inode); + ext4_fs_put_inode_ref(&inode_ref); +/* + bool has_trans = mp->fs.jbd_journal && mp->fs.curr_trans; + if (has_trans) + ext4_trans_stop(mp);*/ + + while (inode_size > new_size + CONFIG_MAX_TRUNCATE_SIZE) { + + inode_size -= CONFIG_MAX_TRUNCATE_SIZE; + + //ext4_trans_start(mp); + r = ext4_fs_get_inode_ref(fs, index, &inode_ref); + if (r != EOK) { + //ext4_trans_abort(mp); + break; + } + r = ext4_fs_truncate_inode(&inode_ref, inode_size); + 
if (r != EOK) + ext4_fs_put_inode_ref(&inode_ref); + else + r = ext4_fs_put_inode_ref(&inode_ref); + + if (r != EOK) { + //ext4_trans_abort(mp); + goto Finish; + }/* else + ext4_trans_stop(mp);*/ + } + + if (inode_size > new_size) { + inode_size = new_size; + + //ext4_trans_start(mp); + r = ext4_fs_get_inode_ref(fs, index, &inode_ref); + if (r != EOK) { + //ext4_trans_abort(mp); + goto Finish; + } + r = ext4_fs_truncate_inode(&inode_ref, inode_size); + if (r != EOK) + ext4_fs_put_inode_ref(&inode_ref); + else + r = ext4_fs_put_inode_ref(&inode_ref); +/* + if (r != EOK) + ext4_trans_abort(mp); + else + ext4_trans_stop(mp);*/ + + } + +Finish: + + /*if (has_trans) + ext4_trans_start(mp);*/ + + return r; +} + +static int +ext_dir_trunc(struct ext4_fs *fs, struct ext4_inode_ref *parent, struct ext4_inode_ref *dir) +{ + int r = EOK; + uint32_t block_size = ext4_sb_get_block_size(&fs->sb); + +#if CONFIG_DIR_INDEX_ENABLE + /* Initialize directory index if supported */ + if (ext4_sb_feature_com(&fs->sb, EXT4_FCOM_DIR_INDEX)) { + r = ext4_dir_dx_init(dir, parent); + if (r != EOK) + return r; + + r = ext_trunc_inode(fs, dir->index, + EXT4_DIR_DX_INIT_BCNT * block_size); + if (r != EOK) + return r; + } else +#endif + { + r = ext_trunc_inode(fs, dir->index, block_size); + if (r != EOK) + return r; + } + + return ext4_fs_truncate_inode(dir, 0); +} + +static int +ext_dir_remove_entry(struct vnode *dvp, struct vnode *vp, char *name) +{ + struct ext4_fs *fs = (struct ext4_fs *)dvp->v_mount->m_data; + auto_write_back wb(fs); + auto_inode_ref parent(fs, dvp->v_ino); + if (parent._r != EOK) { + return parent._r; + } + + auto_inode_ref child(fs, vp->v_ino); + if (child._r != EOK) { + return child._r; + } + + int r = EOK; + uint32_t inode_type = ext4_inode_type(&fs->sb, child._ref.inode); + if (inode_type != EXT4_INODE_MODE_DIRECTORY) { + if (ext4_inode_get_links_cnt(child._ref.inode) == 1) { + r = ext_trunc_inode(fs, child._ref.index, 0); + if (r != EOK) { + return r; + } + } + } else 
{ + r = ext_dir_trunc(fs, &parent._ref, &child._ref); + if (r != EOK) { + return r; + } + } + + /* Remove entry from parent directory */ + r = ext4_dir_remove_entry(&parent._ref, name, strlen(name)); + if (r != EOK) { + return r; + } + + if (inode_type != EXT4_INODE_MODE_DIRECTORY) { + int links_cnt = ext4_inode_get_links_cnt(child._ref.inode); + if (links_cnt) { + ext4_fs_inode_links_count_dec(&child._ref); + child._ref.dirty = true; + + if (links_cnt == 1) {//Zero now + ext4_fs_free_inode(&child._ref); + } + } + } else { + ext4_fs_free_inode(&child._ref); + } + + if (r == EOK) { + struct timespec now; + clock_gettime(CLOCK_REALTIME, &now); + ext4_inode_set_change_inode_time(parent._ref.inode, now.tv_sec); + ext4_inode_set_modif_time(parent._ref.inode, now.tv_sec); + + parent._ref.dirty = true; + } + + return r; +} + +static int +ext_remove(struct vnode *dvp, struct vnode *vp, char *name) +{ + kprintf("[ext4] remove\n"); + return ext_dir_remove_entry(dvp, vp, name); +} + +static int +ext_rename(struct vnode *sdvp, struct vnode *svp, char *snm, + struct vnode *tdvp, struct vnode *tvp, char *tnm) +{ + kprintf("[ext4] rename\n"); + struct ext4_fs *fs = (struct ext4_fs *)sdvp->v_mount->m_data; + auto_write_back wb(fs); + + int r = EOK; + if (tvp) { + // Remove destination file, first ... if exists + kprintf("[ext4] rename removing %s from the target directory\n", tnm); + auto_inode_ref target_dir(fs, tdvp->v_ino); + if (target_dir._r != EOK) { + return target_dir._r; + } + /* Remove entry from target directory */ + r = ext4_dir_remove_entry(&target_dir._ref, tnm, strlen(tnm)); + if (r != EOK) { + return r; + } + } + + auto_inode_ref src_dir(fs, sdvp->v_ino); + if (src_dir._r != EOK) { + return src_dir._r; + } + + auto_inode_ref src_entry(fs, svp->v_ino); + if (src_entry._r != EOK) { + return src_entry._r; + } + + /* Same directory ? 
*/ + if (sdvp == tdvp) { + // Add new entry to the same directory + r = ext4_dir_add_entry(&src_dir._ref, tnm, strlen(tnm), &src_entry._ref); + if (r != EOK) { + return r; + } + } else { + // Add new entry to the destination directory + auto_inode_ref dest_dir(fs, tdvp->v_ino); + if (dest_dir._r != EOK) { + return dest_dir._r; + } + + r = ext4_dir_add_entry(&dest_dir._ref, tnm, strlen(tnm), &src_entry._ref); + if (r != EOK) { + return r; + } + } + + // If directory need to reposition '..' to different parent - target directory + if (ext4_inode_is_type(&fs->sb, src_entry._ref.inode, EXT4_INODE_MODE_DIRECTORY)) { + auto_inode_ref dest_dir(fs, tdvp->v_ino); + if (dest_dir._r != EOK) { + return dest_dir._r; + } + + bool idx; + idx = ext4_inode_has_flag(src_entry._ref.inode, EXT4_INODE_FLAG_INDEX); + struct ext4_dir_search_result res; + if (!idx) { + r = ext4_dir_find_entry(&res, &src_entry._ref, "..", strlen("..")); + if (r != EOK) + return EIO; + + ext4_dir_en_set_inode(res.dentry, dest_dir._ref.index); + ext4_trans_set_block_dirty(res.block.buf); + r = ext4_dir_destroy_result(&src_entry._ref, &res); + if (r != EOK) + return r; + + } else { +#if CONFIG_DIR_INDEX_ENABLE + r = ext4_dir_dx_reset_parent_inode(&src_entry._ref, dest_dir._ref.index); + if (r != EOK) + return r; + +#endif + } + + ext4_fs_inode_links_count_inc(&dest_dir._ref); + dest_dir._ref.dirty = true; + } + + /* Remove old entry from the source directory */ + r = ext4_dir_remove_entry(&src_dir._ref, snm, strlen(snm)); + if (r != EOK) { + return r; + } + + return r; +} + +static int +ext_mkdir(struct vnode *dvp, char *dirname, mode_t mode) +{ + kprintf("[ext4] mkdir %s under i-node %li\n", dirname, dvp->v_ino); + + uint32_t len = strlen(dirname); + if (len > NAME_MAX || len > EXT4_DIRECTORY_FILENAME_LEN) { + return ENAMETOOLONG; + } + + if (!S_ISDIR(mode)) + return EINVAL; + + return ext_dir_link(dvp, dirname, EXT4_DE_DIR, nullptr, nullptr); +} + +static int +ext_rmdir(vnode_t *dvp, vnode_t *vp, char 
*name) +{ + kprintf("[ext4] rmdir\n"); + return ext_dir_remove_entry(dvp, vp, name); +} + +static int +ext_getattr(vnode_t *vp, vattr_t *vap) +{ + kprintf("[ext4] Getting attributes at i-node:%ld\n", vp->v_ino); + struct ext4_fs *fs = (struct ext4_fs *)vp->v_mount->m_data; + + auto_inode_ref inode_ref(fs, vp->v_ino); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + vap->va_mode = ext4_inode_get_mode(&fs->sb, inode_ref._ref.inode); + + uint32_t i_type = ext4_inode_type(&fs->sb, inode_ref._ref.inode); + if (i_type == EXT4_INODE_MODE_DIRECTORY) { + vap->va_type = VDIR; + } else if (i_type == EXT4_INODE_MODE_FILE) { + vap->va_type = VREG; + } else if (i_type == EXT4_INODE_MODE_SOFTLINK) { + vap->va_type = VLNK; + } + + vap->va_nodeid = vp->v_ino; + vap->va_size = ext4_inode_get_size(&fs->sb, inode_ref._ref.inode); + kprintf("[ext4] getattr: va_size:%ld\n", vap->va_size); + + vap->va_atime.tv_sec = ext4_inode_get_access_time(inode_ref._ref.inode); + vap->va_mtime.tv_sec = ext4_inode_get_modif_time(inode_ref._ref.inode); + vap->va_ctime.tv_sec = ext4_inode_get_change_inode_time(inode_ref._ref.inode); + + //auto *fsid = &vnode->v_mount->m_fsid; //TODO + //attr->va_fsid = ((uint32_t)fsid->__val[0]) | ((dev_t) ((uint32_t)fsid->__val[1]) << 32); + + return (EOK); +} + +static int +ext_setattr(vnode_t *vp, vattr_t *vap) +{ + kprintf("[ext4] setattr\n"); + struct ext4_fs *fs = (struct ext4_fs *)vp->v_mount->m_data; + + auto_write_back wb(fs); + auto_inode_ref inode_ref(fs, vp->v_ino); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + if (vap->va_mask & AT_ATIME) { + ext4_inode_set_access_time(inode_ref._ref.inode, vap->va_atime.tv_sec); + inode_ref._ref.dirty = true; + } + + if (vap->va_mask & AT_CTIME) { + ext4_inode_set_change_inode_time(inode_ref._ref.inode, vap->va_ctime.tv_sec); + inode_ref._ref.dirty = true; + } + + if (vap->va_mask & AT_MTIME) { + ext4_inode_set_modif_time(inode_ref._ref.inode, vap->va_mtime.tv_sec); + inode_ref._ref.dirty = true; + 
} + + if (vap->va_mask & AT_MODE) { + ext4_inode_set_mode(&fs->sb, inode_ref._ref.inode, vap->va_mode); + inode_ref._ref.dirty = true; + } + + return (EOK); +} + +static int +ext_truncate(struct vnode *vp, off_t new_size) +{ + kprintf("[ext4] truncate\n"); + struct ext4_fs *fs = (struct ext4_fs *)vp->v_mount->m_data; + auto_write_back wb(fs); + return ext_trunc_inode(fs, vp->v_ino, new_size); +} + +static int +ext_link(vnode_t *tdvp, vnode_t *svp, char *name) +{ + kprintf("[ext4] link\n"); + uint32_t len = strlen(name); + if (len > NAME_MAX || len > EXT4_DIRECTORY_FILENAME_LEN) { + return ENAMETOOLONG; + } + + uint32_t source_link_no = svp->v_ino; + return ext_dir_link(tdvp, name, EXT4_DE_REG_FILE, &source_link_no, nullptr); +} + +static int +ext_arc(vnode_t *vp, struct file* fp, uio_t *uio) +{ + kprintf("[ext4] arc\n"); + return (EINVAL); +} + +static int +ext_fallocate(vnode_t *vp, int mode, loff_t offset, loff_t len) +{ + kprintf("[ext4] fallocate\n"); + return (EINVAL); +} + +static int +ext_readlink(vnode_t *vp, uio_t *uio) +{ + kprintf("[ext4] readlink\n"); + if (vp->v_type != VLNK) { + return EINVAL; + } + if (uio->uio_offset < 0) { + return EINVAL; + } + if (uio->uio_resid == 0) { + return 0; + } + + struct ext4_fs *fs = (struct ext4_fs *)vp->v_mount->m_data; + + auto_inode_ref inode_ref(fs, vp->v_ino); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + uint64_t fsize = ext4_inode_get_size(&fs->sb, inode_ref._ref.inode); + if (fsize < sizeof(inode_ref._ref.inode->blocks) + && !ext4_inode_get_blocks_count(&fs->sb, inode_ref._ref.inode)) { + + char *content = (char *)inode_ref._ref.inode->blocks; + return uiomove(content, fsize, uio); + } else { + uint32_t block_size = ext4_sb_get_block_size(&fs->sb); + void *buf = malloc(block_size); + size_t read_count = 0; + int ret = ext_internal_read(fs, &inode_ref._ref, uio->uio_offset, buf, fsize, &read_count); + if (ret) { + kprintf("[ext_readlink] Error reading data\n"); + free(buf); + return ret; + } + + 
ret = uiomove(buf, read_count, uio); + free(buf); + return ret; + } +} + +static int +ext_fsymlink_set(struct ext4_fs *fs, uint32_t inode_no, const void *buf, uint32_t size) +{ + uint32_t block_size = ext4_sb_get_block_size(&fs->sb); + if (size > block_size) { + return EINVAL; + } + + auto_inode_ref inode_ref(fs, inode_no); + if (inode_ref._r != EOK) { + return inode_ref._r; + } + + /*If the size of symlink is smaller than 60 bytes*/ + if (size < sizeof(inode_ref._ref.inode->blocks)) { + memset(inode_ref._ref.inode->blocks, 0, sizeof(inode_ref._ref.inode->blocks)); + memcpy(inode_ref._ref.inode->blocks, buf, size); + ext4_inode_clear_flag(inode_ref._ref.inode, EXT4_INODE_FLAG_EXTENTS); + } else { + ext4_fs_inode_blocks_init(fs, &inode_ref._ref); + + uint32_t sblock; + ext4_fsblk_t fblock; + int r = ext4_fs_append_inode_dblk(&inode_ref._ref, &fblock, &sblock); + if (r != EOK) + return r; + + uint64_t off = fblock * block_size; + r = ext4_block_writebytes(fs->bdev, off, buf, size); + if (r != EOK) + return r; + } + + ext4_inode_set_size(inode_ref._ref.inode, size); + inode_ref._ref.dirty = true; + + return EOK; +} + +static int +ext_symlink(vnode_t *dvp, char *name, char *link) +{ + kprintf("[ext4] symlink\n"); + struct ext4_fs *fs = (struct ext4_fs *)dvp->v_mount->m_data; + auto_write_back wb(fs); + uint32_t inode_no_created; + int r = ext_dir_link(dvp, name, EXT4_DE_SYMLINK, nullptr, &inode_no_created); + if (r == EOK ) { + return ext_fsymlink_set(fs, inode_no_created, link, strlen(link)); + } + return r; +} + +#define ext_seek ((vnop_seek_t)vop_nullop) +#define ext_inactive ((vnop_inactive_t)vop_nullop) + +struct vnops ext_vnops = { + ext_open, /* open */ + ext_close, /* close */ + ext_read, /* read */ + ext_write, /* write */ + ext_seek, /* seek */ + ext_ioctl, /* ioctl */ + ext_fsync, /* fsync */ + ext_readdir, /* readdir */ + ext_lookup, /* lookup */ + ext_create, /* create */ + ext_remove, /* remove */ + ext_rename, /* rename */ + ext_mkdir, /* mkdir */ + 
ext_rmdir, /* rmdir */ + ext_getattr, /* getattr */ + ext_setattr, /* setattr */ + ext_inactive, /* inactive */ + ext_truncate, /* truncate */ + ext_link, /* link */ + ext_arc, /* arc */ + ext_fallocate, /* fallocate */ + ext_readlink, /* read link */ + ext_symlink, /* symbolic link */ +}; + diff --git a/modules/libext/module.py b/modules/libext/module.py new file mode 100644 index 0000000000..00f14154b8 --- /dev/null +++ b/modules/libext/module.py @@ -0,0 +1,3 @@ +from osv.modules import api + +api.require('lwext4') diff --git a/modules/libext/test.sh b/modules/libext/test.sh new file mode 100755 index 0000000000..1341b255ac --- /dev/null +++ b/modules/libext/test.sh @@ -0,0 +1,103 @@ +#!/bin/bash + +run_test() +{ + local TEST_COMMAND="$1" + local RUN_OPTIONS="$2" + echo "###### Running $1" + scripts/run.py --execute="--mount-fs=ext,/dev/vblk1,/data $TEST_COMMAND" --second-disk-image ./ext_images/ext4.img $RUN_OPTIONS +} + +if [[ "$1" != "" ]]; then + run_test "$1" "$2" + exit 0 +fi + +BASE='/data' +#BASE='/data/native-example' +#BASE='/data/TEST1' + +run_test "/find /data -ls" + +run_test "/mkdir -p $BASE" + +#Delete if exists, touch, stat and cat +run_test "/rm -f $BASE/JAJA" +run_test "/touch $BASE/JAJA" +run_test "/stat $BASE/JAJA" +run_test "/cat $BASE/JAJA" +run_test "/rm $BASE/JAJA" + +#Delete if exists, copy to test write, stat and cat +run_test "/rm -f $BASE/JAJA" +run_test "/cp /proc/mounts $BASE/JAJA" +run_test "/stat $BASE/JAJA" +run_test "/cat $BASE/JAJA" +run_test "/rm $BASE/JAJA" + +#Make empty dir, stat and list +run_test "/rm -rf $BASE/DIR" +run_test "/mkdir $BASE/DIR" +run_test "/stat $BASE/DIR" +run_test "/rmdir $BASE/DIR" + +#Make empty dir, stat and list +run_test "/rm -rf $BASE/DIR" +run_test "/mkdir $BASE/DIR" +run_test "/stat $BASE/DIR" +run_test "/ls -la $BASE/DIR" +run_test "/mkdir $BASE/DIR/SUBDIR" +run_test "/cp /proc/mounts $BASE/DIR/file1" +run_test "/touch $BASE/DIR/file2" +run_test "/cp $BASE/DIR/file1 $BASE/DIR/file3" +run_test 
"/find $BASE/DIR -ls" +run_test "/ls -la $BASE/DIR" +run_test "/stat -f $BASE/DIR" +run_test "/rm -rf $BASE/DIR" + +#run_test "$BASE/hello-static" + +#Test symlinks and hardlinks +run_test "/rm -rf $BASE/DIR" +run_test "/mkdir $BASE/DIR" +run_test "/ln -s /proc/mounts $BASE/DIR/symlink1" +run_test "/cat $BASE/DIR/symlink1" +run_test "/readlink -f $BASE/DIR/symlink1" +run_test "/touch $BASE/DIR/file1" +run_test "/ln $BASE/DIR/file1 $BASE/DIR/file1_hard" +run_test "/cp /proc/mounts $BASE/DIR/file1_hard" +run_test "/cat $BASE/DIR/file1" +run_test "/cat $BASE/DIR/file1_hard" +run_test "/ls -la $BASE/DIR" +run_test "/rm -rf $BASE/DIR" + +#Test rename +run_test "/rm -rf $BASE/DIR2" +run_test "/mkdir $BASE/DIR2" +run_test "/cp /proc/mounts $BASE/DIR2/file1" +run_test "/mv $BASE/DIR2/file1 $BASE/DIR2/file2" +run_test "/mkdir $BASE/DIR2/SUBDIR1" +run_test "/mkdir $BASE/DIR2/SUBDIR2" +run_test "/cp /proc/mounts $BASE/DIR2/SUBDIR2" +run_test "/mv $BASE/DIR2/SUBDIR2 $BASE/DIR2/SUBDIR1" +run_test "/mv $BASE/DIR2/SUBDIR1 $BASE/DIR2/SUBDIR3" +run_test "/mv $BASE/DIR2/file2 $BASE/DIR2/SUBDIR3" +run_test "/find $BASE/DIR2 -ls" + +#Test truncate +run_test "/rm -rf $BASE/DIR3" +run_test "/mkdir $BASE/DIR3" +run_test "/cp /proc/mounts $BASE/DIR3/file1" +run_test "/stat $BASE/DIR3/file1" +run_test "/truncate -s 64 $BASE/DIR3/file1" +run_test "/stat $BASE/DIR3/file1" +run_test "/rm -rf $BASE/DIR3" + +#Test "cp -rf" and "rm -rf" +run_test "/rm -rf $BASE/DIR4" +run_test "/mkdir $BASE/DIR4" +run_test "cp -rf /data/fs $BASE/DIR4" +run_test "find $BASE/DIR4 -ls" +run_test "stat $BASE/DIR4" +#run_test "rm -rf $BASE/DIR4/*" - does not work? 
+run_test "rm -rf $BASE/DIR4"
diff --git a/modules/lwext4/.gitignore b/modules/lwext4/.gitignore
new file mode 100644
index 0000000000..045951300c
--- /dev/null
+++ b/modules/lwext4/.gitignore
@@ -0,0 +1 @@
+upstream
diff --git a/modules/lwext4/Makefile b/modules/lwext4/Makefile
new file mode 100644
index 0000000000..46ed2bee97
--- /dev/null
+++ b/modules/lwext4/Makefile
@@ -0,0 +1,38 @@
+# Clones lwext4 into ./upstream and builds liblwext4.so with CMake.
+src = $(shell readlink -f ../..)
+module-dir = $(src)/modules/lwext4
+
+VERSION_MAJOR = 1
+VERSION_MINOR = 0
+VERSION_PATCH = 0
+
+VERSION = $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
+
+BUILD_TYPE = Release
+
+COMMON_DEFINITIONS = \
+	-DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \
+	-DVERSION_MAJOR=$(VERSION_MAJOR) \
+	-DVERSION_MINOR=$(VERSION_MINOR) \
+	-DVERSION_PATCH=$(VERSION_PATCH) \
+	-DVERSION=$(VERSION) \
+
+all: module
+module: lwext4
+
+lwext4: upstream/lwext4/build_lib_only/src/liblwext4.so
+
+# None of these targets names a file, so keep make from looking for one.
+.PHONY: all module lwext4 clean
+
+upstream/lwext4/.git:
+	mkdir -p $(module-dir)/upstream && cd $(module-dir)/upstream && \
+	git clone --depth 1 https://github.com/osvunikernel/lwext4.git
+
+upstream/lwext4/build_lib_only/src/liblwext4.so: upstream/lwext4/.git
+	cd $(module-dir)/upstream/lwext4 && \
+	rm -R -f build_lib_only && mkdir build_lib_only && \
+	cd build_lib_only && cmake $(COMMON_DEFINITIONS) -DLIB_ONLY=TRUE -DLWEXT4_BUILD_SHARED_LIB=ON .. && $(MAKE)
+
+clean:
+	cd $(module-dir) && rm -rf upstream
diff --git a/modules/lwext4/usr.manifest b/modules/lwext4/usr.manifest
new file mode 100644
index 0000000000..70b8fb2199
--- /dev/null
+++ b/modules/lwext4/usr.manifest
@@ -0,0 +1,9 @@
+#
+# Copyright (C) 2024 Waldemar Kozaczuk
+#
+# This work is open source software, licensed under the terms of the
+# BSD license as described in the LICENSE file in the top-level directory.
+#
+
+[manifest]
+/usr/lib/liblwext4.so: ${MODULE_DIR}/upstream/lwext4/build_lib_only/src/liblwext4.so