diff --git a/fs/tux3/Makefile b/fs/tux3/Makefile index 5ae93dc..bb4b66f 100644 --- a/fs/tux3/Makefile +++ b/fs/tux3/Makefile @@ -8,6 +8,6 @@ clean: else obj-$(CONFIG_TUX3) += tux3.o tux3-objs += balloc.o btree.o dir.o dleaf.o filemap.o hexdump.o iattr.o \ - ileaf.o inode.o super.o xattr.o + ileaf.o namei.o inode.o super.o xattr.o EXTRA_CFLAGS += -std=gnu99 -Wno-declaration-after-statement endif diff --git a/fs/tux3/balloc.c b/fs/tux3/balloc.c index 98ac7c4..2a99674 100644 --- a/fs/tux3/balloc.c +++ b/fs/tux3/balloc.c @@ -167,6 +167,7 @@ block_t bitmap_dump(struct inode *inode, block_t start, block_t count) block_t balloc_extent_from_range(struct inode *inode, block_t start, unsigned count, unsigned blocks) { trace("balloc %i blocks from [%Lx/%Lx]", blocks, (L)start, (L)count); + assert(blocks > 0); block_t limit = start + count; unsigned blocksize = 1 << tux_sb(inode->i_sb)->blockbits; unsigned mapshift = tux_sb(inode->i_sb)->blockbits + 3; @@ -224,7 +225,7 @@ block_t balloc_from_range(struct inode *inode, block_t start, block_t count) return balloc_extent_from_range(inode, start, count, 1); } -block_t balloc(SB) +block_t balloc(struct sb *sb) { trace_off("balloc block at goal %Lx", (L)sb->nextalloc); block_t goal = sb->nextalloc, total = sb->volblocks, block; @@ -238,7 +239,7 @@ found: return block; } -block_t balloc_extent(SB, unsigned blocks) +block_t balloc_extent(struct sb *sb, unsigned blocks) { trace_off("balloc %x blocks at goal %Lx", blocks, (L)sb->nextalloc); block_t goal = sb->nextalloc, total = sb->volblocks, block; @@ -252,7 +253,7 @@ found: return block; } -void bfree_extent(SB, block_t start, unsigned count) +void bfree_extent(struct sb *sb, block_t start, unsigned count) { unsigned mapshift = sb->blockbits + 3; unsigned mapmask = (1 << mapshift) - 1; @@ -276,7 +277,7 @@ eek: warn("extent 0x%Lx %s!\n", (L)start, why); } -void bfree(SB, block_t block) +void bfree(struct sb *sb, block_t block) { bfree_extent(sb, block, 1); } diff --git a/fs/tux3/btree.c b/fs/tux3/btree.c index 0a0daaa..57b9665 100644 --- a/fs/tux3/btree.c +++ b/fs/tux3/btree.c @@ -34,7 +34,7 @@ static inline unsigned bcount(struct bnode *node) return from_be_u32(node->count); } -static void free_block(SB, block_t block) +static void free_block(struct sb *sb, block_t block) { } @@ -76,36 +76,108 @@ static struct buffer_head *new_node(struct btree *btree) * for the leaf, which has its own specialized traversal algorithms. */ -static inline struct bnode *cursor_node(struct cursor cursor[], int level) +static inline struct bnode *cursor_node(struct cursor *cursor, int level) { - return bufdata(cursor[level].buffer); + return bufdata(cursor->path[level].buffer); } -void release_cursor(struct cursor cursor[], int depth) +struct buffer_head *cursor_leafbuf(struct cursor *cursor) { - for (int i = 0; i < depth; i++) - brelse(cursor[i].buffer); + assert(cursor->len >= 2); /* root-bnode + leaf >= 2 */ + return cursor->path[cursor->len - 1].buffer; +} + +static void level_root_add(struct cursor *cursor, struct buffer_head *buffer, + struct index_entry *next) +{ +#ifdef CURSOR_DEBUG + assert(cursor->len < cursor->maxlen); + assert(cursor->path[cursor->len].buffer == FREE_BUFFER); + assert(cursor->path[cursor->len].next == FREE_NEXT); +#endif + vecmove(cursor->path + 1, cursor->path, cursor->len); + cursor->len++; + cursor->path[0].buffer = buffer; + cursor->path[0].next = next; +} + +static void level_push(struct cursor *cursor, struct buffer_head *buffer, + struct index_entry *next) +{ +#ifdef CURSOR_DEBUG + assert(cursor->len < cursor->maxlen); + assert(cursor->path[cursor->len].buffer == FREE_BUFFER); + assert(cursor->path[cursor->len].next == FREE_NEXT); +#endif + cursor->path[cursor->len].buffer = buffer; + cursor->path[cursor->len].next = next; + cursor->len++; +} + +static struct buffer_head *level_pop(struct cursor *cursor) +{ + struct buffer_head *buffer; +#ifdef CURSOR_DEBUG + assert(cursor->len > 0); +#endif + cursor->len--; + buffer = cursor->path[cursor->len].buffer; +#ifdef CURSOR_DEBUG + cursor->path[cursor->len].buffer = FREE_BUFFER; + cursor->path[cursor->len].next = FREE_NEXT; +#endif + return buffer; +} + +static void level_pop_brelse(struct cursor *cursor) +{ + brelse(level_pop(cursor)); } -void show_cursor(struct cursor cursor[], int depth) +void release_cursor(struct cursor *cursor) +{ + while (cursor->len) + level_pop_brelse(cursor); +} + +void show_cursor(struct cursor *cursor, int depth) { printf(">>> cursor %p/%i:", cursor, depth); for (int i = 0; i < depth; i++) - printf(" [%Lx/%i]", (L)bufindex(cursor[i].buffer), bufcount(cursor[i].buffer)); + printf(" [%Lx/%i]", (L)bufindex(cursor->path[i].buffer), bufcount(cursor->path[i].buffer)); printf("\n"); } +static inline int alloc_cursor_size(int depth) +{ + return sizeof(struct cursor) + sizeof(struct path_level) * depth; +} + struct cursor *alloc_cursor(int depth) { - return malloc(sizeof(struct cursor) * depth); + struct cursor *cursor = malloc(alloc_cursor_size(depth)); + if (cursor) { + cursor->len = 0; +#ifdef CURSOR_DEBUG + cursor->maxlen = depth; + for (int i = 0; i < depth; i++) { + cursor->path[i].buffer = FREE_BUFFER; /* for debug */ + cursor->path[i].next = FREE_NEXT; /* for debug */ + } +#endif + } + return cursor; } -void free_cursor(struct cursor cursor[]) +void free_cursor(struct cursor *cursor) { +#ifdef CURSOR_DEBUG + assert(cursor->len == 0); +#endif free(cursor); } -int probe(BTREE, tuxkey_t key, struct cursor cursor[]) +int probe(struct btree *btree, tuxkey_t key, struct cursor *cursor) { unsigned i, depth = btree->root.depth; struct buffer_head *buffer = sb_bread(vfs_sb(btree->sb), btree->root.block); @@ -119,47 +191,47 @@ int probe(BTREE, tuxkey_t key, struct cursor cursor[]) if (from_be_u64(next->key) > key) break; //printf("probe level %i, %ti of %i\n", i, next - node->entries, bcount(node)); - cursor[i] = (struct cursor){ buffer, next }; + level_push(cursor, buffer, next); if (!(buffer = sb_bread(vfs_sb(btree->sb), from_be_u64((next - 1)->block)))) goto eek; node = (struct bnode *)bufdata(buffer); } assert((btree->ops->leaf_sniff)(btree, bufdata(buffer))); - cursor[depth] = (struct cursor){ buffer }; + level_push(cursor, buffer, NULL); return 0; eek: - release_cursor(cursor, i - 1); + release_cursor(cursor); return -EIO; /* stupid, it might have been NOMEM */ } -static inline int level_finished(struct cursor cursor[], int level) +static inline int level_finished(struct cursor *cursor, int level) { struct bnode *node = cursor_node(cursor, level); - return cursor[level].next == node->entries + bcount(node); + return cursor->path[level].next == node->entries + bcount(node); } // also write level_beginning!!! -int advance(BTREE, struct cursor cursor[]) +int advance(struct btree *btree, struct cursor *cursor) { int depth = btree->root.depth, level = depth; - struct buffer_head *buffer = cursor[level].buffer; - struct bnode *node; + struct buffer_head *buffer; do { - brelse(buffer); + level_pop_brelse(cursor); if (!level) return 0; - node = bufdata(buffer = cursor[--level].buffer); - //printf("pop to level %i, %tx of %x\n", level, cursor[level].next - node->entries, bcount(node)); + level--; } while (level_finished(cursor, level)); do { - //printf("push from level %i, %tx of %x\n", level, cursor[level].next - node->entries, bcount(node)); - if (!(buffer = sb_bread(vfs_sb(btree->sb), from_be_u64(cursor[level].next++->block)))) + buffer = sb_bread(vfs_sb(btree->sb), from_be_u64(cursor->path[level].next->block)); + if (!buffer) goto eek; - cursor[++level] = (struct cursor){ .buffer = buffer, .next = (node = bufdata(buffer))->entries }; + cursor->path[level].next++; + level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries); + level++; } while (level < depth); return 1; eek: - release_cursor(cursor, level); + release_cursor(cursor); return -EIO; } @@ -168,40 +240,43 @@ eek: * all the way to the end of the index block, there we find the key that * separates the subtree we are in (a leaf) from the next subtree to the right. */ -be_u64 *next_keyp(struct cursor cursor[], int depth) +be_u64 *next_keyp(struct cursor *cursor, int depth) { for (int level = depth; level--;) if (!level_finished(cursor, level)) - return &cursor[level].next->key; + return &cursor->path[level].next->key; return NULL; } -tuxkey_t next_key(struct cursor cursor[], int depth) +tuxkey_t next_key(struct cursor *cursor, int depth) { be_u64 *keyp = next_keyp(cursor, depth); return keyp ? from_be_u64(*keyp) : -1; } // also write this_key!!! -void show_tree_range(BTREE, tuxkey_t start, unsigned count) +void show_tree_range(struct btree *btree, tuxkey_t start, unsigned count) { printf("%i level btree at %Li:\n", btree->root.depth, (L)btree->root.block); - struct cursor cursor[30]; // check for overflow!!! + struct cursor *cursor = alloc_cursor(btree->root.depth + 1); + if (!cursor) + error("out of memory"); if (probe(btree, start, cursor)) error("tell me why!!!"); struct buffer_head *buffer; do { - buffer = cursor[btree->root.depth].buffer; + buffer = cursor_leafbuf(cursor); assert((btree->ops->leaf_sniff)(btree, bufdata(buffer))); (btree->ops->leaf_dump)(btree, bufdata(buffer)); //tuxkey_t *next = pnext_key(cursor, btree->depth); //printf("next key = %Lx:\n", next ? (L)*next : 0); } while (--count && advance(btree, cursor)); + free_cursor(cursor); } /* Deletion */ -static void brelse_free(SB, struct buffer_head *buffer) +static void brelse_free(struct sb *sb, struct buffer_head *buffer) { brelse(buffer); if (bufcount(buffer)) { @@ -212,17 +287,17 @@ static void brelse_free(SB, struct buffer_head *buffer) set_buffer_empty(buffer); // free it!!! (and need a buffer free state) } -static void remove_index(struct cursor cursor[], int level) +static void remove_index(struct cursor *cursor, int level) { struct bnode *node = cursor_node(cursor, level); int count = bcount(node), i; /* stomps the node count (if 0th key holds count) */ - memmove(cursor[level].next - 1, cursor[level].next, - (char *)&node->entries[count] - (char *)cursor[level].next); + memmove(cursor->path[level].next - 1, cursor->path[level].next, + (char *)&node->entries[count] - (char *)cursor->path[level].next); node->count = to_be_u32(count - 1); - --(cursor[level].next); - mark_buffer_dirty(cursor[level].buffer); + --(cursor->path[level].next); + mark_buffer_dirty(cursor->path[level].buffer); /* no separator for last entry */ if (level_finished(cursor, level)) @@ -234,13 +309,13 @@ static void remove_index(struct cursor cursor[], int level) * Climb the cursor while at first entry, bail out at root * find the node with the old sep, set it to deleted key */ - if (cursor[level].next == node->entries && level) { - be_u64 sep = (cursor[level].next)->key; - for (i = level - 1; cursor[i].next - 1 == cursor_node(cursor, i)->entries; i--) + if (cursor->path[level].next == node->entries && level) { + be_u64 sep = (cursor->path[level].next)->key; + for (i = level - 1; cursor->path[i].next - 1 == cursor_node(cursor, i)->entries; i--) if (!i) return; - (cursor[i].next - 1)->key = sep; - mark_buffer_dirty(cursor[i].buffer); + (cursor->path[i].next - 1)->key = sep; + mark_buffer_dirty(cursor->path[i].buffer); } } @@ -250,31 +325,35 @@ static void merge_nodes(struct bnode *node, struct bnode *node2) node->count = to_be_u32(bcount(node) + bcount(node2)); } -int delete_from_leaf(BTREE, vleaf *leaf, struct delete_info *info) +int delete_from_leaf(struct btree *btree, vleaf *leaf, struct delete_info *info) { - (btree->ops->leaf_chop)(btree, info->key, leaf); - return 0; + return (btree->ops->leaf_chop)(btree, info->key, leaf); } -int tree_chop(BTREE, struct delete_info *info, millisecond_t deadline) +int tree_chop(struct btree *btree, struct delete_info *info, millisecond_t deadline) { int depth = btree->root.depth, level = depth - 1, suspend = 0; - struct cursor *cursor, *prev; - struct buffer_head *leafbuf, *leafprev = NULL; + struct cursor *cursor; + struct buffer_head *leafbuf, **prev, *leafprev = NULL; struct btree_ops *ops = btree->ops; struct sb *sb = btree->sb; + int ret; cursor = alloc_cursor(depth + 1); - prev = alloc_cursor(depth + 1); - memset(prev, 0, sizeof(struct cursor) * (depth + 1)); + prev = malloc(sizeof(*prev) * depth); + memset(prev, 0, sizeof(*prev) * depth); probe(btree, info->resume, cursor); - leafbuf = cursor[depth].buffer; + leafbuf = level_pop(cursor); /* leaf walk */ while (1) { - if (delete_from_leaf(btree, bufdata(leafbuf), info)) + ret = delete_from_leaf(btree, bufdata(leafbuf), info); + if (ret) { mark_buffer_dirty(leafbuf); + if (ret < 0) + goto out; + } /* try to merge this leaf with prev */ if (leafprev) { @@ -307,10 +386,10 @@ keep_prev_leaf: /* pop and try to merge finished nodes */ while (suspend || level_finished(cursor, level)) { /* try to merge node with prev */ - if (prev[level].buffer) { + if (prev[level]) { assert(level); /* node has no prev */ struct bnode *this = cursor_node(cursor, level); - struct bnode *that = cursor_node(prev, level); + struct bnode *that = bufdata(prev[level]); trace_off("check node %p against %p", this, that); trace_off("this count = %i prev count = %i", bcount(this), bcount(that)); /* try to merge with node to left */ @@ -318,67 +397,70 @@ keep_prev_leaf: trace(">>> can merge node %p into node %p", this, that); merge_nodes(that, this); remove_index(cursor, level - 1); - mark_buffer_dirty(prev[level].buffer); - brelse_free(sb, cursor[level].buffer); + mark_buffer_dirty(prev[level]); + brelse_free(sb, level_pop(cursor)); //dirty_buffer_count_check(sb); goto keep_prev_node; } - brelse(prev[level].buffer); + brelse(prev[level]); } - prev[level].buffer = cursor[level].buffer; + prev[level] = level_pop(cursor); keep_prev_node: /* deepest key in the cursor is the resume address */ if (suspend == -1 && !level_finished(cursor, level)) { suspend = 1; /* only set resume once */ - info->resume = from_be_u64((cursor[level].next)->key); + info->resume = from_be_u64((cursor->path[level].next)->key); } if (!level) { /* remove depth if possible */ - while (depth > 1 && bcount(cursor_node(prev, 0)) == 1) { + while (depth > 1 && bcount(bufdata(prev[0])) == 1) { trace("drop btree level"); - btree->root.block = bufindex(prev[1].buffer); - brelse_free(sb, prev[0].buffer); + btree->root.block = bufindex(prev[1]); + brelse_free(sb, prev[0]); //dirty_buffer_count_check(sb); depth = --btree->root.depth; - memcpy(prev, prev + 1, depth * sizeof(prev[0])); + vecmove(prev, prev + 1, depth); //set_sb_dirty(sb); } - brelse(leafprev); - release_cursor(prev, depth); - free_cursor(cursor); - free_cursor(prev); //sb->snapmask &= ~snapmask; delete_snapshot_from_disk(); //set_sb_dirty(sb); //save_sb(sb); - return suspend; + ret = suspend; + goto out; } level--; - trace_off(printf("pop to level %i, block %Lx, %i of %i nodes\n", level, cursor[level].buffer->index, cursor[level].next - cursor_node(cursor, level)->entries, bcount(cursor_node(cursor, level)));); + trace_off(printf("pop to level %i, block %Lx, %i of %i nodes\n", level, bufindex(cursor->path[level].buffer), cursor->path[level].next - cursor_node(cursor, level)->entries, bcount(cursor_node(cursor, level)));); } /* push back down to leaf level */ while (level < depth - 1) { - struct buffer_head *buffer = sb_bread(vfs_sb(sb), from_be_u64(cursor[level++].next++->block)); + struct buffer_head *buffer = sb_bread(vfs_sb(sb), from_be_u64(cursor->path[level++].next++->block)); if (!buffer) { - brelse(leafprev); - release_cursor(cursor, level - 1); - free_cursor(cursor); - free_cursor(prev); - return -ENOMEM; + ret = -EIO; + goto out; } - cursor[level].buffer = buffer; - cursor[level].next = ((struct bnode *)bufdata(buffer))->entries; + level_push(cursor, buffer, ((struct bnode *)bufdata(buffer))->entries); trace_off(printf("push to level %i, block %Lx, %i nodes\n", level, bufindex(buffer), bcount(cursor_node(cursor, level)));); - }; + } //dirty_buffer_count_check(sb); /* go to next leaf */ - if (!(leafbuf = sb_bread(vfs_sb(sb), from_be_u64(cursor[level].next++->block)))) { - release_cursor(cursor, level); - free_cursor(cursor); - free_cursor(prev); - return -ENOMEM; + if (!(leafbuf = sb_bread(vfs_sb(sb), from_be_u64(cursor->path[level].next++->block)))) { + ret = -EIO; + goto out; } } + +out: + if (leafprev) + brelse(leafprev); + for (int i = 0; i < btree->root.depth; i++) { + if (prev[i]) + brelse(prev[i]); + } + free(prev); + release_cursor(cursor); + free_cursor(cursor); + return ret; } /* Insertion */ @@ -391,13 +473,13 @@ static void add_child(struct bnode *node, struct index_entry *p, block_t child, node->count = to_be_u32(bcount(node) + 1); } -int insert_node(struct btree *btree, u64 childkey, block_t childblock, struct cursor cursor[]) +int insert_node(struct btree *btree, u64 childkey, block_t childblock, struct cursor *cursor) { trace("insert node 0x%Lx key 0x%Lx into node 0x%Lx", (L)childblock, (L)childkey, (L)btree->root.block); int depth = btree->root.depth; while (depth--) { - struct index_entry *next = cursor[depth].next; - struct buffer_head *parentbuf = cursor[depth].buffer; + struct index_entry *next = cursor->path[depth].next; + struct buffer_head *parentbuf = cursor->path[depth].buffer; struct bnode *parent = bufdata(parentbuf); /* insert and exit if not full */ @@ -442,42 +524,42 @@ int insert_node(struct btree *btree, u64 childkey, block_t childblock, struct cu newroot->entries[1].key = to_be_u64(childkey); newroot->entries[1].block = to_be_u64(childblock); btree->root.block = bufindex(newbuf); - vecmove(cursor + 1, cursor, btree->root.depth++ + 1); - cursor[0] = (struct cursor){ .buffer = newbuf }; // .next = ??? + btree->root.depth++; + level_root_add(cursor, newbuf, NULL); // .next = ??? //set_sb_dirty(sb); mark_buffer_dirty(newbuf); return 0; eek: - release_cursor(cursor, depth + 1); + release_cursor(cursor); return -ENOMEM; } -int btree_leaf_split(struct btree *btree, struct cursor cursor[], tuxkey_t key) +int btree_leaf_split(struct btree *btree, struct cursor *cursor, tuxkey_t key) { trace("split leaf"); - struct buffer_head *leafbuf = cursor[btree->root.depth].buffer; struct buffer_head *newbuf = new_leaf(btree); if (!newbuf) { /* the rule: release cursor at point of error */ - release_cursor(cursor, btree->root.depth + 1); + release_cursor(cursor); return -ENOMEM; } + struct buffer_head *leafbuf = cursor_leafbuf(cursor); u64 newkey = (btree->ops->leaf_split)(btree, key, bufdata(leafbuf), bufdata(newbuf)); block_t childblock = bufindex(newbuf); trace_off("use upper? %Li %Li", key, newkey); if (key >= newkey) { - struct buffer_head *swap = leafbuf; - leafbuf = cursor[btree->root.depth].buffer = newbuf; - newbuf = swap; - } - brelse_dirty(newbuf); + mark_buffer_dirty(leafbuf); + level_pop_brelse(cursor); + level_push(cursor, newbuf, NULL); + } else + brelse_dirty(newbuf); return insert_node(btree, newkey, childblock, cursor); } -void *tree_expand(struct btree *btree, tuxkey_t key, unsigned newsize, struct cursor cursor[]) +void *tree_expand(struct btree *btree, tuxkey_t key, unsigned newsize, struct cursor *cursor) { for (int i = 0; i < 2; i++) { - struct buffer_head *leafbuf = cursor[btree->root.depth].buffer; + struct buffer_head *leafbuf = cursor_leafbuf(cursor); void *space = (btree->ops->leaf_resize)(btree, key, bufdata(leafbuf), newsize); if (space) return space; @@ -491,7 +573,7 @@ void *tree_expand(struct btree *btree, tuxkey_t key, unsigned newsize, struct cu return NULL; } -struct btree new_btree(SB, struct btree_ops *ops) +struct btree new_btree(struct sb *sb, struct btree_ops *ops) { struct btree btree = { .sb = sb, .ops = ops }; struct buffer_head *rootbuf = new_node(&btree); diff --git a/fs/tux3/dir.c b/fs/tux3/dir.c index e3af9ce..cf09172 100644 --- a/fs/tux3/dir.c +++ b/fs/tux3/dir.c @@ -154,7 +154,7 @@ create: mark_inode_dirty(dir); offset = (void *)entry - bufdata(buffer); brelse_dirty(buffer); - return (block << blockbits) + offset; + return (block << blockbits) + offset; /* only needed for xattr create */ } tux_dirent *tux_find_entry(struct inode *dir, const char *name, int len, struct buffer_head **result) @@ -162,15 +162,21 @@ tux_dirent *tux_find_entry(struct inode *dir, const char *name, int len, struct unsigned reclen = TUX_REC_LEN(len); unsigned blocksize = 1 << tux_sb(dir->i_sb)->blockbits; unsigned blocks = dir->i_size >> tux_sb(dir->i_sb)->blockbits, block; + int err = -ENOENT; for (block = 0; block < blocks; block++) { struct buffer_head *buffer = blockread(mapping(dir), block); + if (!buffer) { + err = -EIO; // need ERR_PTR for blockread!!! + goto error; + } tux_dirent *entry = bufdata(buffer); tux_dirent *limit = (void *)entry + blocksize - reclen; while (entry <= limit) { if (entry->rec_len == 0) { brelse(buffer); warn("zero length entry at <%Lx:%x>", (L)tux_inode(dir)->inum, block); - return NULL; + err = -EIO; + goto error; } if (tux_match(entry, name, len)) { *result = buffer; @@ -180,8 +186,9 @@ tux_dirent *tux_find_entry(struct inode *dir, const char *name, int len, struct } brelse(buffer); } - *result = NULL; - return NULL; +error: + *result = NULL; /* for debug */ + return ERR_PTR(err); } static unsigned char filetype[TUX_TYPES] = { @@ -195,7 +202,7 @@ static unsigned char filetype[TUX_TYPES] = { [TUX_LNK] = DT_LNK, }; -static int tux_readdir(struct file *file, void *state, filldir_t filldir) +int tux_readdir(struct file *file, void *state, filldir_t filldir) { loff_t pos = file->f_pos; #ifdef __KERNEL__ @@ -255,6 +262,7 @@ static int tux_readdir(struct file *file, void *state, filldir_t filldir) int tux_delete_entry(struct buffer_head *buffer, tux_dirent *entry) { + struct inode *dir = buffer_inode(buffer); tux_dirent *prev = NULL, *this = bufdata(buffer); while ((char *)this < (char *)entry) { if (this->rec_len == 0) { @@ -271,83 +279,48 @@ int tux_delete_entry(struct buffer_head *buffer, tux_dirent *entry) memset(entry->name, 0, entry->name_len); entry->name_len = entry->type = 0; entry->inum = to_be_u32(0); - brelse(buffer); + brelse_dirty(buffer); + dir->i_ctime = dir->i_mtime = gettime(); + mark_inode_dirty(dir); return 0; } -#ifdef __KERNEL__ -static struct dentry *tux_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) +int tux_dir_is_empty(struct inode *dir) { + unsigned blockbits = tux_sb(dir->i_sb)->blockbits; + unsigned blocks = dir->i_size >> blockbits, blocksize = 1 << blockbits; + /* FIXME: will overflow on 32bit arch */ + be_u32 self = to_be_u32(tux_inode(dir)->inum); struct buffer_head *buffer; - struct inode *inode = NULL; - tux_dirent *dirent; - dirent = tux_find_entry(dir, dentry->d_name.name, dentry->d_name.len, - &buffer); - if (dirent) { - inode = tux3_iget(dir->i_sb, from_be_u32(dirent->inum)); - brelse(buffer); - if (IS_ERR(inode)) - return ERR_CAST(inode); - } - return d_splice_alias(inode, dentry); -} - -static int tux3_create(struct inode *dir, struct dentry *dentry, int mode, - struct nameidata *nd) -{ - struct inode *inode; - loff_t where; - int err; - - inode = tux_create_inode(dir, mode); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); - goto error; - } + for (unsigned block = 0; block < blocks; block++) { + buffer = blockread(mapping(dir), block); + if (!buffer) + return -EIO; - where = tux_create_entry(dir, dentry->d_name.name, dentry->d_name.len, - tux_inode(inode)->inum, mode); - if (where < 0) { - err = where; - goto error; + tux_dirent *entry = bufdata(buffer); + tux_dirent *limit = bufdata(buffer) + blocksize - TUX_REC_LEN(1); + for (; entry <= limit; entry = next_entry(entry)) { + if (!entry->rec_len) { + warn("zero length entry at <%Lx:%x>", (L)tux_inode(dir)->inum, block); + goto not_empty; + } + if (is_deleted(entry)) + continue; + if (entry->name[0] != '.') + goto not_empty; + if (entry->name_len > 2) + goto not_empty; + if (entry->name_len < 2) { + if (entry->inum != self) + goto not_empty; + } else if (entry->name[1] != '.') + goto not_empty; + } + brelse(buffer); } - - d_instantiate(dentry, inode); + return 1; +not_empty: + brelse(buffer); return 0; - -error: - /* FIXME: we may want to call purge_inum() here */ - inode_dec_link_count(inode); - iput(inode); - return err; } - -const struct file_operations tux_dir_fops = { - .llseek = generic_file_llseek, - .read = generic_read_dir, - .readdir = tux_readdir, -}; - -const struct inode_operations tux_dir_iops = { - .create = tux3_create, - .lookup = tux_lookup, -// .link = ext3_link, -// .unlink = ext3_unlink, -// .symlink = ext3_symlink, -// .mkdir = ext3_mkdir, -// .rmdir = ext3_rmdir, -// .mknod = ext3_mknod, -// .rename = ext3_rename, -// .setattr = ext3_setattr, -// .setxattr = generic_setxattr, -// .getxattr = generic_getxattr, -// .listxattr = ext3_listxattr, -// .removexattr = generic_removexattr, -// .permission = ext3_permission, - /* FIXME: why doesn't ext4 support this for directory? */ -// .fallocate = ext4_fallocate, -// .fiemap = ext4_fiemap, -}; -#endif /* !__KERNEL__ */ diff --git a/fs/tux3/dleaf.c b/fs/tux3/dleaf.c index 00b87c3..a09d865 100644 --- a/fs/tux3/dleaf.c +++ b/fs/tux3/dleaf.c @@ -68,7 +68,7 @@ static inline struct dleaf *to_dleaf(vleaf *leaf) return leaf; } -int dleaf_init(BTREE, vleaf *leaf) +int dleaf_init(struct btree *btree, vleaf *leaf) { if (!leaf) return -1; @@ -79,39 +79,39 @@ int dleaf_init(BTREE, vleaf *leaf) return 0; } -int dleaf_sniff(BTREE, vleaf *leaf) +int dleaf_sniff(struct btree *btree, vleaf *leaf) { return from_be_u16(to_dleaf(leaf)->magic) == 0x1eaf; } -unsigned dleaf_free(BTREE, vleaf *leaf) +unsigned dleaf_free(struct btree *btree, vleaf *leaf) { return from_be_u16(to_dleaf(leaf)->used) - from_be_u16(to_dleaf(leaf)->free); } -unsigned dleaf_need(BTREE, struct dleaf *leaf) +unsigned dleaf_need(struct btree *btree, struct dleaf *leaf) { return btree->sb->blocksize - dleaf_free(btree, leaf) - sizeof(struct dleaf); } -int dleaf_free2(BTREE, void *vleaf) +int dleaf_free2(struct btree *btree, void *vleaf) { struct dleaf *leaf = vleaf; struct group *gdict = (void *)leaf + btree->sb->blocksize, *gstop = gdict - dleaf_groups(leaf); struct entry *edict = (void *)gstop, *entry = edict; - struct extent *extents = leaf->table; + struct diskextent *extents = leaf->table; for (struct group *group = gdict; group-- > gstop;) extents += entry_limit(entry -= group_count(group)); return (void *)entry - (void *)extents; } -void dleaf_dump(BTREE, vleaf *vleaf) +void dleaf_dump(struct btree *btree, vleaf *vleaf) { unsigned blocksize = btree->sb->blocksize; struct dleaf *leaf = vleaf; struct group *groups = (void *)leaf + blocksize, *grbase = --groups - dleaf_groups(leaf); struct entry *entries = (void *)(grbase + 1), *entry = entries; - struct extent *extents = leaf->table; + struct diskextent *extents = leaf->table; printf("%i entry groups:\n", dleaf_groups(leaf)); for (struct group *group = groups; group > grbase; group--) { @@ -127,7 +127,7 @@ void dleaf_dump(BTREE, vleaf *vleaf) if (count < 0) printf(" "); else for (int i = 0; i < count; i++) { - struct extent extent = extents[offset + i]; + struct diskextent extent = extents[offset + i]; printf(" %Lx", (L)extent_block(extent)); if (extent_count(extent)) printf("/%x", extent_count(extent)); @@ -159,13 +159,13 @@ void dleaf_dump(BTREE, vleaf *vleaf) * But it does truncate so it is getting checked in just for now. */ -int dleaf_chop(BTREE, tuxkey_t chop, vleaf *vleaf) +int dleaf_chop(struct btree *btree, tuxkey_t chop, vleaf *vleaf) { struct dleaf *leaf = vleaf; struct group *gdict = (void *)leaf + btree->sb->blocksize, *group = gdict; struct entry *entry = (void *)(--group - dleaf_groups(leaf)); struct group *gstop = group - dleaf_groups(leaf); - struct entry *estop = entry - group_count(group); + struct entry *estop = entry - group_count(group), *eend = NULL; unsigned extents = 0, start = 0, trunc = 0; unsigned newgroups = dleaf_groups(leaf); @@ -175,11 +175,13 @@ int dleaf_chop(BTREE, tuxkey_t chop, vleaf *vleaf) while (1) { unsigned count = entry_limit(entry) - start; tuxkey_t key = get_index(group, entry); + /* FIXME: even if key < chop, we may truncate partially */ if (key >= chop) { if (!trunc) { int removed = entry - estop, remaining = group_count(group) - removed; newgroups = gdict - group - !remaining; inc_group_count(group, - removed); + eend = entry + 1; trunc = 1; } for (int i = 0; i < count; i++) @@ -194,18 +196,23 @@ int dleaf_chop(BTREE, tuxkey_t chop, vleaf *vleaf) estop = entry - group_count(group); start = 0; } + if (!trunc) + return 0; + unsigned tamp = (dleaf_groups(leaf) - newgroups) * sizeof(struct group); unsigned tail = (void *)(gdict - newgroups) - ((void *)entry + tamp); memmove((void *)entry + tamp, entry, tail); set_dleaf_groups(leaf, newgroups); - return 0; + leaf->free = to_be_u16(from_be_u16(leaf->free) - extents * sizeof(struct diskextent)); + leaf->used = to_be_u16(tamp + ((void *)eend - (void *)leaf)); + return 1; } -int dleaf_check(BTREE, struct dleaf *leaf) +int dleaf_check(struct btree *btree, struct dleaf *leaf) { struct group *gdict = (void *)leaf + btree->sb->blocksize, *gstop = gdict - dleaf_groups(leaf); struct entry *edict = (void *)gstop, *entry = edict; - struct extent *extents = leaf->table; + struct diskextent *extents = leaf->table; unsigned excount = 0, encount = 0; char *why; @@ -231,7 +238,7 @@ eek: return -1; } -tuxkey_t dleaf_split(BTREE, tuxkey_t key, vleaf *from, vleaf *into) +tuxkey_t dleaf_split(struct btree *btree, tuxkey_t key, vleaf *from, vleaf *into) { assert(dleaf_sniff(btree, from)); struct dleaf *leaf = from, *dest = into; @@ -292,7 +299,7 @@ tuxkey_t dleaf_split(BTREE, tuxkey_t key, vleaf *from, vleaf *into) return get_index(destgroups - 1, destentries - 1); } -void dleaf_merge(BTREE, struct dleaf *leaf, struct dleaf *from) +void dleaf_merge(struct btree *btree, struct dleaf *leaf, struct dleaf *from) { struct group *groups = (void *)leaf + btree->sb->blocksize, *grbase = groups - dleaf_groups(leaf); struct entry *entries = (void *)grbase; @@ -384,7 +391,7 @@ int dwalk_probe(struct dleaf *leaf, unsigned blocksize, struct dwalk *walk, tuxk struct entry *edict = (struct entry *)(gdict - dleaf_groups(leaf)); struct group *gstop = gdict - dleaf_groups(leaf), *group = gdict; struct entry *estop = edict, *entry; - struct extent *exbase = leaf->table; + struct diskextent *exbase = leaf->table; if (dleaf_groups(leaf)) while (--group >= gstop) { @@ -404,7 +411,7 @@ int dwalk_probe(struct dleaf *leaf, unsigned blocksize, struct dwalk *walk, tuxk exbase += entry_limit(estop); } - struct extent *extent = exbase, *exstop = exbase; + struct diskextent *extent = exbase, *exstop = exbase; //trace("group %i entry %i of %i", gdict - 1 - group, estop + group_count(group) - 1 - entry, group_count(group)); if (!dleaf_groups(leaf) || group < gstop) entry = estop; @@ -446,7 +453,7 @@ tuxkey_t dwalk_index(struct dwalk *walk) * NOTE: ->group and ->entry is still not updated, so dwalk_index() will * return the index for the returned extent. */ -struct extent *dwalk_next(struct dwalk *walk) +struct diskextent *dwalk_next(struct dwalk *walk) { if (!dleaf_groups(walk->leaf)) return NULL; @@ -538,7 +545,7 @@ void dwalk_chop(struct dwalk *walk) // do we ever need this? #define MAX_GROUP_ENTRIES 7 #endif -int dwalk_mock(struct dwalk *walk, tuxkey_t index, struct extent extent) +int dwalk_mock(struct dwalk *walk, tuxkey_t index, struct diskextent extent) { if (!dleaf_groups(walk->leaf) || walk->entry == walk->estop || dwalk_index(walk) != index) { trace("add entry 0x%Lx", (L)index); @@ -561,7 +568,7 @@ int dwalk_mock(struct dwalk *walk, tuxkey_t index, struct extent extent) return 0; } -int dwalk_pack(struct dwalk *walk, tuxkey_t index, struct extent extent) +int dwalk_pack(struct dwalk *walk, tuxkey_t index, struct diskextent extent) { trace("group %ti/%i", walk->gstop + dleaf_groups(walk->leaf) - 1 - walk->group, dleaf_groups(walk->leaf)); //printf("at entry %ti/%i\n", walk->estop + group_count(walk->group) - 1 - walk->entry, group_count(walk->group)); diff --git a/fs/tux3/filemap.c b/fs/tux3/filemap.c index 592f64d..5b8ddc0 100644 --- a/fs/tux3/filemap.c +++ b/fs/tux3/filemap.c @@ -75,7 +75,7 @@ int filemap_extent_io(struct buffer_head *buffer, int write) block_t start, limit; guess_extent(buffer, &start, &limit, write); printf("---- extent 0x%Lx/%Lx ----\n", (L)start, (L)limit - start); - struct cursor *cursor = alloc_cursor(depth + 1); + struct cursor *cursor = alloc_cursor(depth + 2); /* +1 for new depth */ if (!cursor) return -ENOMEM; @@ -88,14 +88,14 @@ retry: /* do not overlap next leaf */ if (limit > next_key(cursor, depth)) limit = next_key(cursor, depth); - struct dleaf *leaf = bufdata(cursor[depth].buffer); + struct dleaf *leaf = bufdata(cursor_leafbuf(cursor)); struct dwalk *walk = &(struct dwalk){ }; -dleaf_dump(&tux_inode(inode)->btree, leaf); + dleaf_dump(&tux_inode(inode)->btree, leaf); /* Probe below io start to include overlapping extents */ dwalk_probe(leaf, sb->blocksize, walk, 0); // start at beginning of leaf just for now /* skip extents below start */ - for (struct extent *extent; (extent = dwalk_next(walk));) + for (struct diskextent *extent; (extent = dwalk_next(walk));) if (dwalk_index(walk) + extent_count(*extent) > start) { if (dwalk_index(walk) <= start) dwalk_back(walk); @@ -103,7 +103,7 @@ dleaf_dump(&tux_inode(inode)->btree, leaf); } struct dwalk rewind = *walk; printf("prior extents:"); - for (struct extent *extent; (extent = dwalk_next(walk));) + for (struct diskextent *extent; (extent = dwalk_next(walk));) printf(" 0x%Lx => %Lx/%x;", (L)dwalk_index(walk), (L)extent_block(*extent), extent_count(*extent)); printf("\n"); @@ -111,9 +111,9 @@ dleaf_dump(&tux_inode(inode)->btree, leaf); printf("---- rewind to 0x%Lx => %Lx/%x ----\n", (L)dwalk_index(&rewind), (L)extent_block(*rewind.extent), extent_count(*rewind.extent)); *walk = rewind; - struct extent *next_extent = NULL; + struct diskextent *next_extent = NULL; block_t index = start, offset = 0; - struct extent seg[1000]; + struct diskextent seg[1000]; unsigned segs = 0; while (index < limit) { trace("index %Lx, limit %Lx", (L)index, (L)limit); @@ -125,6 +125,8 @@ dleaf_dump(&tux_inode(inode)->btree, leaf); if (start > dwalk_index(walk)) count -= start - dwalk_index(walk); index += count; + next_extent = NULL; + continue; } block_t next_index = limit; if ((next_extent = dwalk_next(walk))) { @@ -179,6 +181,7 @@ dleaf_dump(&tux_inode(inode)->btree, leaf); assert(!try); if ((err = btree_leaf_split(&tux_inode(inode)->btree, cursor, 0))) goto eek; + depth = tux_inode(inode)->btree.root.depth; try = 1; goto retry; } @@ -191,7 +194,7 @@ dleaf_dump(&tux_inode(inode)->btree, leaf); dwalk_pack(walk, index, make_extent(extent_block(seg[i]), extent_count(seg[i]))); index += extent_count(seg[i]); } - mark_buffer_dirty(cursor[tux_inode(inode)->btree.root.depth].buffer); + mark_buffer_dirty(cursor_leafbuf(cursor)); //dleaf_dump(&tux_inode(inode)->btree, leaf); /* assert we used exactly the expected space */ @@ -224,7 +227,7 @@ dleaf_dump(&tux_inode(inode)->btree, leaf); index += count; skip = 0; } - release_cursor(cursor, depth + 1); + release_cursor(cursor); free_cursor(cursor); return err; nospace: @@ -252,7 +255,7 @@ int tux3_get_block(struct inode *inode, sector_t iblock, } block_t start = iblock, limit = iblock + max_blocks; - struct cursor *cursor = alloc_cursor(depth + 1); + struct cursor *cursor = alloc_cursor(depth + 2); /* +1 for new depth */ if (!cursor) return -ENOMEM; @@ -265,14 +268,14 @@ retry: /* do not overlap next leaf */ if (limit > next_key(cursor, depth)) limit = next_key(cursor, depth); - struct dleaf *leaf = bufdata(cursor[depth].buffer); + struct dleaf *leaf = bufdata(cursor_leafbuf(cursor)); struct dwalk *walk = &(struct dwalk){ }; dleaf_dump(&tux_inode(inode)->btree, leaf); /* Probe below io start to include overlapping extents */ dwalk_probe(leaf, sbi->blocksize, walk, 0); // start at beginning of leaf just for now /* skip extents below start */ - for (struct extent *extent; (extent = dwalk_next(walk));) + for (struct diskextent *extent; (extent = dwalk_next(walk));) if (dwalk_index(walk) + extent_count(*extent) > start) { if (dwalk_index(walk) <= start) dwalk_back(walk); @@ -280,9 +283,9 @@ retry: } struct dwalk rewind = *walk; - struct extent *next_extent = NULL; + struct diskextent *next_extent = NULL; block_t index = start, offset = 0; - struct extent seg[10]; + struct diskextent seg[10]; unsigned segs = 0, update_dtree = 0; while (index < limit && segs < ARRAY_SIZE(seg)) { trace("index %Lx, limit %Lx", (L)index, (L)limit); @@ -329,6 +332,7 @@ retry: seg[segs++] = make_extent(block, gap); update_dtree = 1; + inode->i_blocks += 1 << (sbi->blockbits - 9); set_buffer_new(bh_result); map_bh(bh_result, inode->i_sb, block); break; @@ -347,6 +351,7 @@ retry: assert(!try); if ((err = btree_leaf_split(&tux_inode(inode)->btree, cursor, 0))) goto eek; + depth = tux_inode(inode)->btree.root.depth; try = 1; goto retry; } @@ -359,7 +364,7 @@ retry: dwalk_pack(walk, index, make_extent(extent_block(seg[i]), extent_count(seg[i]))); index += extent_count(seg[i]); } - mark_buffer_dirty(cursor[tux_inode(inode)->btree.root.depth].buffer); + mark_buffer_dirty(cursor_leafbuf(cursor)); //dleaf_dump(&tux_inode(inode)->btree, leaf); /* assert we used exactly the expected space */ @@ -386,7 +391,7 @@ out: (L)tux_inode(inode)->inum, buffer_mapped(bh_result), (L)bh_result->b_blocknr, bh_result->b_size); - release_cursor(cursor, depth + 1); + release_cursor(cursor); free_cursor(cursor); return err; diff --git a/fs/tux3/iattr.c b/fs/tux3/iattr.c index e69b352..9841a56 100644 --- a/fs/tux3/iattr.c +++ b/fs/tux3/iattr.c @@ -2,7 +2,6 @@ * Inode table attributes * * Original copyright (c) 2008 Daniel Phillips - * Portions copyright (c) 2006-2008 Google Inc. * Licensed under the GPL version 3 * * By contributing changes to this file you grant the original copyright holder @@ -57,6 +56,7 @@ int attr_check(void *attrs, unsigned size) void dump_attrs(struct inode *inode) { //printf("present = %x\n", inode->present); + tuxnode_t *tuxnode = tux_inode(inode); for (int kind = 0; kind < 32; kind++) { if (!(tux_inode(inode)->present & (1 << kind))) continue; @@ -65,7 +65,7 @@ void dump_attrs(struct inode *inode) printf("mode 0%.6o uid %x gid %x ", inode->i_mode, inode->i_uid, inode->i_gid); break; case DATA_BTREE_ATTR: - printf("root %Lx:%u ", (L)tux_inode(inode)->btree.root.block, tux_inode(inode)->btree.root.depth); + printf("root %Lx:%u ", (L)tuxnode->btree.root.block, tuxnode->btree.root.depth); break; case CTIME_SIZE_ATTR: printf("ctime %Lx size %Lx ", (L)tuxtime(inode->i_ctime), (L)inode->i_size); @@ -89,10 +89,11 @@ void dump_attrs(struct inode *inode) void *encode_attrs(struct inode *inode, void *attrs, unsigned size) { - //printf("encode %u attr bytes\n", size); + trace_off("encode %u attr bytes", size); + tuxnode_t *tuxnode = tux_inode(inode); void *limit = attrs + size - 3; for (int kind = MIN_ATTR; kind < VAR_ATTRS; kind++) { - if (!(tux_inode(inode)->present & (1 << kind))) + if (!(tuxnode->present & (1 << kind))) continue; if (attrs >= limit) break; @@ -110,9 +111,8 @@ void *encode_attrs(struct inode *inode, void *attrs, unsigned size) case MTIME_ATTR: attrs = encode48(attrs, tuxtime(inode->i_mtime) >> TIME_ATTR_SHIFT); break; - case DATA_BTREE_ATTR:; - struct root *root = &tux_inode(inode)->btree.root; - attrs = encode64(attrs, ((u64)root->depth << 48) | root->block); + case DATA_BTREE_ATTR: + attrs = encode64(attrs, pack_root(&tuxnode->btree.root)); break; case LINK_COUNT_ATTR: attrs = encode32(attrs, inode->i_nlink); @@ -124,10 +124,11 @@ void *encode_attrs(struct inode *inode, void *attrs, unsigned size) void *decode_attrs(struct inode *inode, void *attrs, unsigned size) { - //printf("decode %u attr bytes\n", size); + trace_off("decode %u attr bytes", size); u64 v64; u32 v32; - struct xattr *xattr = tux_inode(inode)->xcache ? tux_inode(inode)->xcache->xattrs : NULL; + tuxnode_t *tuxnode = tux_inode(inode); + struct xattr *xattr = tuxnode->xcache ? tuxnode->xcache->xattrs : NULL; void *limit = attrs + size; while (attrs < limit - 1) { unsigned head; @@ -157,9 +158,12 @@ void *decode_attrs(struct inode *inode, void *attrs, unsigned size) break; case DATA_BTREE_ATTR: attrs = decode64(attrs, &v64); - tux_inode(inode)->btree = (struct btree){ .sb = tux_sb(inode->i_sb), .entries_per_leaf = 64, // !!! should depend on blocksize + tuxnode->btree = (struct btree){ + .sb = tux_sb(inode->i_sb), + .entries_per_leaf = 64, // !!! should depend on blocksize .ops = &dtree_ops, - .root = { .block = v64 & (-1ULL >> 16), .depth = v64 >> 48 } }; + .root = unpack_root(v64), + }; break; case LINK_COUNT_ATTR: attrs = decode32(attrs, &inode->i_nlink); @@ -171,18 +175,18 @@ void *decode_attrs(struct inode *inode, void *attrs, unsigned size) attrs = decode16(attrs, &atom); *xattr = (struct xattr){ .atom = atom, .size = bytes - 2 }; unsigned xsize = sizeof(struct xattr) + xattr->size; - assert((void *)xattr + xsize <= (void *)tux_inode(inode)->xcache + tux_inode(inode)->xcache->maxsize); + assert((void *)xattr + xsize <= (void *)tuxnode->xcache + tuxnode->xcache->maxsize); memcpy(xattr->body, attrs, xattr->size); attrs += xattr->size; - tux_inode(inode)->xcache->size += xsize; + tuxnode->xcache->size += xsize; xattr = xcache_next(xattr); // check limit!!! break; default: return NULL; } - tux_inode(inode)->present |= 1 << kind; + tuxnode->present |= 1 << kind; } - if (!(tux_inode(inode)->present & MTIME_BIT)) + if (!(tuxnode->present & MTIME_BIT)) inode->i_mtime = inode->i_ctime; return attrs; } diff --git a/fs/tux3/ileaf.c b/fs/tux3/ileaf.c index 6982fca..4caba7c 100644 --- a/fs/tux3/ileaf.c +++ b/fs/tux3/ileaf.c @@ -41,33 +41,33 @@ static inline tuxkey_t ibase(struct ileaf *leaf) return from_be_u64(leaf->ibase); } -int ileaf_init(BTREE, vleaf *leaf) +int ileaf_init(struct btree *btree, vleaf *leaf) { printf("initialize inode leaf %p\n", leaf); *(struct ileaf *)leaf = (struct ileaf){ to_be_u16(0x90de) }; return 0; } -int ileaf_sniff(BTREE, vleaf *leaf) +int ileaf_sniff(struct btree *btree, vleaf *leaf) { return ((struct ileaf *)leaf)->magic == to_be_u16(0x90de); } -unsigned ileaf_need(BTREE, vleaf *vleaf) +unsigned ileaf_need(struct btree *btree, vleaf *vleaf) { be_u16 *dict = vleaf + btree->sb->blocksize; unsigned count = icount(to_ileaf(vleaf)); return atdict(dict, count) + count * sizeof(*dict); } -unsigned ileaf_free(BTREE, vleaf *leaf) +unsigned ileaf_free(struct btree *btree, vleaf *leaf) { return btree->sb->blocksize - ileaf_need(btree, leaf) - sizeof(struct ileaf); } -void ileaf_dump(BTREE, vleaf *vleaf) +void ileaf_dump(struct btree *btree, vleaf *vleaf) { - SB = btree->sb; + struct sb *sb = btree->sb; struct ileaf *leaf = vleaf; inum_t inum = ibase(leaf); be_u16 *dict = vleaf + sb->blocksize; @@ -99,7 +99,7 @@ void ileaf_dump(BTREE, vleaf *vleaf) } } -void *ileaf_lookup(BTREE, inum_t inum, struct ileaf *leaf, unsigned *result) +void *ileaf_lookup(struct btree *btree, inum_t inum, struct ileaf *leaf, unsigned *result) { assert(inum >= ibase(leaf)); assert(inum < ibase(leaf) + btree->entries_per_leaf); @@ -116,7 +116,7 @@ void *ileaf_lookup(BTREE, inum_t inum, struct ileaf *leaf, unsigned *result) return attrs; } -int isinorder(BTREE, struct ileaf *leaf) +int isinorder(struct btree *btree, struct ileaf *leaf) { be_u16 *dict = (void *)leaf + btree->sb->blocksize; for (int i = 0, offset = 0, limit; --i >= -icount(leaf); offset = limit) @@ -125,7 +125,7 @@ int isinorder(BTREE, struct ileaf *leaf) return 1; } -int ileaf_check(BTREE, struct ileaf *leaf) +int ileaf_check(struct btree *btree, struct ileaf *leaf) { char *why; why = "not an inode table leaf"; @@ -140,7 +140,7 @@ eek: return -1; } -void ileaf_trim(BTREE, struct ileaf *leaf) { +void ileaf_trim(struct btree *btree, struct ileaf *leaf) { be_u16 *dict = (void *)leaf + btree->sb->blocksize; while (icount(leaf) > 1 && *(dict - icount(leaf)) == *(dict - icount(leaf) + 1)) leaf->count = to_be_u16(from_be_u16(leaf->count) - 1); @@ -150,7 +150,7 @@ void ileaf_trim(BTREE, struct ileaf *leaf) { #define SPLIT_AT_INUM -tuxkey_t ileaf_split(BTREE, tuxkey_t inum, vleaf *from, vleaf *into) +tuxkey_t ileaf_split(struct btree *btree, tuxkey_t inum, vleaf *from, vleaf *into) { assert(ileaf_sniff(btree, from)); struct ileaf *leaf = from, *dest = into; @@ -194,7 +194,7 @@ tuxkey_t ileaf_split(BTREE, tuxkey_t inum, vleaf *from, vleaf *into) return ibase(dest); } -void ileaf_merge(BTREE, struct ileaf *leaf, struct ileaf *from) +void ileaf_merge(struct btree *btree, struct ileaf *leaf, struct ileaf *from) { if (!icount(from)) return; @@ -209,7 +209,7 @@ void ileaf_merge(BTREE, struct ileaf *leaf, struct ileaf *from) add_idict(dict - i, from_be_u16(*(dict - at))); } -void *ileaf_resize(BTREE, tuxkey_t inum, vleaf *base, unsigned newsize) +void *ileaf_resize(struct btree *btree, tuxkey_t inum, vleaf *base, unsigned newsize) { assert(ileaf_sniff(btree, base)); struct ileaf *leaf = base; @@ -240,7 +240,7 @@ void *ileaf_resize(BTREE, tuxkey_t inum, vleaf *base, unsigned newsize) return attrs; } -inum_t find_empty_inode(BTREE, struct ileaf *leaf, inum_t goal) +inum_t find_empty_inode(struct btree *btree, struct ileaf *leaf, inum_t goal) { assert(goal >= ibase(leaf)); goal -= ibase(leaf); @@ -256,7 +256,7 @@ inum_t find_empty_inode(BTREE, struct ileaf *leaf, inum_t goal) return i + ibase(leaf); } -int ileaf_purge(BTREE, inum_t inum, struct ileaf *leaf) +int ileaf_purge(struct btree *btree, inum_t inum, struct ileaf *leaf) { if (inum < ibase(leaf) || inum - ibase(leaf) >= btree->entries_per_leaf) return -EINVAL; diff --git a/fs/tux3/inode.c b/fs/tux3/inode.c index 0db4839..1726ff0 100644 --- a/fs/tux3/inode.c +++ b/fs/tux3/inode.c @@ -2,7 +2,6 @@ * Tux3 versioning filesystem in user space * * Original copyright (c) 2008 Daniel Phillips - * Portions copyright (c) 2006-2008 Google Inc. * Licensed under the GPL version 3 * * By contributing changes to this file you grant the original copyright holder @@ -15,7 +14,7 @@ #define trace trace_on #endif -int store_attrs(struct inode *inode, struct cursor cursor[]) +int store_attrs(struct inode *inode, struct cursor *cursor) { unsigned size = encode_asize(tux_inode(inode)->present) + encode_xsize(inode); void *base = tree_expand(&tux_sb(inode->i_sb)->itable, tux_inode(inode)->inum, size, cursor); @@ -24,7 +23,7 @@ int store_attrs(struct inode *inode, struct cursor cursor[]) void *attr = encode_attrs(inode, base, size); attr = encode_xattrs(inode, attr, base + size - attr); assert(attr == base + size); - mark_buffer_dirty(cursor[tux_sb(inode->i_sb)->itable.root.depth].buffer); + mark_buffer_dirty(cursor_leafbuf(cursor)); return 0; } @@ -53,9 +52,9 @@ int store_attrs(struct inode *inode, struct cursor cursor[]) int make_inode(struct inode *inode, struct tux_iattr *iattr) { - SB = tux_sb(inode->i_sb); + struct sb *sb = tux_sb(inode->i_sb); int err = -ENOENT, depth = sb->itable.root.depth; - struct cursor *cursor = alloc_cursor(depth + 1); + struct cursor *cursor = alloc_cursor(depth + 2); /* +1 for now depth */ if (!cursor) return -ENOMEM; @@ -63,7 +62,7 @@ int make_inode(struct inode *inode, struct tux_iattr *iattr) free_cursor(cursor); return err; } - struct buffer_head *leafbuf = cursor[depth].buffer; + struct buffer_head *leafbuf = cursor_leafbuf(cursor); // struct ileaf *leaf = to_ileaf(bufdata(leafbuf)); trace("create inode 0x%Lx", (L)tux_inode(inode)->inum); @@ -95,15 +94,15 @@ int make_inode(struct inode *inode, struct tux_iattr *iattr) inode->i_nlink = 1; tux_inode(inode)->inum = inum; tux_inode(inode)->btree = new_btree(sb, &dtree_ops); // error??? - tux_inode(inode)->present = CTIME_SIZE_BIT|MODE_OWNER_BIT|DATA_BTREE_BIT; + tux_inode(inode)->present = CTIME_SIZE_BIT|MODE_OWNER_BIT|DATA_BTREE_BIT|LINK_COUNT_BIT; if ((err = store_attrs(inode, cursor))) - goto eek; - release_cursor(cursor, depth + 1); + goto errout; + release_cursor(cursor); free_cursor(cursor); return 0; -eek: - release_cursor(cursor, depth + 1); + errout: + /* release_cursor() was already called at error point */ free_cursor(cursor); warn("make_inode 0x%Lx failed (%d)", (L)tux_inode(inode)->inum, err); return err; @@ -111,7 +110,7 @@ errout: static int open_inode(struct inode *inode) { - SB = tux_sb(inode->i_sb); + struct sb *sb = tux_sb(inode->i_sb); int err, depth = sb->itable.root.depth; struct cursor *cursor = alloc_cursor(depth + 1); if (!cursor) @@ -122,13 +121,13 @@ static int open_inode(struct inode *inode) return err; } unsigned size; - void *attrs = ileaf_lookup(&sb->itable, tux_inode(inode)->inum, bufdata(cursor[depth].buffer), &size); + void *attrs = ileaf_lookup(&sb->itable, tux_inode(inode)->inum, bufdata(cursor_leafbuf(cursor)), &size); if (!attrs) { err = -ENOENT; goto eek; } trace("found inode 0x%Lx", (L)tux_inode(inode)->inum); - //ileaf_dump(&sb->itable, cursor[depth].buffer->data); + //ileaf_dump(&sb->itable, bufdata(cursor[depth].buffer)); //hexdump(attrs, size); unsigned xsize = decode_xsize(inode, attrs, size); err = -ENOMEM; @@ -140,7 +139,7 @@ static int open_inode(struct inode *inode) xcache_dump(inode); err = 0; eek: - release_cursor(cursor, depth + 1); + release_cursor(cursor); free_cursor(cursor); return err; } @@ -148,9 +147,9 @@ eek: int save_inode(struct inode *inode) { trace("save inode 0x%Lx", (L)tux_inode(inode)->inum); - SB = tux_sb(inode->i_sb); + struct sb *sb = tux_sb(inode->i_sb); int err, depth = sb->itable.root.depth; - struct cursor *cursor = alloc_cursor(depth + 1); + struct cursor *cursor = alloc_cursor(depth + 2); /* +1 for new depth */ if (!cursor) return -ENOMEM; @@ -159,15 +158,19 @@ int save_inode(struct inode *inode) return err; } unsigned size; - if (!(ileaf_lookup(&sb->itable, tux_inode(inode)->inum, bufdata(cursor[depth].buffer), &size))) + if (!(ileaf_lookup(&sb->itable, tux_inode(inode)->inum, bufdata(cursor_leafbuf(cursor)), &size))) return -EINVAL; err = store_attrs(inode, cursor); - release_cursor(cursor, depth + 1); + if (err) + goto error; + /* release_cursor() was already called at error point */ + release_cursor(cursor); +error: free_cursor(cursor); return err; } -int purge_inum(BTREE, inum_t inum) +static int purge_inum(struct btree *btree, inum_t inum) { int err = -ENOENT, depth = btree->root.depth; struct cursor *cursor = alloc_cursor(depth + 1); @@ -175,15 +178,72 @@ int purge_inum(BTREE, inum_t inum) return -ENOMEM; if (!(err = probe(btree, inum, cursor))) { - struct ileaf *ileaf = to_ileaf(bufdata(cursor[depth].buffer)); + /* FIXME: truncate the bnode and leaf if empty. */ + struct buffer_head *ileafbuf = cursor_leafbuf(cursor); + struct ileaf *ileaf = to_ileaf(bufdata(ileafbuf)); err = ileaf_purge(btree, inum, ileaf); - release_cursor(cursor, depth + 1); + if (!err) + mark_buffer_dirty(ileafbuf); + release_cursor(cursor); } free_cursor(cursor); return err; } #ifdef __KERNEL__ +static int tux_can_truncate(struct inode *inode) +{ + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return 0; + if (S_ISREG(inode->i_mode)) + return 1; + if (S_ISDIR(inode->i_mode)) + return 1; + if (S_ISLNK(inode->i_mode)) + return 1; + return 0; +} + +static void tux3_truncate(struct inode *inode) +{ + struct sb *sb = tux_sb(inode->i_sb); + struct delete_info del_info = { + .key = (inode->i_size + sb->blockmask) >> sb->blockbits, + }; + int err; + + if (!tux_can_truncate(inode)) + return; + /* FIXME: must fix dleaf_chop bug, and expand size */ + WARN_ON(inode->i_size); + block_truncate_page(inode->i_mapping, inode->i_size, tux3_get_block); + err = tree_chop(&tux_inode(inode)->btree, &del_info, 0); + inode->i_blocks = ((inode->i_size + sb->blockmask) + & ~(loff_t)sb->blockmask) >> 9; + inode->i_mtime = inode->i_ctime = gettime(); + mark_inode_dirty(inode); +} + +void tux3_delete_inode(struct inode *inode) +{ + struct sb *sb = tux_sb(inode->i_sb); + inum_t inum = tux_inode(inode)->inum; + + truncate_inode_pages(&inode->i_data, 0); + if (is_bad_inode(inode)) { + clear_inode(inode); + return; + } + inode->i_size = 0; + if (inode->i_blocks) + tux3_truncate(inode); + + /* clear_inode() before freeing this ino. */ + clear_inode(inode); + + purge_inum(&sb->itable, inum); +} + void tux3_clear_inode(struct inode *inode) { if (tux_inode(inode)->xcache) @@ -216,7 +276,7 @@ static const struct file_operations tux_file_fops = { }; static const struct inode_operations tux_file_iops = { -// .truncate = ext4_truncate, + .truncate = tux3_truncate, // .permission = ext4_permission, // .setattr = ext4_setattr, // .getattr = ext4_getattr @@ -260,8 +320,8 @@ static void tux3_setup_inode(struct inode *inode) mapping_set_gfp_mask(inode->i_mapping, GFP_USER); break; case S_IFLNK: -// inode->i_op = &tux_symlink_iops; -// inode->i_mapping->a_ops = &tux_aops; + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &tux_aops; break; case 0: /* FIXME: bitmap, vtable, atable doesn't have S_IFMT */ @@ -293,8 +353,11 @@ struct inode *tux_create_inode(struct inode *dir, int mode) iattr.ctime = iattr.mtime = iattr.atime = gettime(); int err = make_inode(inode, &iattr); - if (err) + if (err) { + make_bad_inode(inode); + iput(inode); return ERR_PTR(err); + } tux3_setup_inode(inode); insert_inode_hash(inode); return inode; diff --git a/fs/tux3/namei.c b/fs/tux3/namei.c new file mode 100644 index 0000000..73f0e28 --- /dev/null +++ b/fs/tux3/namei.c @@ -0,0 +1,209 @@ +#include "tux3.h" + +static struct dentry *tux3_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct buffer_head *buffer; + struct inode *inode; + tux_dirent *entry; + + entry = tux_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &buffer); + if (IS_ERR(entry)) { + if (PTR_ERR(entry) != -ENOENT) + return ERR_CAST(entry); + inode = NULL; + goto out; + } + inode = tux3_iget(dir->i_sb, from_be_u32(entry->inum)); + brelse(buffer); + if (IS_ERR(inode)) + return ERR_CAST(inode); +out: + return d_splice_alias(inode, dentry); +} + +static int tux_add_dirent(struct inode *dir, struct dentry *dentry, struct inode *inode) +{ + loff_t where; + + where = tux_create_entry(dir, dentry->d_name.name, dentry->d_name.len, + tux_inode(inode)->inum, inode->i_mode); + if (where < 0) + return where; + d_instantiate(dentry, inode); + return 0; +} + +static int tux_del_dirent(struct inode *dir, struct dentry *dentry) +{ + struct buffer_head *buffer; + tux_dirent *entry = tux_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &buffer); + return IS_ERR(entry) ? PTR_ERR(entry) : tux_delete_entry(buffer, entry); +} + +static int tux3_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) +{ + struct inode *inode; + int err; + + inode = tux_create_inode(dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + err = tux_add_dirent(dir, dentry, inode); + if (!err) + return 0; + inode_dec_link_count(inode); + iput(inode); + } + return err; +} + +static int tux3_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + return tux3_create(dir, dentry, S_IFDIR | mode, NULL); +} + +static int tux3_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + int err; + + if (inode->i_nlink >= TUX_LINK_MAX) + return -EMLINK; + + inode->i_ctime = gettime(); + inode_inc_link_count(inode); + atomic_inc(&inode->i_count); + err = tux_add_dirent(dir, dentry, inode); + if (err) { + inode_dec_link_count(inode); + iput(inode); + } + return err; +} + +static int tux3_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct inode *inode; + int err; + + inode = tux_create_inode(dir, S_IFLNK | S_IRWXUGO); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + err = page_symlink(inode, symname, strlen(symname) + 1); + if (!err) { + err = tux_add_dirent(dir, dentry, inode); + if (!err) + return 0; + } + inode_dec_link_count(inode); + iput(inode); + } + return err; +} + +static int tux3_unlink(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + int err = tux_del_dirent(dir, dentry); + + if (!err) { + inode->i_ctime = dir->i_ctime; + inode_dec_link_count(inode); + } + return err; +} + +static int tux3_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct buffer_head *old_buffer, *new_buffer; + tux_dirent *old_de, *new_de = NULL; + + old_de = tux_find_entry(old_dir, old_dentry->d_name.name, + old_dentry->d_name.len, &old_buffer); + if (IS_ERR(old_de)) + return PTR_ERR(old_de); + + if (new_inode) { + int err = -ENOTEMPTY; + if (!tux_dir_is_empty(new_inode)) + return err; + + new_de = tux_find_entry(new_dir, new_dentry->d_name.name, + new_dentry->d_name.len, &new_buffer); + if (IS_ERR(new_de)) + return PTR_ERR(old_de); + + if ((err = tux_delete_entry(new_buffer, new_de))) + return err; + + new_inode->i_ctime = new_dentry->d_parent->d_inode->i_ctime; + inode_dec_link_count(new_inode); + err = tux_create_entry(new_dentry->d_parent->d_inode, + new_dentry->d_name.name, + new_dentry->d_name.len, + tux_inode(old_inode)->inum, + old_inode->i_mode); + + if (err) + return err; + + } else { + int err = tux_add_dirent(new_dentry->d_parent->d_inode, new_dentry, old_inode); + if (err) + return err; + } + old_inode->i_ctime = gettime(); + tux_delete_entry(old_buffer, old_de); + return 0; +} + +static int tux3_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + int err = -ENOTEMPTY; + + if (tux_dir_is_empty(inode)) { + + err = tux_del_dirent(dir, dentry); + + if (!err) { + inode->i_ctime = dir->i_ctime; + inode->i_size = 0; + inode_dec_link_count(inode); + inode_dec_link_count(dir); + } + } + return err; +} + +const struct file_operations tux_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = tux_readdir, +}; + +const struct inode_operations tux_dir_iops = { + .create = tux3_create, + .lookup = tux3_lookup, + .link = tux3_link, + .unlink = tux3_unlink, + .symlink = tux3_symlink, + .mkdir = tux3_mkdir, + .rmdir = tux3_rmdir, +// .mknod = ext3_mknod, + .rename = tux3_rename, +// .setattr = ext3_setattr, +// .setxattr = generic_setxattr, +// .getxattr = generic_getxattr, +// .listxattr = ext3_listxattr, +// .removexattr = generic_removexattr, +// .permission = ext3_permission, + /* FIXME: why doesn't ext4 support this for directory? */ +// .fallocate = ext4_fallocate, +// .fiemap = ext4_fiemap, +}; diff --git a/fs/tux3/super.c b/fs/tux3/super.c index f7cb316..bfe933b 100644 --- a/fs/tux3/super.c +++ b/fs/tux3/super.c @@ -4,30 +4,77 @@ * Portions copyright (c) 2008, Maciej Zenczykowski */ +#ifdef __KERNEL__ #include #include #include #include #include #include +#endif #include "tux3.h" +static int unpack_sb(struct sb *sb, struct disksuper *super, int silent) +{ + if (memcmp(super->magic, (char[])SB_MAGIC, sizeof(super->magic))) { + if (!silent) + printf("invalid superblock [%Lx]\n", + (L)from_be_u64(*(be_u64 *)super->magic)); + return -EINVAL; + } + + int blockbits = from_be_u16(super->blockbits); + + sb->itable = (struct btree){ + .sb = sb, + .ops = &itable_ops, + .root = unpack_root(from_be_u64(super->iroot)), + .entries_per_leaf = 1 << (blockbits - 6), + }; +// sb->rootbuf; + sb->blockbits = blockbits; + sb->blocksize = 1 << blockbits; + sb->blockmask = (1 << blockbits) - 1; + /* FIXME: those should be initialized based on blocksize. */ + sb->entries_per_node = 20; + sb->max_inodes_per_block = 64; +// sb->version; + sb->atomref_base = 1 << (40 - sb->blockbits); // see xattr.c + sb->unatom_base = sb->atomref_base + (1 << (34 - sb->blockbits)); + sb->volblocks = from_be_u64(super->volblocks); + sb->freeblocks = from_be_u64(super->freeblocks); + sb->nextalloc = from_be_u64(super->nextalloc); + sb->atomgen = from_be_u32(super->atomgen); + sb->freeatom = from_be_u32(super->freeatom); + + return 0; +} + +static void pack_sb(struct sb *sb, struct disksuper *super) +{ + super->blockbits = to_be_u16(sb->blockbits); + super->volblocks = to_be_u64(sb->volblocks); + super->nextalloc = to_be_u64(sb->nextalloc); // probably does not belong here + super->freeatom = to_be_u32(sb->freeatom); // probably does not belong here + super->atomgen = to_be_u32(sb->atomgen); // probably does not belong here + super->freeblocks = to_be_u64(sb->freeblocks); // probably does not belong here + super->iroot = to_be_u64(pack_root(&sb->itable.root)); +} + +#ifdef __KERNEL__ static struct kmem_cache *tux_inode_cachep; -static void tux3_inode_init_once(struct kmem_cache *cachep, void *foo) +static void tux3_inode_init_once(struct kmem_cache *cachep, void *mem) { - struct tux_inode *tuxi = (struct tux_inode *)foo; - inode_init_once(&tuxi->vfs_inode); + inode_init_once(&((tuxnode_t *)mem)->vfs_inode); } static int __init tux3_init_inodecache(void) { tux_inode_cachep = kmem_cache_create("tux_inode_cache", - sizeof(struct tux_inode), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - tux3_inode_init_once); + sizeof(tuxnode_t), 0, (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + tux3_inode_init_once); if (tux_inode_cachep == NULL) return -ENOMEM; return 0; @@ -40,8 +87,7 @@ static void __exit tux3_destroy_inodecache(void) static struct inode *tux3_alloc_inode(struct super_block *sb) { - struct tux_inode *tuxi; - tuxi = (struct tux_inode *)kmem_cache_alloc(tux_inode_cachep, GFP_KERNEL); + tuxnode_t *tuxi = kmem_cache_alloc(tux_inode_cachep, GFP_KERNEL); if (!tuxi) return NULL; tuxi->btree = (struct btree){}; @@ -58,79 +104,36 @@ static void tux3_destroy_inode(struct inode *inode) static int tux_load_sb(struct super_block *sb, int silent) { - struct sb *sbi = tux_sb(sb); - struct buffer_head *bh = NULL; + struct buffer_head *bh; int err; - err = -EIO; + BUG_ON(SB_LOC < sb->s_blocksize); bh = sb_bread(sb, SB_LOC >> sb->s_blocksize_bits); if (!bh) { if (!silent) printk(KERN_ERR "TUX3: unable to read superblock\n"); - goto error; + return -EIO; } - - err = -EINVAL; - struct disksuper *disk = (struct disksuper *)bh->b_data; - if (memcmp(disk->magic, (char[])SB_MAGIC, sizeof(disk->magic))) { - if (!silent) - printk(KERN_ERR "TUX3: invalid superblock [%Lx]", - (L)from_be_u64(*(be_u64 *)disk->magic)); - goto error; - } - - int blockbits = from_be_u16(disk->blockbits); - u64 iroot = from_be_u64(disk->iroot); - - sbi->itable = (struct btree){ - .sb = sbi, - .ops = &itable_ops, - .root = (struct root){ - .depth = iroot >> 48, - .block = iroot & (-1ULL >> 16) - }, - .entries_per_leaf = 1 << (blockbits - 6), - }; -// sbi->rootbuf; - sbi->blockbits = blockbits; - sbi->blocksize = 1 << blockbits; - sbi->blockmask = (1 << blockbits) - 1; - sbi->volblocks = from_be_u64(disk->volblocks); - sbi->freeblocks = from_be_u64(disk->freeblocks); - sbi->nextalloc = from_be_u64(disk->nextalloc); - sbi->atomgen = from_be_u32(disk->atomgen); - sbi->freeatom = from_be_u32(disk->freeatom); - + err = unpack_sb(tux_sb(sb), bufdata(bh), silent); + /* FIXME: this is needed? */ + memcpy(&tux_sb(sb)->super, bufdata(bh), sizeof(tux_sb(sb)->super)); brelse(bh); - return 0; - -error: - brelse(bh); return err; } static void tux3_write_super(struct super_block *sb) { - struct sb *sbi = tux_sb(sb); struct buffer_head *bh; + BUG_ON(SB_LOC < sb->s_blocksize); bh = sb_bread(sb, SB_LOC >> sb->s_blocksize_bits); if (!bh) { printk(KERN_ERR "TUX3: unable to read superblock\n"); return; } - - struct disksuper *disk = bufdata(bh); - disk->blockbits = to_be_u16(sbi->blockbits); - disk->volblocks = to_be_u64(sbi->volblocks); - disk->nextalloc = to_be_u64(sbi->nextalloc); // probably does not belong here - disk->freeatom = to_be_u32(sbi->freeatom); // probably does not belong here - disk->atomgen = to_be_u32(sbi->atomgen); // probably does not belong here - disk->freeblocks = to_be_u64(sbi->freeblocks); // probably does not belong here - disk->iroot = to_be_u64((u64)sbi->itable.root.depth << 48 | sbi->itable.root.block); + pack_sb(tux_sb(sb), bufdata(bh)); brelse_dirty(bh); - sb->s_dirt = 0; } @@ -173,11 +176,12 @@ static int tux3_statfs(struct dentry *dentry, struct kstatfs *buf) static const struct super_operations tux3_super_ops = { .alloc_inode = tux3_alloc_inode, .destroy_inode = tux3_destroy_inode, + .delete_inode = tux3_delete_inode, + .clear_inode = tux3_clear_inode, + .write_inode = tux3_write_inode, .write_super = tux3_write_super, .put_super = tux3_put_super, .statfs = tux3_statfs, - .clear_inode = tux3_clear_inode, - .write_inode = tux3_write_inode, }; static int tux3_fill_super(struct super_block *sb, void *data, int silent) @@ -196,11 +200,6 @@ static int tux3_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sbi->vfs_sb = sb; - sbi->entries_per_node = 20; -// sbi->max_inodes_per_block = 64; -// sbi->version; -// sbi->atomref_base; -// sbi->unatom_base; err = -EIO; blocksize = sb_min_blocksize(sb, BLOCK_SIZE); @@ -298,3 +297,4 @@ static void __exit exit_tux3(void) module_init(init_tux3) module_exit(exit_tux3) MODULE_LICENSE("GPL"); +#endif /* !__KERNEL__ */ diff --git a/fs/tux3/tux3.h b/fs/tux3/tux3.h index 6fce17e..9253662 100644 --- a/fs/tux3/tux3.h +++ b/fs/tux3/tux3.h @@ -146,7 +146,7 @@ static inline void *decode48(void *at, u64 *val) } /* Tux3 disk format */ -#define SB_MAGIC_SIZE 8 +#define SB_MAGIC_SIZE 8 #define SB_MAGIC { 't', 'u', 'x', '3', 0xdd, 0x08, 0x09, 0x06 } /* date of latest incompatible sb format */ /* * disk format revision history @@ -162,7 +162,6 @@ static inline void *decode48(void *at, u64 *val) #define MAX_FILESIZE (1LL << MAX_FILESIZE_BITS) #define MAX_EXTENT (1 << 6) #define SB_LOC (1 << 12) -#define SB struct sb *sb /* Special inode numbers */ #define TUX_BITMAP_INO 0 @@ -172,56 +171,104 @@ static inline void *decode48(void *at, u64 *val) struct disksuper { + /* Update magic on any incompatible format change */ char magic[SB_MAGIC_SIZE]; - be_u64 birthdate; - be_u64 flags; - be_u64 iroot; - be_u64 aroot; - be_u16 blockbits; - be_u16 unused1; - be_u32 unused2; - be_u64 volblocks, freeblocks, nextalloc; + be_u64 birthdate; /* Volume creation date */ + be_u64 flags; /* Need to assign some flags */ + be_u64 iroot; /* Root of the inode table btree */ + be_u64 aroot; /* The atime table is a file now, delete on next format rev */ + be_u16 blockbits; /* Shift to get volume block size */ + be_u16 unused1; /* Throw away on next format rev */ + be_u32 unused2; /* Throw away on next format rev */ + be_u64 volblocks; /* Volume size */ + /* The rest should be moved to a "metablock" that is updated frequently */ + be_u64 freeblocks; /* Should match total of zero bits in allocation bitmap */ + be_u64 nextalloc; /* Get rid of this when we have a real allocation policy */ be_u32 freeatom, atomgen; }; -struct root { u64 depth:16, block:48; }; +struct root { + unsigned depth; /* btree levels not including leaf level */ + block_t block; /* disk location of btree root */ +}; struct btree { - struct sb *sb; - struct btree_ops *ops; - struct root root; - u16 entries_per_leaf; + struct sb *sb; /* Convenience to reduce parameter list size */ + struct btree_ops *ops; /* Generic btree low level operations */ + struct root root; /* Cached description of btree root */ + u16 entries_per_leaf; /* Used in btree leaf splitting */ +}; + +/* Define layout of btree root on disk, endian conversion is elsewhere. */ + +static inline u64 pack_root(struct root *root) +{ + return (u64)root->depth << 48 | root->block; +} + +static inline struct root unpack_root(u64 v) +{ + return (struct root){ .depth = v >> 48, .block = v & (-1ULL >> 16), }; +} + +/* Path cursor for btree traversal */ + +struct cursor { +#define CURSOR_DEBUG +#ifdef CURSOR_DEBUG +#define FREE_BUFFER ((void *)0xdbc06505) +#define FREE_NEXT ((void *)0xdbc06507) + int maxlen; +#endif + int len; + struct path_level { + struct buffer_head *buffer; + struct index_entry *next; + } path[]; }; -struct cursor { struct buffer_head *buffer; struct index_entry *next; }; +/* Tux3-specific sb is a handle for the entire volume state */ struct sb { struct disksuper super; - - struct btree itable; - struct buffer_head *rootbuf; - struct inode *bitmap, *rootdir, *vtable, *atable; + struct btree itable; /* Cached root of the inode table */ + struct inode *bitmap; /* allocation bitmap special file */ + struct inode *rootdir; /* root directory special file */ + struct inode *vtable; /* version table special file */ + struct inode *atable; /* xattr atom special file */ unsigned blocksize, blockbits, blockmask; block_t volblocks, freeblocks, nextalloc; - unsigned entries_per_node, max_inodes_per_block; - unsigned version, atomref_base, unatom_base; - unsigned freeatom, atomgen; + unsigned entries_per_node; /* must be per-btree type, get rid of this */ + unsigned max_inodes_per_block; /* get rid of this and use entries per leaf */ + unsigned version; /* Currently mounted volume version view */ + unsigned atomref_base, unatom_base; /* layout of atom table */ + unsigned freeatom; /* Start of free atom list in atom table */ + unsigned atomgen; /* Next atom number to allocate if no free atoms */ #ifdef __KERNEL__ - struct super_block *vfs_sb; + struct super_block *vfs_sb; /* Generic kernel superblock */ #else - map_t *devmap; + map_t *devmap; /* Userspace device block cache */ #endif }; #ifdef __KERNEL__ -struct tux_inode { - struct btree btree; - inum_t inum; - unsigned present; - struct xcache *xcache; +/* + * In kernel an inode has a generic part and a filesystem-specific part + * with conversions between them, in order to support multiple different + * kinds of filesystem. In userspace there is only one kind of filesystem, + * Tux3, so no need for two kinds of inodes, and a big pain to initialize + * inodes for testing if there were. We use a typedef here so that the + * filesystem-specific type can be transparently aliased to the generic + * inode type in userspace and be a separate type in kernel. + */ - struct inode vfs_inode; -}; +typedef struct { + struct btree btree; + inum_t inum; /* Inode number. Fixme: also in generic inode */ + unsigned present; /* Attributes decoded from or to be encoded to inode table */ + struct xcache *xcache; /* Extended attribute cache */ + struct inode vfs_inode; /* Generic kernel inode */ +} tuxnode_t; static inline struct sb *tux_sb(struct super_block *sb) { @@ -233,9 +280,9 @@ static inline struct super_block *vfs_sb(struct sb *sb) return sb->vfs_sb; } -static inline struct tux_inode *tux_inode(struct inode *inode) +static inline tuxnode_t *tux_inode(struct inode *inode) { - return container_of(inode, struct tux_inode, vfs_inode); + return container_of(inode, tuxnode_t, vfs_inode); } typedef struct address_space map_t; @@ -256,19 +303,18 @@ static inline void free(void *ptr) kfree(ptr); } #else -struct inode { +typedef struct inode { struct btree btree; inum_t inum; unsigned present; struct xcache *xcache; - struct sb *i_sb; map_t *map; u64 i_size; unsigned i_version; struct timespec i_mtime, i_ctime, i_atime; unsigned i_mode, i_uid, i_gid, i_nlink; -}; +} tuxnode_t; struct file { struct inode *f_inode; @@ -297,6 +343,8 @@ static inline map_t *mapping(struct inode *inode) } #endif /* !__KERNEL__ */ +#define TUX_LINK_MAX 64 /* just for debug for now */ + #define TUX_NAME_LEN 255 /* directory entry */ @@ -308,18 +356,18 @@ typedef struct { } tux_dirent; /* version:10, count:6, block:48 */ -struct extent { be_u64 block_count_version; }; +struct diskextent { be_u64 block_count_version; }; /* count:8, keyhi:24 */ struct group { be_u32 count_and_keyhi; }; /* limit:8, keylo:24 */ struct entry { be_u32 limit_and_keylo; }; -struct dleaf { be_u16 magic, groups, free, used; struct extent table[]; }; +struct dleaf { be_u16 magic, groups, free, used; struct diskextent table[]; }; struct dwalk { struct dleaf *leaf; struct group *group, *gstop, *gdict; struct entry *entry, *estop; - struct extent *exbase, *extent, *exstop; + struct diskextent *exbase, *extent, *exstop; struct { struct group group; struct entry entry; @@ -378,23 +426,23 @@ static inline void inc_entry_limit(struct entry *entry, int n) /* extent wrappers */ -static inline struct extent make_extent(block_t block, unsigned count) +static inline struct diskextent make_extent(block_t block, unsigned count) { assert(block < (1ULL << 48) && count - 1 < (1 << 6)); - return (struct extent){ to_be_u64(((u64)(count - 1) << 48) | block) }; + return (struct diskextent){ to_be_u64(((u64)(count - 1) << 48) | block) }; } -static inline unsigned extent_block(struct extent extent) +static inline unsigned extent_block(struct diskextent extent) { return from_be_u64(*(be_u64 *)&extent) & ~(-1LL << 48); } -static inline unsigned extent_count(struct extent extent) +static inline unsigned extent_count(struct diskextent extent) { return ((from_be_u64(*(be_u64 *)&extent) >> 48) & 0x3f) + 1; } -static inline unsigned extent_version(struct extent extent) +static inline unsigned extent_version(struct diskextent extent) { return from_be_u64(*(be_u64 *)&extent) >> 54; } @@ -424,20 +472,19 @@ static inline void inc_dleaf_groups(struct dleaf *leaf, int n) typedef void vleaf; -#define BTREE struct btree *btree - struct btree_ops { - int (*leaf_sniff)(BTREE, vleaf *leaf); - int (*leaf_init)(BTREE, vleaf *leaf); - tuxkey_t (*leaf_split)(BTREE, tuxkey_t key, vleaf *from, vleaf *into); - void *(*leaf_resize)(BTREE, tuxkey_t key, vleaf *leaf, unsigned size); - void (*leaf_dump)(BTREE, vleaf *leaf); - unsigned (*leaf_need)(BTREE, vleaf *leaf); - unsigned (*leaf_free)(BTREE, vleaf *leaf); - int (*leaf_chop)(BTREE, tuxkey_t key, vleaf *leaf); - void (*leaf_merge)(BTREE, vleaf *into, vleaf *from); - block_t (*balloc)(SB); - void (*bfree)(SB, block_t block); + int (*leaf_sniff)(struct btree *btree, vleaf *leaf); + int (*leaf_init)(struct btree *btree, vleaf *leaf); + tuxkey_t (*leaf_split)(struct btree *btree, tuxkey_t key, vleaf *from, vleaf *into); + void *(*leaf_resize)(struct btree *btree, tuxkey_t key, vleaf *leaf, unsigned size); + void (*leaf_dump)(struct btree *btree, vleaf *leaf); + unsigned (*leaf_need)(struct btree *btree, vleaf *leaf); + unsigned (*leaf_free)(struct btree *btree, vleaf *leaf); + /* return value: 1 - modified, 0 - not modified, < 0 - error */ + int (*leaf_chop)(struct btree *btree, tuxkey_t key, vleaf *leaf); + void (*leaf_merge)(struct btree *btree, vleaf *into, vleaf *from); + block_t (*balloc)(struct sb *sb); + void (*bfree)(struct sb *sb, block_t block); }; /* @@ -485,8 +532,8 @@ struct tux_iattr { }; void hexdump(void *data, unsigned size); -block_t balloc(SB); -void bfree(SB, block_t block); +block_t balloc(struct sb *sb); +void bfree(struct sb *sb, block_t block); enum atkind { MIN_ATTR = 6, @@ -513,6 +560,15 @@ enum atbit { extern unsigned atsize[MAX_ATTRS]; +#ifndef ENOATTR +#define ENOATTR ENOENT +#endif + +#ifndef XATTR_CREATE +#define XATTR_CREATE 1 // fail if xattr already exists +#define XATTR_REPLACE 2 // fail if xattr does not exist +#endif + struct xattr { u16 atom, size; char body[]; }; struct xcache { u16 size, maxsize; struct xattr xattrs[]; }; @@ -574,41 +630,46 @@ static inline struct inode *buffer_inode(struct buffer_head *buffer) } /* balloc.c */ -block_t balloc_extent(SB, unsigned blocks); +block_t balloc_extent(struct sb *sb, unsigned blocks); /* btree.c */ -void release_cursor(struct cursor cursor[], int depth); +struct buffer_head *cursor_leafbuf(struct cursor *cursor); +void release_cursor(struct cursor *cursor); struct cursor *alloc_cursor(int); -void free_cursor(struct cursor cursor[]); -int probe(BTREE, tuxkey_t key, struct cursor cursor[]); -int advance(BTREE, struct cursor cursor[]); -tuxkey_t next_key(struct cursor cursor[], int depth); -void show_tree_range(BTREE, tuxkey_t start, unsigned count); -int tree_chop(BTREE, struct delete_info *info, millisecond_t deadline); -int btree_leaf_split(struct btree *btree, struct cursor cursor[], tuxkey_t key); -void *tree_expand(struct btree *btree, tuxkey_t key, unsigned newsize, struct cursor cursor[]); -struct btree new_btree(SB, struct btree_ops *ops); +void free_cursor(struct cursor *cursor); +int probe(struct btree *btree, tuxkey_t key, struct cursor *cursor); +int advance(struct btree *btree, struct cursor *cursor); +tuxkey_t next_key(struct cursor *cursor, int depth); +void show_tree_range(struct btree *btree, tuxkey_t start, unsigned count); +int tree_chop(struct btree *btree, struct delete_info *info, millisecond_t deadline); +int btree_leaf_split(struct btree *btree, struct cursor *cursor, tuxkey_t key); +void *tree_expand(struct btree *btree, tuxkey_t key, unsigned newsize, struct cursor *cursor); +struct btree new_btree(struct sb *sb, struct btree_ops *ops); /* dir.c */ -loff_t tux_create_entry(struct inode *dir, const char *name, int len, inum_t inum, unsigned mode); tux_dirent *tux_find_entry(struct inode *dir, const char *name, int len, struct buffer_head **result); +loff_t tux_create_entry(struct inode *dir, const char *name, int len, inum_t inum, unsigned mode); int tux_delete_entry(struct buffer_head *buffer, tux_dirent *entry); +int tux_readdir(struct file *file, void *state, filldir_t filldir); +int tux_dir_is_empty(struct inode *dir); extern const struct file_operations tux_dir_fops; extern const struct inode_operations tux_dir_iops; /* dtree.c */ -unsigned dleaf_free(BTREE, vleaf *leaf); -void dleaf_dump(BTREE, vleaf *vleaf); +unsigned dleaf_free(struct btree *btree, vleaf *leaf); +void dleaf_dump(struct btree *btree, vleaf *vleaf); extern struct btree_ops dtree_ops; int dwalk_probe(struct dleaf *leaf, unsigned blocksize, struct dwalk *walk, tuxkey_t key); tuxkey_t dwalk_index(struct dwalk *walk); -struct extent *dwalk_next(struct dwalk *walk); +struct diskextent *dwalk_next(struct dwalk *walk); void dwalk_back(struct dwalk *walk); void dwalk_chop_after(struct dwalk *walk); -int dwalk_mock(struct dwalk *walk, tuxkey_t index, struct extent extent); -int dwalk_pack(struct dwalk *walk, tuxkey_t index, struct extent extent); +int dwalk_mock(struct dwalk *walk, tuxkey_t index, struct diskextent extent); +int dwalk_pack(struct dwalk *walk, tuxkey_t index, struct diskextent extent); /* filemap.c */ +int tux3_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); extern const struct address_space_operations tux_aops; extern const struct address_space_operations tux_blk_aops; @@ -619,12 +680,13 @@ void *encode_attrs(struct inode *inode, void *attrs, unsigned size); void *decode_attrs(struct inode *inode, void *attrs, unsigned size); /* ileaf.c */ -void *ileaf_lookup(BTREE, inum_t inum, struct ileaf *leaf, unsigned *result); -inum_t find_empty_inode(BTREE, struct ileaf *leaf, inum_t goal); -int ileaf_purge(BTREE, inum_t inum, struct ileaf *leaf); +void *ileaf_lookup(struct btree *btree, inum_t inum, struct ileaf *leaf, unsigned *result); +inum_t find_empty_inode(struct btree *btree, struct ileaf *leaf, inum_t goal); +int ileaf_purge(struct btree *btree, inum_t inum, struct ileaf *leaf); extern struct btree_ops itable_ops; /* inode.c */ +void tux3_delete_inode(struct inode *inode); void tux3_clear_inode(struct inode *inode); int tux3_write_inode(struct inode *inode, int do_sync); struct inode *tux_create_inode(struct inode *dir, int mode); @@ -634,7 +696,7 @@ struct inode *tux3_iget(struct super_block *sb, inum_t inum); int xcache_dump(struct inode *inode); struct xcache *new_xcache(unsigned maxsize); struct xattr *get_xattr(struct inode *inode, char *name, unsigned len); -int set_xattr(struct inode *inode, char *name, unsigned len, void *data, unsigned size); +int set_xattr(struct inode *inode, char *name, unsigned len, void *data, unsigned size, unsigned flags); void *encode_xattrs(struct inode *inode, void *attrs, unsigned size); unsigned decode_xsize(struct inode *inode, void *attrs, unsigned size); unsigned encode_xsize(struct inode *inode); diff --git a/fs/tux3/xattr.c b/fs/tux3/xattr.c index 53314e9..7daa4a4 100644 --- a/fs/tux3/xattr.c +++ b/fs/tux3/xattr.c @@ -52,9 +52,38 @@ struct buffer_head *blockread_unatom(struct inode *atable, atom_t atom, unsigned return blockread(mapping(atable), tux_sb(atable->i_sb)->unatom_base + (atom >> shift)); } +static int unatom(struct inode *atable, atom_t atom, char *name, unsigned size) +{ + unsigned offset; + struct sb *sb = tux_sb(atable->i_sb); + struct buffer_head *buffer = blockread_unatom(atable, atom, &offset); + if (!buffer) + return -ENOMEM; + u64 where = from_be_u64(((be_u64 *)bufdata(buffer))[offset]); + brelse(buffer); + buffer = blockread(mapping(atable), where >> sb->blockbits); + if (!buffer) + return -ENOMEM; + tux_dirent *entry = bufdata(buffer) + (where & sb->blockmask); + if (entry_atom(entry) != atom) { + warn("atom %x reverse entry broken", atom); + return -EINVAL; + } + unsigned len = entry->name_len; + if (size) { + if (len > size) { + brelse(buffer); + return -ERANGE; + } + memcpy(name, entry->name, len); + } + brelse(buffer); + return len; +} + void dump_atoms(struct inode *atable) { - SB = tux_sb(atable->i_sb); + struct sb *sb = tux_sb(atable->i_sb); unsigned blocks = (sb->atomgen + (sb->blockmask >> 1)) >> (sb->blockbits - 1); for (unsigned j = 0; j < blocks; j++) { unsigned block = sb->atomref_base + 2 * j; @@ -69,33 +98,22 @@ void dump_atoms(struct inode *atable) if (!refs) continue; atom_t atom = i; - unsigned offset; - struct buffer_head *buffer = blockread_unatom(atable, atom, &offset); - if (!buffer) - goto eek; - u64 where = from_be_u64(((be_u64 *)bufdata(buffer))[offset]); - brelse(buffer); - buffer = blockread(mapping(atable), where >> sb->blockbits); - if (!buffer) + char name[100]; + int len = unatom(atable, atom, name, sizeof(name)); + if (len < 0) goto eek; - tux_dirent *entry = bufdata(buffer) + (where & sb->blockmask); - if (entry_atom(entry) != atom) { - warn("atom %x reverse entry broken", atom); - continue; - } - printf("{%.*s} = %x\n", entry->name_len, entry->name, atom); - brelse(buffer); + printf("%.*s = %x\n", len, name, atom); } brelse(lobuf); brelse(hibuf); } return; eek: - warn("something broke"); + warn("atom name lookup failed"); return; } -void show_freeatoms(SB) +void show_freeatoms(struct sb *sb) { struct inode *atable = sb->atable; atom_t atom = sb->freeatom; @@ -118,7 +136,7 @@ eek: atom_t get_freeatom(struct inode *atable) { - SB = tux_sb(atable->i_sb); + struct sb *sb = tux_sb(atable->i_sb); atom_t atom = sb->freeatom; if (!atom) return sb->atomgen++; @@ -139,7 +157,7 @@ eek: int use_atom(struct inode *atable, atom_t atom, int use) { - SB = tux_sb(atable->i_sb); + struct sb *sb = tux_sb(atable->i_sb); unsigned shift = sb->blockbits - 1; unsigned block = sb->atomref_base + 2 * (atom >> shift); unsigned offset = atom & ~(-1 << shift), kill = 0; @@ -184,8 +202,8 @@ atom_t find_atom(struct inode *atable, char *name, unsigned len) { struct buffer_head *buffer; tux_dirent *entry = tux_find_entry(atable, name, len, &buffer); - if (!entry) - return -1; + if (IS_ERR(entry)) + return -1; /* FIXME: return correct errno */ atom_t atom = entry_atom(entry); brelse(buffer); return atom; @@ -219,55 +237,44 @@ int xcache_dump(struct inode *inode) if (!tux_inode(inode)->xcache) return 0; //warn("xattrs %p/%i", inode->xcache, inode->xcache->size); - struct xattr *xattr = tux_inode(inode)->xcache->xattrs; - struct xattr *limit = xcache_limit(tux_inode(inode)->xcache); + struct xcache *xcache = tux_inode(inode)->xcache; + struct xattr *xattr = xcache->xattrs, *limit = xcache_limit(xcache); while (xattr < limit) { - if (!xattr->size) - goto zero; if (xattr->size > tux_sb(inode->i_sb)->blocksize) - goto barf; + goto bail; printf("atom %.3x => ", xattr->atom); - hexdump(xattr->body, xattr->size); - struct xattr *xnext = xcache_next(xattr); - if (xnext > limit) - goto over; - xattr = xnext; + if (xattr->size) + hexdump(xattr->body, xattr->size); + else + printf("\n"); + if ((xattr = xcache_next(xattr)) > limit) + goto fail; } assert(xattr == limit); return 0; -zero: - error("zero length xattr"); -over: +fail: error("corrupt xattrs"); -barf: +bail: error("xattr too big"); return -1; } -struct xattr *xcache_lookup(struct inode *inode, unsigned atom, int *err) +struct xattr *xcache_lookup(struct xcache *xcache, unsigned atom, int *err) { - if (!tux_inode(inode)->xcache) + if (!xcache) return NULL; - struct xattr *xattr = tux_inode(inode)->xcache->xattrs; - struct xattr *limit = xcache_limit(tux_inode(inode)->xcache); + struct xattr *xattr = xcache->xattrs; + struct xattr *limit = xcache_limit(xcache); while (xattr < limit) { - if (!xattr->size) - goto zero; if (xattr->atom == atom) return xattr; - struct xattr *xnext = xcache_next(xattr); - if (xnext > limit) - goto over; - xattr = xnext; + if ((xattr = xcache_next(xattr)) > limit) + goto fail; } assert(xattr == limit); null: return NULL; -zero: - *err = EINVAL; - error("zero length xattr"); - goto null; -over: +fail: *err = EINVAL; error("corrupt xattrs"); goto null; @@ -283,6 +290,15 @@ struct xcache *new_xcache(unsigned maxsize) return xcache; } +static inline int remove_old(struct xcache *xcache, struct xattr *xattr) +{ + if (!xattr) + return 0; + unsigned size = (void *)xcache_next(xattr) - (void *)xattr; + memmove(xattr, xcache_next(xattr), xcache->size -= size); + return 1; +} + /* * Things to improve about xcache_update: * @@ -294,40 +310,40 @@ struct xcache *new_xcache(unsigned maxsize) * * * Should expand by binary factor */ -int xcache_update(struct inode *inode, unsigned atom, void *data, unsigned len) +int xcache_update(struct inode *inode, unsigned atom, void *data, unsigned len, unsigned flags) { int err = 0, use = 0; - struct xattr *xattr = xcache_lookup(inode, atom, &err); + struct xcache *xcache = tux_inode(inode)->xcache; + struct xattr *xattr = xcache_lookup(xcache, atom, &err); if (xattr) { - unsigned size = (void *)xcache_next(xattr) - (void *)xattr; - //warn("size = %i\n", size); - memmove(xattr, xcache_next(xattr), tux_inode(inode)->xcache->size -= size); - use--; - } - if (len) { - unsigned more = sizeof(*xattr) + len; - struct xcache *xcache = tux_inode(inode)->xcache; - if (!xcache || xcache->size + more > xcache->maxsize) { - unsigned oldsize = xcache ? xcache->size : offsetof(struct xcache, xattrs); - unsigned maxsize = xcache ? xcache->maxsize : (1 << 7); - unsigned newsize = oldsize + (more < maxsize ? maxsize : more); - struct xcache *newcache = new_xcache(newsize); - if (!newcache) - return -ENOMEM; - if (xcache) { - memcpy(newcache->xattrs, xcache->xattrs, oldsize - offsetof(struct xcache, xattrs)); - newcache->size = oldsize; - free(xcache); - } - tux_inode(inode)->xcache = newcache; + if (flags & XATTR_CREATE) + return -EEXIST; + use -= remove_old(xcache, xattr); + } else if (flags & XATTR_REPLACE) + return -ENOATTR; + + /* Insert new */ + unsigned more = sizeof(*xattr) + len; + if (!xcache || xcache->size + more > xcache->maxsize) { + unsigned oldsize = xcache ? xcache->size : offsetof(struct xcache, xattrs); + unsigned maxsize = xcache ? xcache->maxsize : (1 << 7); + unsigned newsize = oldsize + (more < maxsize ? maxsize : more); + struct xcache *newcache = new_xcache(newsize); + if (!newcache) + return -ENOMEM; + if (xcache) { + memcpy(newcache->xattrs, xcache->xattrs, oldsize - offsetof(struct xcache, xattrs)); + newcache->size = oldsize; + free(xcache); } - xattr = xcache_limit(tux_inode(inode)->xcache); - //warn("expand by %i\n", more); - tux_inode(inode)->xcache->size += more; - memcpy(xattr->body, data, (xattr->size = len)); - xattr->atom = atom; - use++; + tux_inode(inode)->xcache = newcache; } + xattr = xcache_limit(tux_inode(inode)->xcache); + //warn("expand by %i\n", more); + tux_inode(inode)->xcache->size += more; + memcpy(xattr->body, data, (xattr->size = len)); + xattr->atom = atom; + use++; if (use) use_atom(tux_sb(inode->i_sb)->atable, atom, use); return 0; @@ -339,15 +355,62 @@ struct xattr *get_xattr(struct inode *inode, char *name, unsigned len) atom_t atom = find_atom(tux_sb(inode->i_sb)->atable, name, len); if (atom == -1) return NULL; - return xcache_lookup(inode, atom, &err); // and what about the err??? + return xcache_lookup(tux_inode(inode)->xcache, atom, &err); // and what about the err??? } -int set_xattr(struct inode *inode, char *name, unsigned len, void *data, unsigned size) +int set_xattr(struct inode *inode, char *name, unsigned len, void *data, unsigned size, unsigned flags) { atom_t atom = make_atom(tux_sb(inode->i_sb)->atable, name, len); if (atom == -1) - return -ENOENT; - return xcache_update(inode, atom, data, size); + return -EINVAL; + return xcache_update(inode, atom, data, size, flags); +} + +int del_xattr(struct inode *inode, char *name, unsigned len) +{ + int err = 0; + atom_t atom = find_atom(tux_sb(inode->i_sb)->atable, name, len); + if (atom == -1) + return -ENOATTR; + struct xcache *xcache = tux_inode(inode)->xcache; + struct xattr *xattr = xcache_lookup(xcache, atom, &err); + if (err) + return err; + if (!xattr) + return -ENOATTR; + int used = remove_old(xcache, xattr); + if (used) + use_atom(tux_sb(inode->i_sb)->atable, atom, -used); + return err; +} + +int xattr_list(struct inode *inode, char *text, size_t size) +{ + if (!tux_inode(inode)->xcache) + return 0; + struct inode *atable = tux_sb(inode->i_sb)->atable; + struct xcache *xcache = tux_inode(inode)->xcache; + struct xattr *xattr = xcache->xattrs, *limit = xcache_limit(xcache); + char *base = text, *top = text + size; + while (xattr < limit) { + atom_t atom = xattr->atom; + if (size) { + int tail = top - text; + int len = unatom(atable, atom, text, tail); + if (len < 0 || len == tail) + goto full; + *(text += len) = 0; + text++; + } else + text += unatom(atable, atom, NULL, 0) + 1; + if ((xattr = xcache_next(xattr)) > limit) + goto fail; + } + assert(xattr == limit); +full: + return text - base; +fail: + return -EINVAL; } /* Xattr encode/decode */ @@ -376,7 +439,7 @@ void *encode_xattrs(struct inode *inode, void *attrs, unsigned size) unsigned decode_xsize(struct inode *inode, void *attrs, unsigned size) { - SB = tux_sb(inode->i_sb); + struct sb *sb = tux_sb(inode->i_sb); unsigned total = 0, bytes; void *limit = attrs + size; while (attrs < limit - 1) {