[uClinux-dev] PATCH: enable read-only filemapping for !CONFIG_MMU (2.5.73-uc0)

Bernardo Innocenti bernie at codewiz.org
Mon Jun 30 07:05:38 EDT 2003


Hello Greg,

I've cleaned up and ported my old mmap patch to 2.5.73-uc0.
I'm now resending it for inclusion in the mainline kernel
since it worked very well so far (the flat reloc problem
we discussed a few days ago was not really caused by this
patch).

Of course, I'm still open to discuss any design considerations
or other kinds of improvements, but I don't believe adding a
global list of libraries in binfmt_flat is a viable solution.

---------------------------------------------------------------------

DESCRIPTION
===========

The main motivation behind this patch is saving memory when
loading binaries from read/write filesystems such as JFFS2
that don't support in-place execution. Combining this patch
with FLAT shared libraries support reduces memory usage on
a typical uClinux system.

This patch has received a few weeks of testing on a M5272C3
board with various binaries running at the same time.
The system has been running smoothly as always and the
amount of free memory increased considerably.

IMPLEMENTATION
==============

The address_space structure keeps track of memory mappings for inodes.
In uClinux we can't rely on paging, therefore we need to allocate a
contiguous memory block and read the file into it at once. This is done
by generic_file_mmap().

The difficult part is getting rid of the allocated block when all
processes have finished using it. I added a usage counter in the
address_space structure and added a link in mm_rblock_struct to the
mapped file.

The code for handling rblock and tblock structures got more complex
and is now needed in multiple places. Therefore I decided to fold it
into separate functions taking care of all details: create_tblock()
and delete_tblock(). As a bonus, do_mmap_pgoff() got much shorter
and cleaner.

There's an ugly asymmetry here: those mmaped blocks are allocated in
mm/filemap.c:generic_file_mmap() and freed in mm/nommu.c:delete_tblock().
It's unfortunate there's no generic_file_munmap().


KNOWN PROBLEMS
==============

I'm not sure about locking issues. Of course there aren't
nommu SMP platforms, so it's not really a problem ;-)

I have a strong feeling we should put back the VMA stuff in nommu.c
instead of messing with tblocks to enable file map tracking.

I'm violating some of the mmap() semantics, but of course you can't have
a full mmap() implementation without an MMU.
Read-only mapping is not guaranteed to be read-only, and writes won't
be flushed to disk. Also, we assume all maps are of the same size and
at the same file offset. What we have is just good enough for sharing
mapped binaries among processes.


diff -Nru linux-2.5.73-uc0/fs/inode.c linux-2.5.x/fs/inode.c
--- linux-2.5.73-uc0/fs/inode.c	2003-06-22 20:33:34.000000000 +0200
+++ linux-2.5.x/fs/inode.c	2003-06-25 22:29:44.000000000 +0200
@@ -145,6 +145,12 @@
 		mapping->dirtied_when = 0;
 		mapping->assoc_mapping = NULL;
 		mapping->backing_dev_info = &default_backing_dev_info;
+
+#ifndef CONFIG_MMU
+		mapping->i_mmap_block = NULL;
+		mapping->i_mmap_cnt = 0;
+#endif /* !CONFIG_MMU */
+
 		if (sb->s_bdev)
 			mapping->backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
 		memset(&inode->u, 0, sizeof(inode->u));
@@ -183,11 +189,13 @@
 	sema_init(&inode->i_sem, 1);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	spin_lock_init(&inode->i_data.page_lock);
-	init_MUTEX(&inode->i_data.i_shared_sem);
 	INIT_LIST_HEAD(&inode->i_data.private_list);
 	spin_lock_init(&inode->i_data.private_lock);
+#ifdef CONFIG_MMU
 	INIT_LIST_HEAD(&inode->i_data.i_mmap);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_shared);
+	init_MUTEX(&inode->i_data.i_shared_sem);
+#endif /* CONFIG_MMU */
 	spin_lock_init(&inode->i_lock);
 }
 
diff -Nru linux-2.5.73-uc0/fs/locks.c linux-2.5.x/fs/locks.c
--- linux-2.5.73-uc0/fs/locks.c	2003-06-22 20:33:18.000000000 +0200
+++ linux-2.5.x/fs/locks.c	2003-06-25 22:29:44.000000000 +0200
@@ -1423,6 +1423,7 @@
 
 	inode = filp->f_dentry->d_inode;
 
+#ifdef CONFIG_MMU
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -1435,6 +1436,7 @@
 			goto out;
 		}
 	}
+#endif /* CONFIG_MMU */
 
 	error = flock_to_posix_lock(filp, file_lock, &flock);
 	if (error)
@@ -1561,6 +1563,7 @@
 
 	inode = filp->f_dentry->d_inode;
 
+#ifdef CONFIG_MMU
 	/* Don't allow mandatory locks on files that may be memory mapped
 	 * and shared.
 	 */
@@ -1573,6 +1576,7 @@
 			goto out;
 		}
 	}
+#endif /* CONFIG_MMU */
 
 	error = flock64_to_posix_lock(filp, file_lock, &flock);
 	if (error)
diff -Nru linux-2.5.73-uc0/include/asm-m68knommu/mmu.h linux-2.5.x/include/asm-m68knommu/mmu.h
--- linux-2.5.73-uc0/include/asm-m68knommu/mmu.h	2003-06-22 20:33:01.000000000 +0200
+++ linux-2.5.x/include/asm-m68knommu/mmu.h	2003-06-25 22:29:44.000000000 +0200
@@ -4,9 +4,10 @@
 /* Copyright (C) 2002, David McCullough <davidm at snapgear.com> */
 
 struct mm_rblock_struct {
-	int	size;
-	int	refcount;
-	void	*kblock;
+	int		size;
+	int		refcount;
+	struct file	*file;
+	void		*kblock;
 };
 
 struct mm_tblock_struct {
diff -Nru linux-2.5.73-uc0/include/linux/fs.h linux-2.5.x/include/linux/fs.h
--- linux-2.5.73-uc0/include/linux/fs.h	2003-06-22 20:32:38.000000000 +0200
+++ linux-2.5.x/include/linux/fs.h	2003-06-25 22:29:44.000000000 +0200
@@ -320,9 +320,14 @@
 	struct list_head	io_pages;	/* being prepared for I/O */
 	unsigned long		nrpages;	/* number of total pages */
 	struct address_space_operations *a_ops;	/* methods */
+#ifdef CONFIG_MMU
 	struct list_head	i_mmap;		/* list of private mappings */
 	struct list_head	i_mmap_shared;	/* list of shared mappings */
 	struct semaphore	i_shared_sem;	/* protect both above lists */
+#else /* !CONFIG_MMU */
+	char *			i_mmap_block;	/* Pointer to memory buffer for mmapped file */
+	int			i_mmap_cnt;	/* Track use count of i_mmap_block (FIXME: shall we use atomic_t?) */
+#endif /* !CONFIG_MMU */
 	unsigned long		dirtied_when;	/* jiffies of first page dirtying */
 	int			gfp_mask;	/* how to allocate the pages */
 	struct backing_dev_info *backing_dev_info; /* device readahead, etc */
diff -Nru linux-2.5.73-uc0/mm/filemap.c linux-2.5.x/mm/filemap.c
--- linux-2.5.73-uc0/mm/filemap.c	2003-06-22 20:32:41.000000000 +0200
+++ linux-2.5.x/mm/filemap.c	2003-06-25 22:29:44.000000000 +0200
@@ -40,6 +40,9 @@
 #include <asm/uaccess.h>
 #include <asm/mman.h>
 
+/* Turn on verbose debug messages for mmap related stuff */
+#undef DEBUG_MMAP
+
 /*
  * Shared mappings implemented 30.11.1994. It's not fully working yet,
  * though.
@@ -580,12 +583,14 @@
 		if (!PageUptodate(page))
 			goto page_not_up_to_date;
 page_ok:
+#ifdef CONFIG_MMU
 		/* If users can be writing to this page using arbitrary
 		 * virtual addresses, take care about potential aliasing
 		 * before reading the page on the kernel side.
 		 */
 		if (!list_empty(&mapping->i_mmap_shared))
 			flush_dcache_page(page);
+#endif /* CONFIG_MMU */
 
 		/*
 		 * Mark the page accessed if we read the beginning.
@@ -1262,6 +1267,56 @@
 	return 0;
 }
 
+#else /* !CONFIG_MMU */
+/* nommu mmap: read the file into one kmalloc'ed block shared by all mappers. */
+int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+	ssize_t error;		/* signed: ->read() yields a byte count or -errno */
+	mm_segment_t old_fs;
+	size_t len = vma->vm_end - vma->vm_start;
+
+	if (++mapping->i_mmap_cnt == 1)
+	{
+#ifdef DEBUG_MMAP
+		printk("generic_file_mmap(): allocating %u bytes for inode #%ld\n",
+			len, mapping->host->i_ino);
+#endif
+		if (!(mapping->i_mmap_block = kmalloc(len, GFP_KERNEL)))
+		{
+			--mapping->i_mmap_cnt;
+			return -ENOMEM;
+		}
+	}
+#ifdef DEBUG_MMAP
+	else
+		printk("mmap: reusing allocated block for inode #%ld (i_mmap_cnt = %d)\n",
+			mapping->host->i_ino, mapping->i_mmap_cnt);
+#endif
+	/* The destination is a kernel buffer, hence KERNEL_DS around ->read() */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	error = file->f_op->read(file, mapping->i_mmap_block, len, &file->f_pos);
+	set_fs(old_fs);
+
+	if (error < 0) {
+		if (--mapping->i_mmap_cnt == 0)
+		{
+			kfree(mapping->i_mmap_block);
+			mapping->i_mmap_block = NULL;
+		}
+		return error;
+	}
+
+	/* Short read: zero-fill the tail of the mapped block */
+	if ((size_t)error < len)
+		memset(mapping->i_mmap_block + error, 0, len - error);
+
+	vma->vm_start = (unsigned long)mapping->i_mmap_block;
+	return 0;
+}
+#endif /* !CONFIG_MMU */
+
 /*
  * This is for filesystems which do not implement ->writepage.
  */
@@ -1271,16 +1326,6 @@
 		return -EINVAL;
 	return generic_file_mmap(file, vma);
 }
-#else
-int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
-{
-	return -ENOSYS;
-}
-int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
-{
-	return -ENOSYS;
-}
-#endif /* CONFIG_MMU */
 
 static inline struct page *__read_cache_page(struct address_space *mapping,
 				unsigned long index,
diff -Nru linux-2.5.73-uc0/mm/nommu.c linux-2.5.x/mm/nommu.c
--- linux-2.5.73-uc0/mm/nommu.c	2003-06-25 21:52:00.000000000 +0200
+++ linux-2.5.x/mm/nommu.c	2003-06-25 22:29:44.000000000 +0200
@@ -17,13 +17,18 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
-#include <linux/blkdev.h>
+#include <linux/file.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
+/* Define to enable debug output for mm */
+#undef DEBUG
+#undef DEBUG_TBLOCK
+#undef WARN_ON_SLACK
+
 void *high_memory;
 struct page *mem_map = NULL;
 unsigned long max_mapnr;
@@ -250,9 +255,9 @@
 #undef _trans
 }
 
-#ifdef DEBUG
 static void show_process_blocks(void)
 {
+#ifdef DEBUG_TBLOCK
 	struct mm_tblock_struct *tblock;
 
 	printk("Process blocks %d:", current->pid);
@@ -263,8 +268,120 @@
 			printk(" (%d @%p #%d)", kobjsize(tblock->rblock->kblock), tblock->rblock->kblock, tblock->rblock->refcount);
 		printk(tblock->next ? " ->" : ".\n");
 	}
+#endif /* DEBUG_TBLOCK */
+}
+/* Allocate a tblock/rblock pair for kblock and link it into mm; NULL if OOM. */
+static struct mm_tblock_struct *create_tblock(struct mm_struct *mm, void *kblock, size_t size)
+{
+	struct mm_tblock_struct *tblock;
+
+	tblock = (struct mm_tblock_struct *)
+		kmalloc(sizeof(struct mm_tblock_struct), GFP_KERNEL);
+
+	if (!tblock) {
+		printk("Allocation of tblock for %u byte allocation from process %d failed\n",
+			size, current->pid);
+		show_free_areas();
+		return NULL;
+	}
+
+	tblock->rblock = (struct mm_rblock_struct *)
+		kmalloc(sizeof(struct mm_rblock_struct), GFP_KERNEL);
+
+	if (!tblock->rblock) {
+		printk("Allocation of rblock for %u byte allocation from process %d failed\n",
+			size, current->pid);
+		show_free_areas();
+		kfree(tblock);
+		return NULL;
+	}
+	/* A NULL kblock asks for a fresh zero-filled block of size bytes */
+	if (!kblock)
+	{
+		if (!(kblock = kmalloc(size, GFP_KERNEL))) {
+			printk("Allocation of %u bytes from process %d failed\n",
+				size, current->pid);
+			show_free_areas();
+			kfree(tblock->rblock);
+			kfree(tblock);
+			return NULL;
+		}
+
+		memset(kblock, '\0', size);
+
+		realalloc += kobjsize(kblock);
+		askedalloc += size;
+	}
+
+	/* Init rblock; ->file stays NULL unless the caller sets it afterwards */
+	tblock->rblock->refcount = 1;
+	tblock->rblock->file = NULL;
+	tblock->rblock->kblock = kblock;
+	tblock->rblock->size = size;
+
+#ifdef WARN_ON_SLACK
+	if ((size + WARN_ON_SLACK) <= kobjsize(kblock))
+		printk("Allocation of %u bytes from process %d has %lu bytes of slack\n",
+			size, current->pid, kobjsize(kblock) - size);
+#endif
+
+	realalloc += kobjsize(tblock);
+	askedalloc += sizeof(struct mm_tblock_struct);
+
+	realalloc += kobjsize(tblock->rblock);
+	askedalloc += sizeof(struct mm_rblock_struct);
+
+	/* Link tblock at the head of the block list of the mm passed in */
+	tblock->next = mm->context.tblock.next;
+	mm->context.tblock.next = tblock;
+
+	return tblock;
+}
+/* Drop tblock's rblock reference, unlink tblock from after prev and free it. */
+void delete_tblock(struct mm_tblock_struct *tblock, struct mm_tblock_struct *prev)
+{
+	if (tblock->rblock) {
+		if (!--tblock->rblock->refcount) {
+			struct file *file;
+
+			if ((file = tblock->rblock->file)) {
+				struct address_space *mapping;
+
+				mapping = file->f_dentry->d_inode->i_mapping;
+				/* File-backed: drop one use of the mapping's block, free it at zero */
+#ifdef DEBUG
+				printk("delete_tblock(): releasing file with f_count %d, i_mmap_cnt %d, (inode #%ld)\n",
+					file->f_count.counter, mapping->i_mmap_cnt, mapping->host->i_ino);
+#endif
+				if (--mapping->i_mmap_cnt == 0) {
+#ifdef DEBUG
+					printk("delete_tblock(): freeing mmapped block @%p (inode #%ld).\n",
+						mapping->i_mmap_block, mapping->host->i_ino);
+#endif
+					kfree(mapping->i_mmap_block);
+					mapping->i_mmap_block = NULL;
+				}
+				/* Release the reference taken by get_file() in do_mmap_pgoff() */
+				fput(file);
+			}
+			else if (tblock->rblock->kblock) {	/* no file: block is in the alloc counters */
+				realalloc -= kobjsize(tblock->rblock->kblock);
+				askedalloc -= tblock->rblock->size;
+				kfree(tblock->rblock->kblock);
+			}
+
+			realalloc -= kobjsize(tblock->rblock);
+			askedalloc -= sizeof(struct mm_rblock_struct);
+			kfree(tblock->rblock);
+		}
+	}
+
+	/* Unlink tblock from mm list; prev must be the node whose ->next is tblock */
+	prev->next = tblock->next;
+	realalloc -= kobjsize(tblock);
+	askedalloc -= sizeof(struct mm_tblock_struct);
+	kfree(tblock);
+}
-#endif /* DEBUG */
 
 unsigned long do_mmap_pgoff(
 	struct file * file,
@@ -274,7 +391,6 @@
 	unsigned long flags,
 	unsigned long pgoff)
 {
-	void * result;
 	struct mm_tblock_struct * tblock;
 	unsigned int vm_flags;
 
@@ -348,95 +464,42 @@
 		   or do something truly complicated. */
 		   
 		if (file->f_op->mmap) {
+			get_file(file);
 			error = file->f_op->mmap(file, &vma);
-				   
 #ifdef DEBUG
 			printk("f_op->mmap() returned %d/%lx\n", error, vma.vm_start);
 #endif
-			if (!error)
-				return vma.vm_start;
-			else if (error != -ENOSYS)
+			if (error)
+			{
+				fput(file);
 				return error;
-		} else
-			return -ENODEV; /* No mapping operations defined */
-
-		/* An ENOSYS error indicates that mmap isn't possible (as opposed to
-		   tried but failed) so we'll fall through to the copy. */
-	}
+			}
 
-	tblock = (struct mm_tblock_struct *)
-                        kmalloc(sizeof(struct mm_tblock_struct), GFP_KERNEL);
-	if (!tblock) {
-		printk("Allocation of tblock for %lu byte allocation from process %d failed\n", len, current->pid);
-		show_free_areas();
-		return -ENOMEM;
-	}
+			if (!(tblock = create_tblock(current->mm, (void *)vma.vm_start, vma.vm_end - vma.vm_start)))
+			{
+				fput(file);
+				return -ENOMEM;
+			}
 
-	tblock->rblock = (struct mm_rblock_struct *)
-			kmalloc(sizeof(struct mm_rblock_struct), GFP_KERNEL);
+			tblock->rblock->file = file;
 
-	if (!tblock->rblock) {
-		printk("Allocation of rblock for %lu byte allocation from process %d failed\n", len, current->pid);
-		show_free_areas();
-		kfree(tblock);
-		return -ENOMEM;
-	}
-
-	result = kmalloc(len, GFP_KERNEL);
-	if (!result) {
-		printk("Allocation of length %lu from process %d failed\n", len,
-				current->pid);
-		show_free_areas();
-		kfree(tblock->rblock);
-		kfree(tblock);
-		return -ENOMEM;
+		} else
+			return -ENODEV; /* No mapping operations defined */
 	}
-
-	tblock->rblock->refcount = 1;
-	tblock->rblock->kblock = result;
-	tblock->rblock->size = len;
-	
-	realalloc += kobjsize(result);
-	askedalloc += len;
-
-#ifdef WARN_ON_SLACK	
-	if ((len+WARN_ON_SLACK) <= kobjsize(result))
-		printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n", len, current->pid, kobjsize(result)-len);
-#endif
-	
-	if (file) {
-		int error;
-		mm_segment_t old_fs = get_fs();
-		set_fs(KERNEL_DS);
-		error = file->f_op->read(file, (char *) result, len, &file->f_pos);
-		set_fs(old_fs);
-		if (error < 0) {
-			kfree(result);
-			kfree(tblock->rblock);
-			kfree(tblock);
-			return error;
-		}
-		if (error < len)
-			memset(result+error, '\0', len-error);
-	} else {
-		memset(result, '\0', len);
+	else
+	{
+		/* Handle anonymous mapping */
+		if (!(tblock = create_tblock(current->mm, NULL, len)))
+			return -ENOMEM;
 	}
 
-	realalloc += kobjsize(tblock);
-	askedalloc += sizeof(struct mm_tblock_struct);
-
-	realalloc += kobjsize(tblock->rblock);
-	askedalloc += sizeof(struct mm_rblock_struct);
-
-	tblock->next = current->mm->context.tblock.next;
-	current->mm->context.tblock.next = tblock;
-
 #ifdef DEBUG
 	printk("do_mmap:\n");
 	show_process_blocks();
-#endif	  
+#endif
 
-	return (unsigned long)result;
+	/* Return addr of mmapped memory block */
+	return (unsigned long) tblock->rblock->kblock;
 }
 
 int do_munmap(struct mm_struct * mm, unsigned long addr, size_t len)
@@ -466,27 +529,9 @@
 				current->pid, current->comm, (void*)addr);
 		return -EINVAL;
 	}
-	if (tblock->rblock) {
-		if (!--tblock->rblock->refcount) {
-			if (tblock->rblock->kblock) {
-				realalloc -= kobjsize(tblock->rblock->kblock);
-				askedalloc -= tblock->rblock->size;
-				kfree(tblock->rblock->kblock);
-			}
-			
-			realalloc -= kobjsize(tblock->rblock);
-			askedalloc -= sizeof(struct mm_rblock_struct);
-			kfree(tblock->rblock);
-		}
-	}
-	tmp->next = tblock->next;
-	realalloc -= kobjsize(tblock);
-	askedalloc -= sizeof(struct mm_tblock_struct);
-	kfree(tblock);
 
-#ifdef DEBUG
+	delete_tblock(tblock, tmp);
 	show_process_blocks();
-#endif	  
 
 	return -EINVAL;
 }
@@ -504,28 +549,8 @@
 #endif
 
 	while((tmp = mm->context.tblock.next)) {
-		if (tmp->rblock) {
-			if (!--tmp->rblock->refcount) {
-				if (tmp->rblock->kblock) {
-					realalloc -= kobjsize(tmp->rblock->kblock);
-					askedalloc -= tmp->rblock->size;
-					kfree(tmp->rblock->kblock);
-				}
-				realalloc -= kobjsize(tmp->rblock);
-				askedalloc -= sizeof(struct mm_rblock_struct);
-				kfree(tmp->rblock);
-			}
-			tmp->rblock = 0;
-		}
-		mm->context.tblock.next = tmp->next;
-		realalloc -= kobjsize(tmp);
-		askedalloc -= sizeof(struct mm_tblock_struct);
-		kfree(tmp);
+		delete_tblock(mm->context.tblock.next, &mm->context.tblock);
 	}
-
-#ifdef DEBUG
-	show_process_blocks();
-#endif	  
 }
 
 asmlinkage long sys_munmap(unsigned long addr, size_t len)
diff -Nru linux-2.5.73-uc0/mm/vmscan.c linux-2.5.x/mm/vmscan.c
--- linux-2.5.73-uc0/mm/vmscan.c	2003-06-22 20:32:33.000000000 +0200
+++ linux-2.5.x/mm/vmscan.c	2003-06-26 01:30:25.000000000 +0200
@@ -189,11 +189,13 @@
 	if (PageSwapCache(page))
 		return 1;
 
+#ifdef CONFIG_MMU
 	/* File is mmap'd by somebody. */
 	if (!list_empty(&mapping->i_mmap))
 		return 1;
 	if (!list_empty(&mapping->i_mmap_shared))
 		return 1;
+#endif /* CONFIG_MMU */
 
 	return 0;
 }


-- 
  // Bernardo Innocenti - Develer S.r.l., R&D dept.
\X/  http://www.develer.com/

Please don't send Word attachments - http://www.gnu.org/philosophy/no-word-attachments.html




More information about the uClinux-dev mailing list