Index: linux/arch/ia64/mm/hugetlbpage.c
===================================================================
--- linux.orig/arch/ia64/mm/hugetlbpage.c	2004-09-15 15:25:16.000000000 -0700
+++ linux/arch/ia64/mm/hugetlbpage.c	2004-09-17 15:54:43.000000000 -0700
@@ -43,13 +43,23 @@
 static struct page *dequeue_huge_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	int nid = mpol_first_node(vma, addr); 
+	int tid, nid2 = nid;
 	struct page *page = NULL;
 
 	if (list_empty(&hugepage_freelists[nid])) {
-		for (nid = 0; nid < MAX_NUMNODES; ++nid)
-			if (mpol_node_valid(nid, vma, addr) && 
-			    !list_empty(&hugepage_freelists[nid]))
-				break;
+		/* Prefer the neighboring nodes for hugepage allocation */
+		for (tid = 1; tid < MAX_NUMNODES; tid++) {
+			nid2 = (nid + tid) % MAX_NUMNODES;
+			if (mpol_node_valid(nid2, vma, addr) &&
+			    !list_empty(&hugepage_freelists[nid2]))
+				break;
+			if (tid > nid) continue;
+			nid2 = nid - tid;
+			if (mpol_node_valid(nid2, vma, addr) &&
+			    !list_empty(&hugepage_freelists[nid2]))
+				break;
+		}
+		nid = nid2;
 	}
 	if (nid >= 0 && nid < MAX_NUMNODES &&
 	    !list_empty(&hugepage_freelists[nid])) {
@@ -87,6 +97,27 @@
 	return page;
 }
 
+/* Variation on __alloc_hugetlb_page() above: acquires htlbpage_lock itself, as in 2.6.9-rc2. */
+static struct page *__alloc_hugetlb_page2(struct vm_area_struct *vma, unsigned long addr)
+{
+	int i;
+	struct page *page;
+
+	spin_lock(&htlbpage_lock);
+	page = dequeue_huge_page(vma, addr);
+	if (!page) {
+		spin_unlock(&htlbpage_lock);
+		return NULL;
+	}
+	htlbpagemem[page_zone(page)->zone_pgdat->node_id]--;
+	spin_unlock(&htlbpage_lock);
+	set_page_count(page, 1);
+	page->lru.prev = (void *)free_huge_page;
+	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
+		clear_highpage(&page[i]);
+	return page;
+}
+
 static pte_t *
 huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
 {
@@ -659,6 +690,7 @@
 	ret = VM_FAULT_MINOR;
 	if (unlikely(!pte_none(*pte)))
 		goto out;
+	spin_unlock(&mm->page_table_lock);
 
 	mapping = vma->vm_file->f_mapping;
 	idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
@@ -666,22 +698,19 @@
  retry:
 	page = find_get_page(mapping, idx);
 	if (!page) {
-		spin_lock(&htlbpage_lock);
 
 		/* Should do this at prefault time, but that gets us into
 		   trouble with freeing right now. We do a quick overcommit 
 		   check instead. */
 		ret = hugetlb_get_quota(mapping);
 		if (ret) {
-			spin_unlock(&htlbpage_lock);
 			ret = VM_FAULT_OOM;
-			goto out;
+			goto out2;
 		}
 		
-		page = __alloc_hugetlb_page(vma, addr);
+		page = __alloc_hugetlb_page2(vma, addr);
 		if (!page) {
 			hugetlb_put_quota(mapping);
-			spin_unlock(&htlbpage_lock);
 			
 			/* Instead of OOMing here could just transparently use
 			   small pages. */
@@ -689,7 +718,7 @@
 				       current->comm, current->pid);
 			
 			ret = VM_FAULT_OOM;
-			goto out;
+			goto out2;
 		}
 		ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
 		if (likely(!ret))
@@ -698,21 +727,24 @@
 			hugetlb_put_quota(mapping);
 			if (put_page_testzero(page))
 				__free_huge_page(page);
-			spin_unlock(&htlbpage_lock);
 			if (ret == -EEXIST)
 				goto retry;
 			ret = VM_FAULT_SIGBUS;
-			goto out;
+			goto out2;
 		}
-		spin_unlock(&htlbpage_lock);
 		ret = VM_FAULT_MAJOR; 
 	} else
 		ret = VM_FAULT_MINOR;
-		
-	set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+
+	spin_lock(&mm->page_table_lock);
 
+	if (pte_none(*pte))
+		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+	else
+		page_cache_release(page);
 out:
 	spin_unlock(&mm->page_table_lock);
+out2:
 	return ret;
 }
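
For reference, a minimal standalone sketch of the fallback order the dequeue_huge_page() hunk above walks when the preferred node has no free hugepages: nid+1, nid-1, nid+2, nid-2, ... with the forward probes wrapping around MAX_NUMNODES. This is an illustration only; pick_nearest_node() and has_free[] are made-up names and do not exist in the kernel.

/* Illustrative userspace sketch of the "nearest node first" search. */
#include <stdio.h>

#define MAX_NUMNODES 8

static int pick_nearest_node(int nid, const int has_free[MAX_NUMNODES])
{
	int tid, nid2;

	if (has_free[nid])
		return nid;			/* preferred node still has pages */
	for (tid = 1; tid < MAX_NUMNODES; tid++) {
		nid2 = (nid + tid) % MAX_NUMNODES;	/* forward neighbour, wraps */
		if (has_free[nid2])
			return nid2;
		if (tid > nid)
			continue;		/* no backward neighbour left */
		nid2 = nid - tid;		/* backward neighbour */
		if (has_free[nid2])
			return nid2;
	}
	return -1;				/* nothing free on any node */
}

int main(void)
{
	/* only nodes 1 and 6 have free hugepages in this example */
	int has_free[MAX_NUMNODES] = { 0, 1, 0, 0, 0, 0, 1, 0 };

	printf("from node 3 -> %d\n", pick_nearest_node(3, has_free));	/* prints 1 */
	printf("from node 5 -> %d\n", pick_nearest_node(5, has_free));	/* prints 6 */
	return 0;
}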