linux內存管理之夥伴系統(建立)
linux內存管理之夥伴系統(建立)
內核使用夥伴系統來解決內存分配引起的外部碎片問題。
一、數據結構描述
結構zone中的free_area數組描述夥伴系統,該數組的每個元素為一個free_area結構。
view plaincopy to clipboard01.struct zone {
02.……
03. struct free_area free_area[MAX_ORDER];
04.……
05.}; view plaincopy to clipboard01.struct free_area {/*每個free_area按遷移類型分設鏈表,鏈表類型共5類;遷移類型的分類為新加入的特性*/
02. struct list_head free_list[MIGRATE_TYPES];
03. unsigned long nr_free;
04.}; 下圖為夥伴系統在管理區中的表示。
二、夥伴系統的初始化
夥伴系統是在初始化物理管理區的時候初始化的,具體實現在下面的函數中:start_kernel()->setup_arch()->paging_init()->zone_sizes_init()->free_area_init_nodes()->free_area_init_node()->free_area_init_core()->init_currently_empty_zone()->zone_init_free_lists()view plaincopy to clipboard01./*初始化對應zone中所有order和所有類型的鏈表*/
02.static void __meminit zone_init_free_lists(struct zone *zone)
03.{
04. int order, t;
05. for_each_migratetype_order(order, t) {
06. INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
07. zone->free_area[order].nr_free = 0;
08. }
09.} 三、夥伴系統中數據初始化
將bootmem分配器中的數據回收到夥伴系統中
start_kernel()->mm_init()->mem_init()
view plaincopy to clipboard01.void __init mem_init(void)
02.{
03. int codesize, reservedpages, datasize, initsize;
04. int tmp;
05./*和具體硬體相關*/
06. pci_iommu_alloc();
07.
08.#ifdef CONFIG_FLATMEM
09. BUG_ON(!mem_map);
10.#endif
11. /* this will put all low memory onto the freelists */
12. /*釋放bootmem中的內存到夥伴系統中,包括bootmem佔有的點陣圖
13. 返回總共釋放的頁面數**/
14. totalram_pages += free_all_bootmem();
15.
16. reservedpages = 0;
17. for (tmp = 0; tmp < max_low_pfn; tmp++)
18. /*
19. * Only count reserved RAM pages:
20. */
21. if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
22. reservedpages++;
23. /*初始化高端內存區,將高端內存區放入夥伴系統中*/
24. set_highmem_pages_init();
25. /*內核代碼段、數據段、初始化段長度*/
26. codesize = (unsigned long) &_etext - (unsigned long) &_text;
27. datasize = (unsigned long) &_edata - (unsigned long) &_etext;
28. initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
29. /*列印輸出各種內存初始化后的信息*/
30. printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
31. "%dk reserved, %dk data, %dk init, %ldk highmem)\n",
32. nr_free_pages() << (PAGE_SHIFT-10),
33. num_physpages << (PAGE_SHIFT-10),
34. codesize >> 10,
35. reservedpages << (PAGE_SHIFT-10),
36. datasize >> 10,
37. initsize >> 10,
38. (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
39. );
40.
41. printk(KERN_INFO "virtual kernel memory layout:\n"
42. " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
43.#ifdef CONFIG_HIGHMEM
44. " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
45.#endif
46. " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
47. " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
48. " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
49. " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
50. " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
51. FIXADDR_START, FIXADDR_TOP,
52. (FIXADDR_TOP - FIXADDR_START) >> 10,
53.
54.#ifdef CONFIG_HIGHMEM
55. PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
56. (LAST_PKMAP*PAGE_SIZE) >> 10,
57.#endif
58.
59. VMALLOC_START, VMALLOC_END,
60. (VMALLOC_END - VMALLOC_START) >> 20,
61.
62. (unsigned long)__va(0), (unsigned long)high_memory,
63. ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
64.
65. (unsigned long)&__init_begin, (unsigned long)&__init_end,
66. ((unsigned long)&__init_end -
67. (unsigned long)&__init_begin) >> 10,
68.
69. (unsigned long)&_etext, (unsigned long)&_edata,
70. ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
71.
72. (unsigned long)&_text, (unsigned long)&_etext,
73. ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
74.
75. /*
76. * Check boundaries twice: Some fundamental inconsistencies can
77. * be detected at build time already.
78. */
79.#define __FIXADDR_TOP (-PAGE_SIZE)
80.#ifdef CONFIG_HIGHMEM
81. BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
82. BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
83.#endif
84.#define high_memory (-128UL << 20)
85. BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
86.#undef high_memory
87.#undef __FIXADDR_TOP
88.
89.#ifdef CONFIG_HIGHMEM
90. BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
91. BUG_ON(VMALLOC_END > PKMAP_BASE);
92.#endif
93. BUG_ON(VMALLOC_START >= VMALLOC_END);
94. BUG_ON((unsigned long)high_memory > VMALLOC_START);
95.
96. if (boot_cpu_data.wp_works_ok < 0)
97. test_wp_bit();
98.
99. save_pg_dir();
100. /*調用zap_low_mappings函數清low_memory的映射,內核線程只訪問內核空間是不能訪問用戶空間的
101. ,其實low_memory的映射被設置的部分也就是當初為
102. 8MB建立的恆等映射填充了臨時內核頁全局目錄的第0項,第1項
103. 這裡將用戶空間的頁目錄項<3G的PGD清0;*/
104. zap_low_mappings(true);
105.} view plaincopy to clipboard01./**
02. * free_all_bootmem - release free pages to the buddy allocator
03. *
04. * Returns the number of pages actually released.
05. */
06.unsigned long __init free_all_bootmem(void)
07.{
08. return free_all_bootmem_core(NODE_DATA(0)->bdata);
09.} view plaincopy to clipboard01.static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
02.{
03. int aligned;
04. struct page *page;
05. unsigned long start, end, pages, count = 0;
06.
07. if (!bdata->node_bootmem_map)
08. return 0;
09. /*節點內存開始和結束處*/
10. start = bdata->node_min_pfn;
11. end = bdata->node_low_pfn;
12.
13. /*
14. * If the start is aligned to the machines wordsize, we might
15. * be able to free pages in bulks of that order.
16. */
17. aligned = !(start & (BITS_PER_LONG - 1));
18.
19. bdebug("nid=%td start=%lx end=%lx aligned=%d\n",
20. bdata - bootmem_node_data, start, end, aligned);
21. /*用於釋放整個bootmem所涉及的內存*/
22. while (start < end) {
23. unsigned long *map, idx, vec;
24.
25. map = bdata->node_bootmem_map;
26. idx = start - bdata->node_min_pfn;/*相對於開始處的偏移*/
27. vec = ~map[idx / BITS_PER_LONG];/*取出該段對應的點陣圖字並取反,vec中為1的位表示對應頁面空閑*/
28. /*如果開始地址以32位對齊、連續的32個頁面都沒有被分配(空閑),並且
29. 釋放起點以上的32個頁面都是合法的(不超過end值),則釋放連續的32個
30. 頁面,即1<<5個頁面*/
31. if (aligned && vec == ~0UL && start + BITS_PER_LONG < end) {
32. int order = ilog2(BITS_PER_LONG);/*32位下為5*/
33. /*釋放到夥伴系統中*/
34. __free_pages_bootmem(pfn_to_page(start), order);
35. count += BITS_PER_LONG;/*釋放的總頁面數更新*/
36. } else {
37. unsigned long off = 0;
38. /*vec!=0表示這個區間存在頁面空閑,off為這個區間的下標,從0開始*/
39. while (vec && off < BITS_PER_LONG) {
40. if (vec & 1) {/*如果頁面空閑*/
41. /*偏移轉化為具體的頁面*/
42. page = pfn_to_page(start + off);
43. /*一個頁面一個頁面的釋放*/
44. __free_pages_bootmem(page, 0);/*釋放單個頁面*/
45. count++;/*更新釋放頁面總數*/
46. }
47. vec >>= 1;/*vec向右移動一位,表示訪問下一個頁面*/
48. off++;/*偏移加一*/
49. }
50. }
51. start += BITS_PER_LONG;/*偏移向後移動*/
52. }
53. /*虛擬地址轉化為page
54. 用於釋放bdata中的點陣圖所佔有的內存*/
55. page = virt_to_page(bdata->node_bootmem_map);
56. pages = bdata->node_low_pfn - bdata->node_min_pfn;
57.
58. /*計算bootmem分配器中所使用的頁面數,即點陣圖使用的頁面數*/
59. pages = bootmem_bootmap_pages(pages);
60. count += pages;/*釋放的總頁面數加*/
61. while (pages--)/*每次釋放一個頁面,釋放
62. 總共的pages個頁面*/
63. __free_pages_bootmem(page++, 0);
64.
65. bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
66.
67. return count;/*返回釋放的總頁面數*/
68.} view plaincopy to clipboard01./*
02. * permit the bootmem allocator to evade page validation on high-order frees
03. */
04.void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
05.{
06. if (order == 0) {
07. __ClearPageReserved(page);
08. set_page_count(page, 0);/*設置頁面的引用計數為0*/
09. set_page_refcounted(page);/*設置頁面的引用計數為1*/
10. __free_page(page);/*釋放單個頁面到夥伴系統中*/
11. } else {
12. int loop;
13.
14. /*prefetchw將page預取到緩存以備寫入,其具體實現和特定的體系結構相關*/
15. prefetchw(page);
16. for (loop = 0; loop < BITS_PER_LONG; loop++) {
17. struct page *p = &page[loop];
18.
19. if (loop + 1 < BITS_PER_LONG)
20. prefetchw(p + 1);
21. __ClearPageReserved(p);
22. set_page_count(p, 0);
23. }
24.
25. set_page_refcounted(page);/*設置頁面的引用計數為1*/
26. /*這裡具體釋放到那個類型裡面,
27. 要看page的裡面具體的東西,也就是
28. 可以用相關函數來獲取他所屬的類型*/
29. __free_pages(page, order);/*釋放2^order個頁面*/
30. }
31.} view plaincopy to clipboard01.void __init set_highmem_pages_init(void)
02.{
03. struct zone *zone;
04. int nid;
05.
06. for_each_zone(zone) {
07. unsigned long zone_start_pfn, zone_end_pfn;
08.
09. if (!is_highmem(zone))/*驗證是否屬於高端內存區域中*/
10. /*如果不屬於,將不執行下面的操作*/
11. continue;
12.
13. zone_start_pfn = zone->zone_start_pfn;
14. zone_end_pfn = zone_start_pfn + zone->spanned_pages;
15. /*返回zone中的node的id*/
16. nid = zone_to_nid(zone);
17. printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
18. zone->name, nid, zone_start_pfn, zone_end_pfn);
19. /*將區間中的內存放到夥伴系統中*/
20. add_highpages_with_active_regions(nid, zone_start_pfn,
21. zone_end_pfn);
22. }
23. totalram_pages += totalhigh_pages;
24.} view plaincopy to clipboard01.void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
02. unsigned long end_pfn)
03.{
04. struct add_highpages_data data;
05.
06. data.start_pfn = start_pfn;
07. data.end_pfn = end_pfn;
08. /*對節點中的每個區域進行頁面的回收到夥伴系統中*/
09. work_with_active_regions(nid, add_highpages_work_fn, &data);
10.} view plaincopy to clipboard01./*用指定函數來操作活動區,在高端內存初始化時用了*/
02.void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
03.{
04. int i;
05. int ret;
06.
07. for_each_active_range_index_in_nid(i, nid) {
08. ret = work_fn(early_node_map[i].start_pfn,
09. early_node_map[i].end_pfn, data);
10. if (ret)
11. break;
12. }
13.} view plaincopy to clipboard01.static int __init add_highpages_work_fn(unsigned long start_pfn,
02. unsigned long end_pfn, void *datax)
03.{
04. int node_pfn;
05. struct page *page;
06. unsigned long final_start_pfn, final_end_pfn;
07. struct add_highpages_data *data;
08.
09. data = (struct add_highpages_data *)datax;
10. /*活動內存區間與指定考慮區間交集*/
11. final_start_pfn = max(start_pfn, data->start_pfn);
12. final_end_pfn = min(end_pfn, data->end_pfn);
13. if (final_start_pfn >= final_end_pfn)
14. return 0;
15.
16. for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
17. node_pfn++) {
18. if (!pfn_valid(node_pfn))/*驗證頁面是否有效*/
19. continue;
20. page = pfn_to_page(node_pfn);/*將下標轉換為具體的頁面*/
21. /*初始化頁面的count值,將頁面釋放到夥伴系統中*/
22. add_one_highpage_init(page, node_pfn);
23. }
24.
25. return 0;
26.
27.} view plaincopy to clipboard01.static void __init add_one_highpage_init(struct page *page, int pfn)
02.{
03. /*ClearPageReserved清除了該頁面flag中的reserved標誌,表示該頁面屬於動態內存*/
04. ClearPageReserved(page);
05. init_page_count(page);/*設置page的count值為1*/
06. __free_page(page); /*釋放頁面到夥伴系統*/
07. totalhigh_pages++;/*更新高端頁面總數*/
08.} view plaincopy to clipboard01.void zap_low_mappings(bool early)
02.{
03. int i;
04.
05. /*
06. * Zap initial low-memory mappings.
07. *
08. * Note that "pgd_clear()" doesn't do it for
09. * us, because pgd_clear() is a no-op on i386.
10. */
11. /*這個函數很簡單,就是把前面我們在arch/x86/kernel/head_32.S中設置的頁全局目錄的前若干項清零
12. 。這若干項到底是多少
13. 不錯,0xc0000000>>22 & 1023= 768,這些頁全局目錄項代表虛擬地址前3G的頁面,也就是所謂的用戶區
14. ,我們在這裡把它全清零了。*/
15. for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
16.#ifdef CONFIG_X86_PAE
17. set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
18.#else
19. set_pgd(swapper_pg_dir+i, __pgd(0));
20.#endif
21. }
22.
23. if (early)
24. __flush_tlb();
25. else
26. flush_tlb_all();
27.} 到此,夥伴系統已經建立並且裡面存放了應有的內存數據。要從夥伴系統中分配內存,必須要有分配和釋放機制。後面總結具體的分配和釋放工作。
《解決方案》
謝謝分享