Linux内核最新的连续内存分配器(CMA)——避免预留大块内存

http://blog.csdn.net/21cnbao/article/details/7309757

在我们使用ARM等嵌入式Linux系统的时候,一个头疼的问题是GPU,Camera,HDMI等都需要预留大量连续内存,这部分内存平时不用,但是一般的做法又必须先预留着。目前,Marek Szyprowski和Michal Nazarewicz实现了一套全新的Contiguous Memory Allocator。通过这套机制,我们可以做到不预留内存,这些内存平时是可用的,只有当需要的时候才被分配给Camera,HDMI等设备。下面分析它的基本代码流程。

声明连续内存

内核启动过程中arch/arm/mm/init.c中的arm_memblock_init()会调用dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit));

该函数位于:drivers/base/dma-contiguous.c

[cpp] view plaincopyprint?

    /***dma_contiguous_reserve()-reserveareaforcontiguousmemoryhandling*@limit:Endaddressofthereservedmemory(optional,0forany).**Thisfunctionreservesmemoryfromearlyallocator.Itshouldbe*calledbyarchspecificcodeoncetheearlyallocator(memblockorbootmem)*hasbeenactivatedandallothersubsystemshavealreadyallocated/reserved*memory.*/void__initdma_contiguous_reserve(phys_addr_tlimit){unsignedlongselected_size=0;pr_debug("%s(limit%08lx)\n",__func__,(unsignedlong)limit);if(size_cmdline!=-1){selected_size=size_cmdline;}else{#ifdefCONFIG_CMA_SIZE_SEL_MBYTESselected_size=size_bytes;#elifdefined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)selected_size=cma_early_percent_memory();#elifdefined(CONFIG_CMA_SIZE_SEL_MIN)selected_size=min(size_bytes,cma_early_percent_memory());#elifdefined(CONFIG_CMA_SIZE_SEL_MAX)selected_size=max(size_bytes,cma_early_percent_memory());#endif }if(selected_size){pr_debug("%s:reserving%ldMiBforglobalarea\n",__func__,selected_size/SZ_1M);dma_declare_contiguous(NULL,selected_size,0,limit);}};

其中的size_bytes定义为:

static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M; 默认情况下,CMA_SIZE_MBYTES会被定义为16MB,来源于CONFIG_CMA_SIZE_MBYTES=16

->

[cpp] view plaincopyprint?

    int__initdma_declare_contiguous(structdevice*dev,unsignedlongsize,phys_addr_tbase,phys_addr_tlimit){…/*Reservememory*/if(base){if(memblock_is_region_reserved(base,size)||memblock_reserve(base,size)<0){base=-EBUSY;gotoerr;}}else{/**Use__memblock_alloc_base()since*memblock_alloc_base()panic()s.*/phys_addr_taddr=__memblock_alloc_base(size,alignment,limit);if(!addr){base=-ENOMEM;gotoerr;}elseif(addr+size>~(unsignedlong)0){memblock_free(addr,size);base=-EINVAL;base=-EINVAL;gotoerr;}else{base=addr;}}/**Eachreservedareamustbeinitialisedlater,whenmorekernel*subsystems(likeslaballocator)areavailable.*/r->start=base;r->size=size;r->dev=dev;cma_reserved_count++;pr_info("CMA:reserved%ldMiBat%08lx\n",size/SZ_1M,(unsignedlong)base);/*Architecturespecificcontiguousmemoryfixup.*/dma_contiguous_early_fixup(base,size);return0;err:pr_err("CMA:failedtoreserve%ldMiB\n",size/SZ_1M);returnbase;}

由此可见,连续内存区域也是在内核启动的早期,通过__memblock_alloc_base()拿到的。

另外:

drivers/base/dma-contiguous.c里面的core_initcall()会导致cma_init_reserved_areas()被调用:

[cpp] view plaincopyprint?

    staticint__initcma_init_reserved_areas(void){structcma_reserved*r=cma_reserved;unsignedi=cma_reserved_count;pr_debug("%s()\n",__func__);for(;i;–i,++r){structcma*cma;cma=cma_create_area(PFN_DOWN(r->start),r->size>>PAGE_SHIFT);if(!IS_ERR(cma))dev_set_cma_area(r->dev,cma);}return0;}core_initcall(cma_init_reserved_areas);

cma_create_area()会调用cma_activate_area(),cma_activate_area()函数则会针对每个pageblock调用:

init_cma_reserved_pageblock(pfn_to_page(base_pfn));

这个函数则会通过set_pageblock_migratetype(page, MIGRATE_CMA)将页设置为MIGRATE_CMA类型的:

[cpp] view plaincopyprint?

    #ifdef CONFIG_CMA
    /*
     * Free whole pageblock and set its migration type to MIGRATE_CMA.
     * @page: first page of the pageblock.
     */
    void __init init_cma_reserved_pageblock(struct page *page)
    {
    	unsigned i = pageblock_nr_pages;
    	struct page *p = page;

    	/* Clear the reserved flag and zero the refcount of every page. */
    	do {
    		__ClearPageReserved(p);
    		set_page_count(p, 0);
    	} while (++p, --i);

    	set_page_refcounted(page);
    	set_pageblock_migratetype(page, MIGRATE_CMA);
    	/* Release the block as a single pageblock_order allocation. */
    	__free_pages(page, pageblock_order);
    	totalram_pages += pageblock_nr_pages;
    }
    #endif

同时其中调用的__free_pages(page, pageblock_order);最终会调用到__free_one_page(page, zone, order, migratetype);相关的page会被加到MIGRATE_CMA的free_list上面去:

list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);

申请连续内存

申请连续内存仍然使用标准的arch/arm/mm/dma-mapping.c中定义的dma_alloc_coherent()和dma_alloc_writecombine(),这二者会间接调用drivers/base/dma-contiguous.c中的

[cpp] view plaincopyprint?

    struct page *dma_alloc_from_contiguous(struct device *dev, int count,
    				       unsigned int align);

->

[cpp] view plaincopyprint?

    structpage*dma_alloc_from_contiguous(structdevice*dev,intcount,unsignedintalign){…for(;;){pageno=bitmap_find_next_zero_area(cma->bitmap,cma->count,start,count,mask);if(pageno>=cma->count){ret=-ENOMEM;gotoerror;}pfn=cma->base_pfn+pageno;ret=alloc_contig_range(pfn,pfn+count,MIGRATE_CMA);if(ret==0){bitmap_set(cma->bitmap,pageno,count);break;}elseif(ret!=-EBUSY){gotoerror;}pr_debug("%s():memoryrangeat%pisbusy,retrying\n",__func__,pfn_to_page(pfn));/*tryagainwithabitdifferentmemorytarget*/start=pageno+mask+1;}…}

->

int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype)

需要隔离page,隔离page的作用通过代码的注释可以体现:

[cpp] view plaincopyprint?

    /**Whatwedohereiswemarkallpageblocksinrangeas*MIGRATE_ISOLATE.Becauseofthewaypageallocatorwork,we*aligntherangetoMAX_ORDERpagessothatpageallocator*won’ttrytomergebuddiesfromdifferentpageblocksand*changeMIGRATE_ISOLATEtosomeothermigrationtype.**OncethepageblocksaremarkedasMIGRATE_ISOLATE,we*migratethepagesfromanunalignedrange(ie.pagesthat*weareinterestedin).Thiswillputallthepagesin*rangebacktopageallocatorasMIGRATE_ISOLATE.**Whenthisisdone,wetakethepagesinrangefrompage*allocatorremovingthemfromthebuddysystem.Thisway*pageallocatorwillneverconsiderusingthem.**Thisletsusmarkthepageblocksbackas*MIGRATE_CMA/MIGRATE_MOVABLEsothatfreepagesinthe*MAX_ORDERalignedrangebutnotintheunaligned,original*rangeareputbacktopageallocatorsothatbuddycanuse*them.*/ret=start_isolate_page_range(pfn_align_to_maxpage_down(start),pfn_align_to_maxpage_up(end),migratetype);

简单地说,就是把相关的page标记为MIGRATE_ISOLATE,这样buddy系统就不会再使用它们。

[cpp] view plaincopyprint?

    /**start_isolate_page_range()–makepage-allocation-typeofrangeofpages*tobeMIGRATE_ISOLATE.*@start_pfn:ThelowerPFNoftherangetobeisolated.*@end_pfn:TheupperPFNoftherangetobeisolated.*@migratetype:migratetypetosetinerrorrecovery.**Makingpage-allocation-typetobeMIGRATE_ISOLATEmeansfreepagesin*therangewillneverbeallocated.Anyfreepagesandpagesfreedinthe*futurewillnotbeallocatedagain.**start_pfn/end_pfnmustbealignedtopageblock_order.*Returns0onsuccessand-EBUSYifanypartofrangecannotbeisolated.*/intstart_isolate_page_range(unsignedlongstart_pfn,unsignedlongend_pfn,unsignedmigratetype){unsignedlongpfn;unsignedlongundo_pfn;structpage*page;BUG_ON((start_pfn)&(pageblock_nr_pages-1));BUG_ON((end_pfn)&(pageblock_nr_pages-1));for(pfn=start_pfn;pfn<end_pfn;pfn+=pageblock_nr_pages){page=__first_valid_page(pfn,pageblock_nr_pages);if(page&&set_migratetype_isolate(page)){undo_pfn=pfn;gotoundo;}}return0;undo:for(pfn=start_pfn;pfn<undo_pfn;pfn+=pageblock_nr_pages)unset_migratetype_isolate(pfn_to_page(pfn),migratetype);return-EBUSY;}

接下来调用__alloc_contig_migrate_range()进行页面隔离和迁移:

[cpp] view plaincopyprint?

    staticint__alloc_contig_migrate_range(unsignedlongstart,unsignedlongend){/*Thisfunctionisbasedoncompact_zone()fromcompaction.c.*/unsignedlongpfn=start;unsignedinttries=0;intret=0;structcompact_controlcc={.nr_migratepages=0,.order=-1,.zone=page_zone(pfn_to_page(start)),.sync=true,};INIT_LIST_HEAD(&cc.migratepages);migrate_prep_local();while(pfn<end||!list_empty(&cc.migratepages)){if(fatal_signal_pending(current)){ret=-EINTR;break;}if(list_empty(&cc.migratepages)){cc.nr_migratepages=0;pfn=isolate_migratepages_range(cc.zone,&cc,pfn,end);if(!pfn){ret=-EINTR;break;}tries=0;}elseif(++tries==5){ret=ret<0?ret:-EBUSY;break;}ret=migrate_pages(&cc.migratepages,__alloc_contig_migrate_alloc,0,false,true);}putback_lru_pages(&cc.migratepages);returnret>0?0:ret;}

其中的函数migrate_pages()会完成页面的迁移,迁移过程中通过传入的__alloc_contig_migrate_alloc()申请新的page,并将老的page的内容复制给新的page:

[cpp] view plaincopyprint?

    intmigrate_pages(structlist_head*from,new_page_tget_new_page,unsignedlongprivate,boolofflining,boolsync){intretry=1;intnr_failed=0;intpass=0;structpage*page;structpage*page2;intswapwrite=current->flags&PF_SWAPWRITE;intrc;if(!swapwrite)current->flags|=PF_SWAPWRITE;for(pass=0;pass<10&&retry;pass++){retry=0;list_for_each_entry_safe(page,page2,from,lru){cond_resched();rc=unmap_and_move(get_new_page,private,page,pass>2,offlining,sync);switch(rc){case-ENOMEM:gotoout;case-EAGAIN:retry++;break;case0:break;default:/*Permanentfailure*/nr_failed++;break;}}}rc=0;…}

其中的unmap_and_move()函数较为关键,它定义在mm/migrate.c中

[cpp] view plaincopyprint?

    /**Obtainthelockonpage,removeallptesandmigratethepage*tothenewlyallocatedpageinnewpage.*/staticintunmap_and_move(new_page_tget_new_page,unsignedlongprivate,structpage*page,intforce,boolofflining,boolsync){intrc=0;int*result=NULL;structpage*newpage=get_new_page(page,private,&result);intremap_swapcache=1;intcharge=0;structmem_cgroup*mem=NULL;structanon_vma*anon_vma=NULL;…/*chargeagainstnewpage*/charge=mem_cgroup_prepare_migration(page,newpage,&mem);…if(PageWriteback(page)){if(!force||!sync)gotouncharge;wait_on_page_writeback(page);}/**Bytry_to_unmap(),page->mapcountgoesdownto0here.Inthiscase,*wecannotnoticethatanon_vmaisfreedwhilewemigratesapage.*Thisget_anon_vma()delaysfreeinganon_vmapointeruntiltheend*ofmigration.Filecachepagesarenoproblembecauseofpage_lock()*FileCachesmayusewrite_page()orlock_page()inmigration,then,*justcareAnonpagehere.*/if(PageAnon(page)){/**Onlypage_lock_anon_vma()understandsthesubtletiesof*gettingaholdonananon_vmafromoutsideoneofitsmms.*/anon_vma=page_lock_anon_vma(page);if(anon_vma){/**Takeareferencecountontheanon_vmaifthe*pageismappedsothatitisguaranteedto*existwhenthepageisremappedlater*/get_anon_vma(anon_vma);page_unlock_anon_vma(anon_vma);}elseif(PageSwapCache(page)){/**Wecannotbesurethattheanon_vmaofanunmapped*swapcachepageissafetousebecausewedon’t*knowinadvanceiftheVMAthatthispagebelonged*tostillexists.IftheVMAandotherssharingthe*datahavebeenfreed,thentheanon_vmacould*alreadybeinvalid.**Toavoidthispossibility,swapcachepagesget*migratedbutarenotremappedwhenmigration*completes*/remap_swapcache=0;}else{gotouncharge;}}…/*Establishmigrationptesorremoveptes*/try_to_unmap(page,TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);skip_unmap:if(!page_mapped(page))rc=move_to_new_page(newpage,page,remap_swapcache);if(rc&&remap_swapcache)remove_migration_ptes(page,page);/*Dropananon_vmareferenceifwetookone*/if(anon_vma)drop_anon_vma(anon_vma);uncharge:if(!charge)mem_cgroup_end_migration(mem,page,newpage,rc==0);unlock:unlock_page(page);move_newpage:
…}

通过unmap_and_move(),老的page就被迁移过去新的page。

接下来要回收page,回收page的作用是,不至于因为拿了连续的内存后,系统变得内存饥饿:

->

[cpp] view plaincopyprint?

    /*
     * Reclaim enough pages to make sure that contiguous allocation
     * will not starve the system.
     */
    __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end - start);

->

[cpp] view plaincopyprint?

    /**Triggermemorypressurebumptoreclaimsomepagesinordertobeableto*allocate’count’pagesinsinglepageunits.Doessimilarworkas*__alloc_pages_slowpath()function.*/staticint__reclaim_pages(structzone*zone,gfp_tgfp_mask,intcount){enumzone_typehigh_zoneidx=gfp_zone(gfp_mask);structzonelist*zonelist=node_zonelist(0,gfp_mask);intdid_some_progress=0;intorder=1;unsignedlongwatermark;/**Increaselevelofwatermarkstoforcekswapddohisjob*tostabiliseatnewwatermarklevel.*/__update_cma_watermarks(zone,count);/*Obeywatermarksasifthepagewasbeingallocated*/watermark=low_wmark_pages(zone)+count;while(!zone_watermark_ok(zone,0,watermark,0,0)){wake_all_kswapd(order,zonelist,high_zoneidx,zone_idx(zone));did_some_progress=__perform_reclaim(gfp_mask,order,zonelist,NULL);if(!did_some_progress){/*Exhaustedwhatcanbedonesoit’sblamotime*/out_of_memory(zonelist,gfp_mask,order,NULL);}}/*Restoreoriginalwatermarklevels.*/__update_cma_watermarks(zone,-count);returncount;}

释放连续内存

内存释放的时候也比较简单,直接就是:

arch/arm/mm/dma-mapping.c:

[cpp] view plaincopyprint?

    voiddma_free_coherent(structdevice*dev,size_tsize,void*cpu_addr,dma_addr_thandle)

->

arch/arm/mm/dma-mapping.c:

[cpp] view plaincopyprint?

    staticvoid__free_from_contiguous(structdevice*dev,structpage*page,size_tsize){__dma_remap(page,size,pgprot_kernel);dma_release_from_contiguous(dev,page,size>>PAGE_SHIFT);}

->

[cpp] view plaincopyprint?

    booldma_release_from_contiguous(structdevice*dev,structpage*pages,intcount){…free_contig_range(pfn,count);..}

->

[cpp] view plaincopyprint?

    /*
     * Free @nr_pages consecutive pages, one page at a time, starting at
     * page frame number @pfn.
     */
    void free_contig_range(unsigned long pfn, unsigned nr_pages)
    {
    	for (; nr_pages--; ++pfn)
    		__free_page(pfn_to_page(pfn));
    }

将page交还给buddy。

内核内存分配的migratetype

内核内存分配的时候,带的标志是GFP_,但是GFP_可以转化为migratetype:

[cpp] view plaincopyprint?

    staticinlineintallocflags_to_migratetype(gfp_tgfp_flags){WARN_ON((gfp_flags&GFP_MOVABLE_MASK)==GFP_MOVABLE_MASK);if(unlikely(page_group_by_mobility_disabled))returnMIGRATE_UNMOVABLE;/*Groupbasedonmobility*/return(((gfp_flags&__GFP_MOVABLE)!=0)<<1)|((gfp_flags&__GFP_RECLAIMABLE)!=0);}

之后申请内存的时候,会对比迁移类型匹配的free_list:

[cpp] view plaincopyprint?

    page = get_page_from_freelist(gfp_mask | __GFP_HARDWALL, nodemask, order,
    			      zonelist, high_zoneidx,
    			      ALLOC_WMARK_LOW | ALLOC_CPUSET,
    			      preferred_zone, migratetype);

另外,笔者也编写了一个测试程序,透过它随时测试CMA的功能:

[cpp] view plaincopyprint?

    /**kernelmodulehelperfortestingCMA**LicensedunderGPLv2orlater.*/#include<linux/module.h>#include<linux/device.h> #include<linux/fs.h> #include<linux/miscdevice.h> #include<linux/dma-mapping.h>#defineCMA_NUM10 staticstructdevice*cma_dev;staticdma_addr_tdma_phys[CMA_NUM];staticvoid*dma_virt[CMA_NUM];/*anyreadrequestwillfreecoherentmemory,eg.*cat/dev/cma_test*/staticssize_tcma_test_read(structfile*file,char__user*buf,size_tcount,loff_t*ppos){inti;for(i=0;i<CMA_NUM;i++){if(dma_virt[i]){dma_free_coherent(cma_dev,(i+1)*SZ_1M,dma_virt[i],dma_phys[i]);_dev_info(cma_dev,"freevirt:%pphys:%p\n",dma_virt[i],(void*)dma_phys[i]);dma_virt[i]=NULL;break;}}return0;}/**anywriterequestwillalloccoherentmemory,eg.*echo0>/dev/cma_test*/staticssize_tcma_test_write(structfile*file,constchar__user*buf,size_tcount,loff_t*ppos){inti;intret;for(i=0;i<CMA_NUM;i++){if(!dma_virt[i]){dma_virt[i]=dma_alloc_coherent(cma_dev,(i+1)*SZ_1M,&dma_phys[i],GFP_KERNEL);if(dma_virt[i]){void*p;/*toucheverypageintheallocatedmemory*/for(p=dma_virt[i];p<dma_virt[i]+(i+1)*SZ_1M;p+=PAGE_SIZE)*(u32*)p=0;_dev_info(cma_dev,"allocvirt:%pphys:%p\n",dma_virt[i],(void*)dma_phys[i]);}else{dev_err(cma_dev,"nomeminCMAarea\n");ret=-ENOMEM;}break;}}returncount;}staticconststructfile_operationscma_test_fops={.owner=THIS_MODULE,.read=cma_test_read,.write=cma_test_write,};staticstructmiscdevicecma_test_misc={.name="cma_test",.fops=&cma_test_fops,};staticint__initcma_test_init(void){intret=0;ret=misc_register(&cma_test_misc);if(unlikely(ret)){pr_err("failedtoregistercmatestmiscdevice!\n");returnret;}cma_dev=cma_test_misc.this_device;cma_dev->coherent_dma_mask=~0;_dev_info(cma_dev,"registered.\n");returnret;}module_init(cma_test_init);staticvoid__exitcma_test_exit(void){misc_deregister(&cma_test_misc);}module_exit(cma_test_exit);MODULE_LICENSE("GPL");MODULE_AUTHOR("BarrySong<21cnbao@gmail.com>");MODULE_DESCRIPTION("kernelmoduletohelpthetestofCMA");MODULE_ALIAS("CMAtest");

申请内存:

[plain] view plaincopyprint?

    # echo 0 > /dev/cma_test

释放内存:

[plain] view plaincopyprint?

    # cat /dev/cma_test

参考链接:

[1] http://www.spinics.net/lists/arm-kernel/msg160854.html

[2] http://www.spinics.net/lists/arm-kernel/msg162063.html

[3] http://lwn.net/Articles/447405/

接受我们不能改变的一切,改变我们能改变的一切。

Linux内核最新的连续内存分配器(CMA)——避免预留大块内存 .

相关文章:

你感兴趣的文章:

标签云: