Linux Device Driver

Kernel programming: a summary of functions and templates for Linux drivers

1. Concurrency control

Concurrency means simultaneous access to the same resource (hardware, memory, data). SMP, process preemption, interrupts, tasklets and bottom halves all introduce concurrency. Race conditions inside a critical section are resolved with mutual exclusion: interrupt masking, atomic operations, spinlocks and semaphores.

Interrupt masking: prevents interrupts and kernel preemption from racing with the current code (the Linux scheduler is driven by interrupts), but it does not protect against SMP concurrency and must not be held for long, since it hurts asynchronous I/O and scheduling.
local_irq_disable() ... critical section ... local_irq_enable()
local_irq_save(flags)      // disable interrupts and save the mask
local_irq_restore(flags)
local_bh_disable()
local_bh_enable()

Atomic operations:
atomic_t v = ATOMIC_INIT(0);
void atomic_set(atomic_t *v, int i);
int atomic_read(atomic_t *v);
void atomic_add(int i, atomic_t *v);
void atomic_sub(int i, atomic_t *v);
void atomic_inc(atomic_t *v);
void atomic_dec(atomic_t *v);
int atomic_inc_and_test(atomic_t *v);
int atomic_dec_and_test(atomic_t *v);
int atomic_sub_and_test(int i, atomic_t *v);
int atomic_add_return(int i, atomic_t *v);
int atomic_sub_return(int i, atomic_t *v);
int atomic_inc_return(atomic_t *v);
int atomic_dec_return(atomic_t *v);
Bit operations (bit nr of the word at addr):
void set_bit(nr, void *addr);
void clear_bit(nr, void *addr);
void change_bit(nr, void *addr);
int test_bit(nr, void *addr);
int test_and_set_bit(nr, void *addr);
int test_and_clear_bit(nr, void *addr);
int test_and_change_bit(nr, void *addr);

Spinlocks: a spinlock does an atomic test-and-set on a memory variable and busy-waits (spins in place) until it succeeds, so it must not guard large critical sections, and the critical section must not recurse on the lock or block, for example through copy_from_user() or kmalloc(), or it will deadlock.
spinlock_t lock;
spin_lock_init(&lock);
spin_lock(&lock);
spin_trylock(&lock);
spin_unlock(&lock);
A spinlock is immune to SMP and process preemption, but not to interrupts and bottom halves, so it is combined with masking them:
spin_lock_irq();      spin_unlock_irq();
spin_lock_irqsave();  spin_unlock_irqrestore();
spin_lock_bh();       spin_unlock_bh();

Read-write spinlocks (rwlock): separate read and write locks.
rwlock_t my_rwlock = RW_LOCK_UNLOCKED;
rwlock_init(&my_rwlock);
read_lock(&lock);  ...  read_unlock(&lock);
write_lock_irqsave(&lock, flags);  ...  write_unlock_irqrestore(&lock, flags);

Sequence locks (seqlock): writers exclude each other, readers may run concurrently with them; if a write happens during a read, the read is simply retried.
void write_seqlock(seqlock_t *sl);
int write_tryseqlock(seqlock_t *sl);
void write_sequnlock(seqlock_t *sl);
unsigned read_seqbegin(const seqlock_t *sl);
int read_seqretry(const seqlock_t *sl, unsigned iv);
do {
    seqnum = read_seqbegin_irqsave(&seqlock_a, flags);
    ...
} while (read_seqretry_irqrestore(&seqlock_a, seqnum, flags));

RCU (read-copy update): introduced in 2.5.43. Reads are essentially unrestricted; a writer first makes a copy, modifies the copy, and once every pre-existing reference to the shared data has finished, switches the pointer over to the copy through a callback.
rcu_read_lock()        // equivalent to preempt_disable()
rcu_read_unlock()      // equivalent to preempt_enable()
rcu_read_lock_bh()     // equivalent to local_bh_disable()
rcu_read_unlock_bh()   // equivalent to local_bh_enable()
synchronize_rcu();     // called on the write side, blocks until all readers are done
synchronize_kernel() / synchronize_sched();   // wait until every CPU has passed through a preemptible (quiescent) state
void fastcall call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));     // register a callback on rcu_data
void fastcall call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu));  // register a callback on rcu_bh_data
RCU-protected list operations:
static inline void list_add_rcu(struct list_head *new, struct list_head *head);
static inline void list_add_tail_rcu(struct list_head *new, struct list_head *head);
static inline void list_del_rcu(struct list_head *entry);
static inline void list_replace_rcu(struct list_head *old, struct list_head *new);
list_for_each_rcu(pos, head);                // traversal macros
list_for_each_safe_rcu(pos, n, head);
list_for_each_entry_rcu(pos, head, member);
Hash-list (hlist) variants:
static inline void hlist_del_rcu(struct hlist_node *n);
static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h);
hlist_for_each_rcu(pos, head);
hlist_for_each_entry_rcu(tpos, pos, head, member);
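The RCU list macros above are easier to follow in context. Here is a minimal sketch of the read-copy-update pattern for a kernel linked list; my_node, my_list, my_list_lock, add_node(), find_value() and remove_node() are illustrative names, not taken from the text above.

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_node {
    int value;
    struct list_head list;
    struct rcu_head rcu;
};

static LIST_HEAD(my_list);
static DEFINE_SPINLOCK(my_list_lock);    /* writers still exclude each other */

/* Reader: may run concurrently with a writer, no lock taken. */
static int find_value(int key)
{
    struct my_node *p;
    int found = 0;

    rcu_read_lock();
    list_for_each_entry_rcu(p, &my_list, list) {
        if (p->value == key) {
            found = 1;
            break;
        }
    }
    rcu_read_unlock();
    return found;
}

/* Writer: insert a new node under the spinlock. */
static void add_node(int v)
{
    struct my_node *n = kmalloc(sizeof(*n), GFP_KERNEL);

    if (!n)
        return;
    n->value = v;
    spin_lock(&my_list_lock);
    list_add_rcu(&n->list, &my_list);
    spin_unlock(&my_list_lock);
}

static void free_node(struct rcu_head *head)
{
    kfree(container_of(head, struct my_node, rcu));
}

/* Writer: unlink the node, then free it only after a grace period,
 * when no reader can still hold a reference to it. */
static void remove_node(struct my_node *old)
{
    spin_lock(&my_list_lock);
    list_del_rcu(&old->list);
    spin_unlock(&my_list_lock);
    call_rcu(&old->rcu, free_node);
}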
Semaphores: used for synchronization; only the process that holds the semaphore may execute the critical section, and a process that cannot obtain it goes to sleep.
struct semaphore sem;
void sema_init(struct semaphore *sem, int val);
void init_MUTEX(struct semaphore *sem);           DECLARE_MUTEX(name);            // macro forms
void init_MUTEX_LOCKED(struct semaphore *sem);    DECLARE_MUTEX_LOCKED(name);
void down(struct semaphore *sem);               // may sleep, so it must not be used in interrupt context
int down_interruptible(struct semaphore *sem);  // the sleep can be broken by a signal
int down_trylock(struct semaphore *sem);        // try to take the semaphore without sleeping
void up(struct semaphore *sem);
Example:
DECLARE_MUTEX(mount_sem);
down(&mount_sem);
... critical section ...
up(&mount_sem);

Read-write semaphores:
struct rw_semaphore rw_sem;
init_rwsem(&rw_sem);
down_read(&rw_sem);   ...  up_read(&rw_sem);
down_write(&rw_sem);  ...  up_write(&rw_sem);

Completions: Linux also provides the completion, which is often a better fit than a semaphore for this kind of synchronization.
struct completion my_completion;
init_completion(&my_completion);    DECLARE_COMPLETION(my_completion);
void wait_for_completion(struct completion *c);
void complete(struct completion *c);        // wake one waiter blocked in wait_for_completion()
void complete_all(struct completion *c);    // wake all waiters

Mutexes: the kernel also has a dedicated mutex type.
struct mutex my_mutex;
mutex_init(&my_mutex);
mutex_lock(&my_mutex);
...
mutex_unlock(&my_mutex);

Choosing between spinlocks and semaphores: they belong to different levels of mutual exclusion; the semaphore implementation relies on the spinlock, because on SMP a spinlock is needed to keep the semaphore operations themselves atomic. Rules of thumb:
1. Use a spinlock when the critical section is small.
2. Use a semaphore when the critical section may block.
3. Only a spinlock can be used in interrupt context.

2. Blocking and polling

When a resource is not available, a process can access it in blocking or non-blocking mode. In a Linux driver, wait queues are used to put blocked processes to sleep and wake them up later.
wait_queue_head_t my_queue;                                         // define a wait queue head
init_waitqueue_head(&my_queue);    DECLARE_WAIT_QUEUE_HEAD(name);   // initialize, or define and initialize, a wait queue head
DECLARE_WAITQUEUE(name, tsk);                                       // define a wait queue element
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);      // add an element to the queue
void fastcall remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait);   // remove an element from the queue
wait_event(queue, condition);                                       // wait for an event
wait_event_interruptible(queue, condition);
wait_event_timeout(queue, condition, timeout);
wait_event_interruptible_timeout(queue, condition, timeout);
void wake_up(wait_queue_head_t *queue);                             // wake up the queue
void wake_up_interruptible(wait_queue_head_t *queue);
sleep_on(wait_queue_head_t *q);                                     // sleep on a wait queue
interruptible_sleep_on(wait_queue_head_t *q);
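Before the full globalfifo example below, here is a more compact sketch of a blocking read built directly on wait_event_interruptible(); the structure xxx_dev, its data_ready flag and xxx_data_arrived() are illustrative names, not part of the globalfifo code.

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <asm/uaccess.h>

struct xxx_dev {
    wait_queue_head_t r_wait;    /* initialized elsewhere with init_waitqueue_head() */
    int data_ready;              /* condition the reader sleeps on */
    char data;                   /* one byte of "device" data, for illustration */
};

static ssize_t xxx_read(struct file *filp, char __user *buf,
                        size_t count, loff_t *ppos)
{
    struct xxx_dev *dev = filp->private_data;

    /* Sleep until data_ready becomes true; return -ERESTARTSYS if a
     * signal interrupts the sleep so the C library can restart the call. */
    if (wait_event_interruptible(dev->r_wait, dev->data_ready))
        return -ERESTARTSYS;

    if (count && copy_to_user(buf, &dev->data, 1))
        return -EFAULT;

    dev->data_ready = 0;
    return count ? 1 : 0;
}

/* Producer side, e.g. an interrupt bottom half: publish data, wake readers. */
static void xxx_data_arrived(struct xxx_dev *dev, char c)
{
    dev->data = c;
    dev->data_ready = 1;
    wake_up_interruptible(&dev->r_wait);
}

The globalfifo example that follows uses the lower-level add_wait_queue()/schedule() form of the same idea.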
Wait queue example (globalfifo):
struct globalfifo_dev {
    ...
    wait_queue_head_t r_wait;
    wait_queue_head_t w_wait;
};

int globalfifo_init(void)
{
    ...
    init_waitqueue_head(&globalfifo_devp->r_wait);
    init_waitqueue_head(&globalfifo_devp->w_wait);
}

static ssize_t globalfifo_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
    DECLARE_WAITQUEUE(wait, current);
    add_wait_queue(&dev->r_wait, &wait);
    /* ... sleep while the FIFO is empty, then copy data to user space ... */
    wake_up_interruptible(&dev->w_wait);   /* after a successful read, wake the writers */
out2:
    remove_wait_queue(&dev->r_wait, &wait);
}

static ssize_t globalfifo_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
{
    DECLARE_WAITQUEUE(wait, current);
    add_wait_queue(&dev->w_wait, &wait);
    /* if the FIFO is full (dev->current_len == GLOBALFIFO_SIZE), go to sleep: */
    __set_current_state(TASK_INTERRUPTIBLE);
    schedule();
    if (signal_pending(current)) {
        ret = -ERESTARTSYS;
        goto out2;
    }
    /* otherwise write the data, then wake the readers: */
    wake_up_interruptible(&dev->r_wait);
}

Polling: non-blocking I/O. The poll method itself does not block; select()/poll() return when at least one file in the descriptor set becomes accessible or when the timeout expires. In user space the BSD Unix interface is select():
int select(int numfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout);
In the kernel the driver implements the System V style poll method:
unsigned int (*poll)(struct file *filp, struct poll_table *wait);
void poll_wait(struct file *filp, wait_queue_head_t *queue, poll_table *wait);   // register the poll_table with a wait queue

Template:
static unsigned int xxx_poll(struct file *filp, poll_table *wait)
{
    unsigned int mask = 0;
    struct xxx_dev *dev = filp->private_data;   /* get the device structure */
    ...
    poll_wait(filp, &dev->r_wait, wait);        /* add the read wait queue head */
    poll_wait(filp, &dev->w_wait, wait);        /* add the write wait queue head */
    if (...) {
        mask |= POLLOUT | POLLWRNORM;           /* device is writable */
    }
    ...
    return mask;
}
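For completeness, here is a small user-space sketch of the other side of this interface: waiting with select() until the driver's poll method reports the descriptor readable. "/dev/xxx" is a placeholder device node, not a real one.

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/select.h>

int main(void)
{
    int fd = open("/dev/xxx", O_RDONLY | O_NONBLOCK);
    fd_set rfds;
    char buf[64];

    if (fd < 0)
        return 1;

    FD_ZERO(&rfds);
    FD_SET(fd, &rfds);

    /* Blocks here until the driver's poll method reports fd readable
     * (POLLIN | POLLRDNORM). */
    if (select(fd + 1, &rfds, NULL, NULL, NULL) > 0 && FD_ISSET(fd, &rfds)) {
        ssize_t n = read(fd, buf, sizeof(buf));
        printf("read %zd bytes\n", n);
    }

    close(fd);
    return 0;
}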
3. Asynchronous notification and asynchronous I/O

With asynchronous notification the driver emulates an interrupt towards the application: it sends a signal, and the application reads or writes in response. Blocking I/O, non-blocking I/O and asynchronous notification are not inherently better or worse than one another; choose according to the scenario.

Signals: there are 64 signals, 32 standard ones and 32 real-time ones. Except for SIGSTOP and SIGKILL, a process may ignore or catch any signal. Catching means the process has code that runs when the signal arrives; if a signal is neither ignored nor caught, the kernel applies its default action.

Handling a signal in user space:
void (*signal(int signum, void (*handler)(int)))(int);
int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact);
Example:
main()
{
    int oflags;
    signal(SIGIO, input_handler);                    /* deliver this process's SIGIO to input_handler */
    fcntl(STDIN_FILENO, F_SETOWN, getpid());         /* F_SETOWN ties the STDIN_FILENO file to this process */
    oflags = fcntl(STDIN_FILENO, F_GETFL);
    fcntl(STDIN_FILENO, F_SETFL, oflags | FASYNC);   /* enable FASYNC on the file */
    while (1);
}

Sending the signal from the kernel or driver. For a driver to support asynchronous notification it must:
1. Support the F_SETOWN command; the kernel fills in filp->f_owner. This corresponds to fcntl(fd, F_SETOWN, getpid()) in user space.
2. Support the F_SETFL command; whenever the FASYNC flag changes, the driver's fasync() method runs, so the driver must provide one. This corresponds to fcntl(fd, F_SETFL, oflags | FASYNC) in user space.
3. Call kill_fasync() when the resource becomes available; this is what triggers the handler installed with signal().

Asynchronous notification in a driver uses one data structure and two functions:
struct fasync_struct;
int fasync_helper(int fd, struct file *filp, int mode, struct fasync_struct **fa);
void kill_fasync(struct fasync_struct **fa, int sig, int band);

Template:
1. Put a fasync_struct pointer into the device structure:
struct xxx_dev {
    struct cdev cdev;
    ...
    struct fasync_struct *async_queue;   /* asynchronous notification queue */
};
2. Implement the fasync method:
static int xxx_fasync(int fd, struct file *filp, int mode)
{
    struct xxx_dev *dev = filp->private_data;
    return fasync_helper(fd, filp, mode, &dev->async_queue);
}
3. Send the signal when data is written:
static ssize_t xxx_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos)
{
    struct xxx_dev *dev = filp->private_data;
    ...
    if (dev->async_queue)
        kill_fasync(&dev->async_queue, SIGIO, POLL_IN);
    ...
}
4. Remove the file from the notification list when it is closed:
static int xxx_release(struct inode *inode, struct file *filp)
{
    struct xxx_dev *dev = filp->private_data;
    xxx_fasync(-1, filp, 0);    /* drop the file from the asynchronous notification list */
    ...
    return 0;
}

POSIX AIO: asynchronous I/O entered the kernel in 2.6 (patches exist for 2.4). Unlike synchronous I/O it does not block waiting for each request: many I/O requests can be in flight at once, each with its own context. The aiocb (AIO control block) holds the buffer and identifies the operation once it completes. AIO is similar in spirit to select(): select() blocks on readiness notification, while AIO blocks (if at all) on collecting the results of the I/O calls themselves.

The AIO API is provided by the GNU C library and conforms to POSIX.1b:
int aio_read(struct aiocb *aiocbp);    // asynchronous read on a file descriptor (file, socket, pipe, ...)
int aio_write(struct aiocb *aiocbp);
int aio_error(struct aiocb *aiocbp);   // EINPROGRESS while pending, 0 on success, ECANCELED if cancelled, otherwise the error code
ssize_t aio_return(struct aiocb *aiocbp);   // the result is not returned directly; once aio_error() != EINPROGRESS, aio_return() fetches it
int aio_suspend(const struct aiocb *const cblist[], int n, const struct timespec *timeout);   // block until one of the listed requests completes, a signal arrives or the timeout expires
int aio_cancel(int fd, struct aiocb *aiocbp);   // returns AIO_CANCELED, AIO_NOTCANCELED or AIO_ALLDONE; check the outcome afterwards with aio_error()
Example:
    struct aiocb *cblist[MAX_LIST];
    bzero((char *)cblist, sizeof(cblist));
    cblist[0] = &my_aiocb;
    ret = aio_read(&my_aiocb);
    ret = aio_suspend(cblist, MAX_LIST, NULL);
    ret = aio_error(&my_aiocb);

int lio_listio(int mode, struct aiocb *list[], int nent, struct sigevent *sig);   // launch a batch of asynchronous operations in one call
    struct aiocb aiocb1, aiocb2;
    struct aiocb *list[MAX_LIST];
    /* initialize the first aiocb */
    aiocb1.aio_fildes = fd;
    aiocb1.aio_buf = malloc(BUFSIZE + 1);
    aiocb1.aio_nbytes = BUFSIZE;
    aiocb1.aio_offset = next_offset;
    aiocb1.aio_lio_opcode = LIO_READ;
    /* ... initialize the remaining aiocbs ... */
    bzero((char *)list, sizeof(list));
    list[0] = &aiocb1;
    list[1] = &aiocb2;
    ...
    ret = lio_listio(LIO_WAIT, list, MAX_LIST, NULL);

Using a signal to report AIO completion:
void setup_io(...)    /* set up the asynchronous request */
{
    int fd;
    struct sigaction sig_act;
    struct aiocb my_aiocb;
    ...
    /* set up the signal handler */
    sigemptyset(&sig_act.sa_mask);
    sig_act.sa_flags = SA_SIGINFO;
    sig_act.sa_sigaction = aio_completion_handler;
    /* set up the AIO request */
    bzero((char *)&my_aiocb, sizeof(struct aiocb));
    my_aiocb.aio_fildes = fd;
    my_aiocb.aio_buf = malloc(BUF_SIZE + 1);
    my_aiocb.aio_nbytes = BUF_SIZE;
    my_aiocb.aio_offset = next_offset;
    /* tie the AIO request to the signal */
    my_aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
    my_aiocb.aio_sigevent.sigev_signo = SIGIO;
    my_aiocb.aio_sigevent.sigev_value.sival_ptr = &my_aiocb;
    /* bind the signal to its handler */
    ret = sigaction(SIGIO, &sig_act, NULL);
    ...
    ret = aio_read(&my_aiocb);    /* start the asynchronous read */
}
/* the signal handler */
void aio_completion_handler(int signo, siginfo_t *info, void *context)
{
    struct aiocb *req;
    if (info->si_signo == SIGIO) {
        req = (struct aiocb *)info->si_value.sival_ptr;   /* recover the aiocb */
        if (aio_error(req) == 0) {
            ret = aio_return(req);
        }
    }
    return;
}

Using a thread callback to report AIO completion:
/proc/sys/fs/aio-nr        // current number of system-wide asynchronous I/O requests
/proc/sys/fs/aio-max-nr    // maximum number of concurrent requests allowed
void setup_io(...)
{
    int fd;
    struct aiocb my_aiocb;
    ...
    /* set up the AIO request, then tie it to a thread callback */
    my_aiocb.aio_sigevent.sigev_notify = SIGEV_THREAD;
    my_aiocb.aio_sigevent.sigev_notify_function = aio_completion_handler;
    my_aiocb.aio_sigevent.sigev_notify_attributes = NULL;
    my_aiocb.aio_sigevent.sigev_value.sival_ptr = &my_aiocb;
    ...
    ret = aio_read(&my_aiocb);    /* start the asynchronous read */
}
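The fragments above come from larger programs. A minimal, self-contained sketch of the polling style looks like this (link with -lrt on glibc; the file path is only an example):

#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define BUFSIZE 4096

int main(void)
{
    struct aiocb cb;
    char buf[BUFSIZE];
    int fd = open("/etc/hostname", O_RDONLY);   /* any readable file will do */

    if (fd < 0)
        return 1;

    memset(&cb, 0, sizeof(cb));
    cb.aio_fildes = fd;
    cb.aio_buf = buf;
    cb.aio_nbytes = BUFSIZE;
    cb.aio_offset = 0;

    if (aio_read(&cb) < 0)                  /* queue the asynchronous read */
        return 1;

    while (aio_error(&cb) == EINPROGRESS)   /* poll until the request finishes */
        usleep(1000);

    printf("aio_read returned %zd bytes\n", aio_return(&cb));
    close(fd);
    return 0;
}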
AIO and device drivers: in struct kiocb the ki_filp field points to the struct file, and is_sync_kiocb() tells whether the request is really synchronous. Block and network drivers are asynchronous by nature; AIO support matters for character drivers where asynchronous I/O can genuinely improve performance, for example tape drives. Three AIO methods appear in file_operations:
ssize_t (*aio_read)(struct kiocb *iocb, char *buffer, size_t count, loff_t offset);
ssize_t (*aio_write)(struct kiocb *iocb, const char *buffer, size_t count, loff_t offset);
int (*aio_fsync)(struct kiocb *iocb, int datasync);

Asynchronous read:
static ssize_t xxx_aio_read(struct kiocb *iocb, char *buf, size_t count, loff_t pos)
{
    return xxx_defer_op(0, iocb, buf, count, pos);
}
Asynchronous write:
static ssize_t xxx_aio_write(struct kiocb *iocb, const char *buf, size_t count, loff_t pos)
{
    return xxx_defer_op(1, iocb, (char *)buf, count, pos);
}
Setting up the asynchronous operation:
static int xxx_defer_op(int write, struct kiocb *iocb, char *buf, size_t count, loff_t pos)
{
    struct async_work *async_wk;
    int result;

    if (write)
        result = xxx_write(iocb->ki_filp, buf, count, &pos);
    else
        result = xxx_read(iocb->ki_filp, buf, count, &pos);

    if (is_sync_kiocb(iocb))
        return result;

    /* otherwise this is asynchronous I/O: defer completion through an async_work item */
    async_wk = kmalloc(sizeof(*async_wk), GFP_KERNEL);
    if (async_wk == NULL)
        return result;

    /* schedule the deferred work */
    async_wk->iocb = iocb;
    async_wk->result = result;
    INIT_WORK(&async_wk->work, xxx_do_deferred_op, async_wk);
    schedule_delayed_work(&async_wk->work, HZ / 100);   /* run the async_work later */
    return -EIOCBQUEUED;
}

static void xxx_do_deferred_op(void *p)
{
    struct async_work *async_wk = (struct async_work *)p;
    aio_complete(async_wk->iocb, async_wk->result, 0);   /* tell the kernel the driver has finished the operation */
    kfree(async_wk);
}

struct async_work {
    struct kiocb *iocb;
    int result;                  /* result of the operation */
    struct work_struct work;     /* work item */
};

4. Interrupts and timers

Interrupt programming: interrupts can be internal or external, maskable or non-maskable, vectored or non-vectored (in the non-vectored case software dispatches on a status register). Linux splits interrupt handling into a top half and a bottom half: the top half reads the interrupt status registers, clears the interrupt flag, and registers the bottom half (queues the bottom-half handler on the device's bottom-half queue). VxWorks offers a similar combination of interrupt context and task context: netJobAdd() hands network packet reception and delivery over to the tNetTask task. /proc/interrupts describes the system's interrupt state.

int request_irq(unsigned int irq,                    // hardware interrupt number
                irqreturn_t (*handler)(int irq, void *dev_id, struct pt_regs *regs),   // interrupt handler
                unsigned long irqflags,              // handler attributes: SA_INTERRUPT, SA_SHIRQ
                const char *devname,
                void *dev_id);
void free_irq(unsigned int irq, void *dev_id);
void enable_irq(int irq);
void disable_irq_nosync(int irq);    // returns immediately
void disable_irq(int irq);           // returns only after the current handler finishes
void local_irq_save(unsigned long flags);    // mask interrupts on this CPU and save the mask in flags
void local_irq_restore(unsigned long flags);
void local_irq_disable(void);
void local_irq_enable(void);

Bottom-half mechanisms:
1. Tasklet template:
DECLARE_TASKLET(xxx_tasklet, xxx_do_tasklet, 0);

void xxx_do_tasklet(unsigned long data)
{
    ...
}

irqreturn_t xxx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
    ...
    tasklet_schedule(&xxx_tasklet);
    ...
}

int __init xxx_init(void)
{
    ...
    result = request_irq(xxx_irq, xxx_interrupt, SA_INTERRUPT, "xxx", NULL);
    ...
}

void __exit xxx_exit(void)
{
    ...
    free_irq(xxx_irq, NULL);    /* dev_id must match the one passed to request_irq() */
    ...
}

2. Work queue template:
struct work_struct xxx_wq;

void xxx_do_work(unsigned long data)
{
    ...
}

irqreturn_t xxx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
    ...
    schedule_work(&xxx_wq);
    ...
}

int xxx_init(void)
{
    ...
    result = request_irq(xxx_irq, xxx_interrupt, SA_INTERRUPT, "xxx", NULL);
    ...
    INIT_WORK(&xxx_wq, (void (*)(void *))xxx_do_work, NULL);
    ...
}

void xxx_exit(void)
{
    ...
    free_irq(xxx_irq, NULL);
    ...
}

3. Softirq bottom halves:
A hardware interrupt is a device interrupting the CPU; a softirq is the hardware interrupt handler interrupting the kernel; a signal is the kernel interrupting a process. Tasklets are built on softirqs. The softirq structure holds the softirq handler and its argument; open_softirq() registers the handler for a softirq, and raise_softirq() triggers one.

Shared interrupts: Linux 2.6 supports several devices on a single interrupt line.
irqreturn_t xxx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
    ...
    int status = read_int_status();
    if (!is_myint(dev_id, status))   /* every handler on the line runs, so decide quickly whether the interrupt is yours */
        return IRQ_NONE;
    ...
    return IRQ_HANDLED;
}

int xxx_init(void)
{
    ...
    result = request_irq(sh_irq, xxx_interrupt, SA_SHIRQ, "xxx", xxx_dev);
    /* every handler on a shared line must request with SA_SHIRQ and pass a unique dev_id (here xxx_dev) */
    ...
}
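The templates above omit error handling on request_irq(). Here is a minimal init/exit sketch that checks the return value, written against the 2.6-era API used in this article (SA_SHIRQ and the three-argument handler); XXX_IRQ and xxx_dev are illustrative placeholders.

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>

#define XXX_IRQ 7                        /* placeholder hardware IRQ number */

static int xxx_dev;                      /* unique dev_id cookie for the shared line */

static irqreturn_t xxx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
    /* On a shared line, read the device's status register here and
     * return IRQ_NONE if this device did not raise the interrupt. */
    return IRQ_HANDLED;
}

static int __init xxx_init(void)
{
    int result = request_irq(XXX_IRQ, xxx_interrupt, SA_SHIRQ, "xxx", &xxx_dev);
    if (result) {
        printk(KERN_ERR "xxx: cannot request IRQ %d, error %d\n", XXX_IRQ, result);
        return result;                   /* fail module load cleanly */
    }
    return 0;
}

static void __exit xxx_exit(void)
{
    free_irq(XXX_IRQ, &xxx_dev);         /* same dev_id as passed to request_irq() */
}

module_init(xxx_init);
module_exit(xxx_exit);
MODULE_LICENSE("GPL");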
Using kernel timers:
1. The timer_list structure:
struct timer_list {
    struct list_head entry;              /* list of timers */
    unsigned long expires;               /* expiry time, in jiffies */
    void (*function)(unsigned long);     /* timer handler */
    unsigned long data;                  /* argument passed to the handler */
    struct timer_base_s *base;
};
struct timer_list my_timer;
2. Initialization:
void init_timer(struct timer_list *timer);
TIMER_INITIALIZER(_function, _expires, _data);
DEFINE_TIMER(_name, _function, _expires, _data);
static inline void setup_timer(struct timer_list *timer, void (*function)(unsigned long), unsigned long data);
3. void add_timer(struct timer_list *timer);    // register the timer with the kernel
4. int del_timer(struct timer_list *timer);
5. int mod_timer(struct timer_list *timer, unsigned long expires);

Template:
struct xxx_dev {
    struct cdev cdev;
    ...
    struct timer_list xxx_timer;   /* keep the timer in the device structure */
};

xxx_fcntl(...)
{
    struct xxx_dev *dev = filp->private_data;
    ...
    init_timer(&dev->xxx_timer);
    dev->xxx_timer.function = &xxx_do_timer;
    dev->xxx_timer.data = (unsigned long)dev;
    dev->xxx_timer.expires = jiffies + delay;   /* initialize the timer */
    ...
    add_timer(&dev->xxx_timer);
    ...
}

xxx_func2(...)
{
    ...
    del_timer(&dev->xxx_timer);
    ...
}

static void xxx_do_timer(unsigned long arg)
{
    struct xxx_dev *dev = (struct xxx_dev *)arg;
    ...
    dev->xxx_timer.expires = jiffies + delay;   /* reset the expiry time */
    add_timer(&dev->xxx_timer);                 /* register the timer again */
    ...
}

Kernel delays:
Short busy-wait delays (the CPU spins):
void ndelay(unsigned long nsecs);
void udelay(unsigned long usecs);
void mdelay(unsigned long msecs);
Sleeping delays:
void msleep(unsigned int millisecs);
unsigned long msleep_interruptible(unsigned int millisecs);
void ssleep(unsigned int seconds);
Long delays, by comparing jiffies:
unsigned long delay = jiffies + 100;    /* e.g. jiffies + 2*HZ waits two seconds */
while (time_before(jiffies, delay));    /* time_after(a, b) compares the other way round */
How the sleeping delays are implemented:
void msleep(unsigned int msecs)
{
    unsigned long timeout = msecs_to_jiffies(msecs) + 1;
    while (timeout)
        timeout = schedule_timeout_uninterruptible(timeout);   /* sleep until the timeout elapses */
}
unsigned long msleep_interruptible(unsigned int msecs)
{
    unsigned long timeout = msecs_to_jiffies(msecs) + 1;
    while (timeout && !signal_pending(current))
        timeout = schedule_timeout_interruptible(timeout);
    return jiffies_to_msecs(timeout);
}
Or put the current process on a wait queue with a timeout:
sleep_on_timeout(wait_queue_head_t *q, unsigned long timeout);
interruptible_sleep_on_timeout(wait_queue_head_t *q, unsigned long timeout);
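The API list above mentions mod_timer() but the template never uses it. Before moving on to memory management, here is a self-contained sketch of a periodic timer that re-arms itself with mod_timer(), written against the 2.6-era timer API (function/data fields); demo_timer and demo_timer_fn are illustrative names.

#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/timer.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
    printk(KERN_INFO "demo timer fired, jiffies=%lu\n", jiffies);
    mod_timer(&demo_timer, jiffies + HZ);    /* re-arm one second later */
}

static int __init demo_init(void)
{
    setup_timer(&demo_timer, demo_timer_fn, 0);
    mod_timer(&demo_timer, jiffies + HZ);    /* first expiry in one second */
    return 0;
}

static void __exit demo_exit(void)
{
    del_timer_sync(&demo_timer);    /* wait for a running handler to finish before unloading */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");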
5. Memory management and I/O

Hardware basics, memory space and I/O space: x86 has a separate I/O space; ARM and MIPS have only a memory space (a separate I/O space is optional in principle). x86 accesses I/O ports with IN accumulator, {port|DX} and OUT {port|DX}, accumulator.
typedef void (*lpFunction)(void);
lpFunction lpReset = (lpFunction)0xF000FFF0;
lpReset();    /* jumping to the BIOS reset entry amounts to a soft reboot */

MMU: the memory management unit translates virtual addresses to physical addresses, enforces access permissions and controls the cache. It consists of:
TLB (Translation Lookaside Buffer): in each entry the C bit controls whether the address is cacheable, the B bit controls write buffering, and the access-permission and domain bits control reads and writes; a forbidden access raises a memory abort.
TTW (Translation Table Walk): when the virtual address misses in the TLB, a table walk fetches the translation and permission information and puts it into a free TLB entry, or replaces an existing one. When the permissions allow it, the physical access is served from the cache or from memory.
The ARM7TDMI-based S3C44B0X, as well as Dragonball, ColdFire and Hitachi H8/300, have no MMU and run uClinux; the S3C2410 and S3C2440 have an MMU. The vivi bootloader builds a one-level page table:

void mem_map_init(void)
{
#ifdef CONFIG_S3C2410_NAND_BOOT
    mem_map_nand_boot();    /* eventually calls mem_mapping_linear() to build the first-level page table */
#else
    mem_map_nor();
#endif
    cache_clean_invalidate();   /* clean and invalidate the cache */
    tlb_invalidate();           /* invalidate the TLB */
}

static inline void mem_mapping_linear(void)
{
    unsigned long pageoffset, sectionNumber;
    putstr("MMU table base address = 0x%", (unsigned long)mmu_tlb_base);
    /* ARM920T section-mode mapping: 4096 sections of 1 MB each, so the
     * translation table is 16 KB. The mapping modes are fault (no mapping),
     * coarse page, section and fine page. */
    for (sectionNumber = 0; sectionNumber < 4096; sectionNumber++) {
        pageoffset = (sectionNumber << 20);
        *(mmu_tlb_base + (pageoffset >> 20)) = pageoffset | MMU_SECDESC;
    }
    /* make the SDRAM region 0x30000000-0x33ffffff cacheable
     * (DRAM_BASE = 0x30000000, DRAM_SIZE = 64 MB) */
    for (pageoffset = DRAM_BASE; pageoffset < (DRAM_BASE + DRAM_SIZE); pageoffset += SZ_1M) {
        *(mmu_tlb_base + (pageoffset >> 20)) = pageoffset | MMU_SECDESC | MMU_CACHEABLE;
    }
}

Linux memory management: user space occupies 0 to 3 GB and kernel space 3 GB to 4 GB (PAGE_OFFSET = 0xC0000000); each has its own page tables. The kernel's 1 GB is laid out as:
    physical memory mapping region (starting at 3 GB, at most 896 MB), then an isolation gap;
    vmalloc allocation area, VMALLOC_START to VMALLOC_END, used by vmalloc(), then an isolation gap;
    high-memory page mapping area, starting at PKMAP_BASE;
    fixed (special-purpose) mapping area, FIXADDR_START to FIXADDR_TOP;
    a reserved region up to 4 GB.
For more than 4 GB of physical memory the CPU's Physical Address Extension (PAE) mode with 64-bit page directory entries is used; this requires CPU support (Intel Pentium Pro and later).

Allocating and freeing memory in user space:
char *p = malloc(...);
free(p);

Allocating and freeing memory in kernel space:
void *kmalloc(size_t size, int flags);
kmalloc(..., GFP_KERNEL) may block, so it must not be used in interrupt context, while holding a spinlock, or in interrupt handlers, tasklets and kernel timers; use GFP_ATOMIC there.
GFP_KERNEL:      allocate for process context in the kernel; built on __get_free_pages().
GFP_USER:        allocate pages for user space.
GFP_HIGHUSER:    like GFP_USER, but from high memory.
GFP_NOIO:        the allocation must not start any I/O.
GFP_NOFS:        the allocation must not make any filesystem calls.
__GFP_DMA:       allocate from the DMA-capable zone.
__GFP_HIGHMEM:   the memory may come from high memory.
__GFP_COLD:      request a cache-cold page.
__GFP_NOWARN:    do not warn if the allocation fails.
__GFP_HIGH:      high-priority request; may dip into reserved pages.
__GFP_REPEAT:    retry on failure.
__GFP_NOFAIL:    the allocation may only succeed; never give up.
__GFP_NORETRY:   give up immediately if the request cannot be satisfied.

__get_free_pages(unsigned int flags, unsigned int order);   // order is 0 to 11, i.e. 2^order pages, from 1 page up to 2048 pages
__get_free_page(unsigned int flags);
get_zeroed_page(unsigned int flags);
struct page *alloc_pages(int gfp_mask, unsigned long order);   // returns the page descriptor, not the start address
void free_page(unsigned long addr);
void free_pages(unsigned long addr, unsigned long order);
void *vmalloc(unsigned long size);   // builds new page tables, so it is expensive; use it for large, purely software buffers (create_module(), for example, uses it); internally it allocates with kmalloc(GFP_KERNEL); use kmalloc() for small allocations
void vfree(void *addr);

The slab allocator:
struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align,
                                     unsigned long flags,
                                     void (*ctor)(void *, struct kmem_cache *, unsigned long),
                                     void (*dtor)(void *, struct kmem_cache *, unsigned long));
Creates a slab cache that can hold any number of equally sized objects; size is the object size and flags is a bitmask controlling the allocation:
SLAB_NO_REAP:         do not shrink this cache even under memory pressure.
SLAB_HWCACHE_ALIGN:   align each object on a hardware cache line.
SLAB_CACHE_DMA:       allocate the objects in the DMA zone.
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags);   // allocate one object from the cache created by kmem_cache_create() and return its address
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
int kmem_cache_destroy(struct kmem_cache *cachep);
Example:
static kmem_cache_t *xxx_cachep;
xxx_cachep = kmem_cache_create("xxx", sizeof(struct xxx), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL, NULL);
struct xxx *ctx;
ctx = kmem_cache_alloc(xxx_cachep, GFP_KERNEL);
...
kmem_cache_free(xxx_cachep, ctx);
kmem_cache_destroy(xxx_cachep);

Memory pools:
mempool_t *mempool_create(int min_nr,                  // number of objects to pre-allocate
                          mempool_alloc_t *alloc_fn,   // standard object allocation function
                          mempool_free_t *free_fn,     // standard object release function
                          void *pool_data);            // private data passed to both functions
The prototypes are:
typedef void *(mempool_alloc_t)(int gfp_mask, void *pool_data);
typedef void (mempool_free_t)(void *element, void *pool_data);
void *mempool_alloc(mempool_t *pool, int gfp_mask);   // allocate an object
void mempool_free(void *element, mempool_t *pool);    // give an object back
void mempool_destroy(mempool_t *pool);                // destroy the pool

Converting between virtual and physical addresses (PAGE_OFFSET = 3 GB):
#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
extern inline unsigned long virt_to_phys(volatile void *address)
{
    return __pa(address);
}
#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
extern inline void *phys_to_virt(unsigned long address)
{
    return __va(address);
}
These simple linear conversions apply only to conventionally (directly) mapped memory; high-memory virtual addresses have no such fixed relationship to physical addresses.

Accessing device I/O ports and I/O memory: a device usually exposes a group of registers for reading, writing and status, that is, control, data and status registers. When these registers live in the I/O space they are called I/O ports; when they live in the memory space they are called I/O memory.
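The memory-pool API listed above comes without a usage example. Here is a sketch of a pool backed by a slab cache, using the standard mempool_alloc_slab()/mempool_free_slab() helpers and the 2.6-era kmem_cache_create() prototype shown above; xxx_req, xxx_req_cache and xxx_req_pool are illustrative names.

#include <linux/errno.h>
#include <linux/mempool.h>
#include <linux/slab.h>

struct xxx_req {                 /* the fixed-size object the pool hands out */
    int id;
    char payload[64];
};

static struct kmem_cache *xxx_req_cache;
static mempool_t *xxx_req_pool;

static int xxx_pool_init(void)
{
    xxx_req_cache = kmem_cache_create("xxx_req", sizeof(struct xxx_req),
                                      0, SLAB_HWCACHE_ALIGN, NULL, NULL);
    if (!xxx_req_cache)
        return -ENOMEM;

    /* Keep at least 4 objects reserved so allocation can still succeed
     * under memory pressure; the pool draws from the slab cache above. */
    xxx_req_pool = mempool_create(4, mempool_alloc_slab,
                                  mempool_free_slab, xxx_req_cache);
    if (!xxx_req_pool) {
        kmem_cache_destroy(xxx_req_cache);
        return -ENOMEM;
    }
    return 0;
}

static void xxx_pool_use(void)
{
    struct xxx_req *req = mempool_alloc(xxx_req_pool, GFP_KERNEL);
    if (req) {
        req->id = 1;
        mempool_free(req, xxx_req_pool);
    }
}

static void xxx_pool_exit(void)
{
    mempool_destroy(xxx_req_pool);
    kmem_cache_destroy(xxx_req_cache);
}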
