static int
exec_mach_imgact(struct image_params *imgp)
{
struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
proc_t p = vfs_context_proc(imgp->ip_vfs_context);
int error = 0;
task_t task;
task_t new_task = NULL; /* protected by vfexec */
thread_t thread;
struct uthread *uthread;
vm_map_t old_map = VM_MAP_NULL;
vm_map_t map;
load_return_t lret;
load_result_t load_result;
struct _posix_spawnattr *psa = NULL;
int spawn = (imgp->ip_flags & IMGPF_SPAWN);
int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
int p_name_len;
/*
* make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
* is a reserved field on the end, so for the most part, we can
* treat them as if they were identical. Reverse-endian Mach-O
* binaries are recognized but not compatible.
*/
// Check the magic number in the header to confirm this is a Mach-O binary
if ((mach_header->magic == MH_CIGAM) ||
(mach_header->magic == MH_CIGAM_64)) {
error = EBADARCH;
goto bad;
}
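// A magic of MH_MAGIC or MH_MAGIC_64 is required from here on. Note the
// error = -1 below: it is not an errno; it tells the exec machinery this
// activator does not claim the image, so the next one (e.g. the
// interpreter-script handler) gets a chance.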
if ((mach_header->magic != MH_MAGIC) &&
(mach_header->magic != MH_MAGIC_64)) {
error = -1;
goto bad;
}
// Check the file type in the header; it must be an executable (MH_EXECUTE)
if (mach_header->filetype != MH_EXECUTE) {
error = -1;
goto bad;
}
// Check the Mach-O file's CPU type and subtype
if (imgp->ip_origcputype != 0) {
/* Fat header previously had an idea about this thin file */
if (imgp->ip_origcputype != mach_header->cputype ||
imgp->ip_origcpusubtype != mach_header->cpusubtype) {
error = EBADARCH;
goto bad;
}
} else {
imgp->ip_origcputype = mach_header->cputype;
imgp->ip_origcpusubtype = mach_header->cpusubtype;
}
task = current_task();
thread = current_thread();
uthread = get_bsdthread_info(thread);
if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
imgp->ip_flags |= IMGPF_IS_64BIT;
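// CPU_ARCH_ABI64 in the cputype marks a 64-bit slice (e.g. CPU_TYPE_X86_64);
// IMGPF_IS_64BIT later drives task_set_64bit() and the P_LP64 process flag.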
/* If posix_spawn binprefs exist, respect those prefs. */
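// Binprefs are set with posix_spawnattr_setbinpref_np(); the first entry
// matching this image's cputype (or CPU_TYPE_ANY) wins.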
psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
if (psa != NULL && psa->psa_binprefs[0] != 0) {
int pr = 0;
for (pr = 0; pr < NBINPREFS; pr++) {
cpu_type_t pref = psa->psa_binprefs[pr];
if (pref == 0) {
/* No suitable arch in the pref list */
error = EBADARCH;
goto bad;
}
if (pref == CPU_TYPE_ANY) {
/* Jump to regular grading */
goto grade;
}
if (pref == imgp->ip_origcputype) {
/* We have a match! */
goto grade;
}
}
error = EBADARCH;
goto bad;
}
grade:
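// grade_binary() scores the (cputype, cpusubtype) pair against the running
// CPU; a grade of 0 means this slice cannot execute on this machine. The
// capability bits (CPU_SUBTYPE_MASK) are stripped before grading.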
if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
error = EBADARCH;
goto bad;
}
/* Copy in arguments/environment from the old process */
error = exec_extract_strings(imgp);
if (error)
goto bad;
error = exec_add_apple_strings(imgp);
if (error)
goto bad;
AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
imgp->ip_endargv - imgp->ip_startargv);
AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
imgp->ip_endenvv - imgp->ip_endargv);
/*
* We are being called to activate an image subsequent to a vfork()
* operation; in this case, we know that our task, thread, and
* uthread are actually those of our parent, and our proc, which we
* obtained indirectly from the image_params vfs_context_t, is the
* new child process.
*/
if (vfexec || spawn) {
if (vfexec) {
imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
if (imgp->ip_new_thread == NULL) {
error = ENOMEM;
goto bad;
}
}
/* reset local idea of thread, uthread, task */
thread = imgp->ip_new_thread;
uthread = get_bsdthread_info(thread);
task = new_task = get_threadtask(thread);
map = get_task_map(task);
} else {
map = VM_MAP_NULL;
}
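// For plain exec and posix_spawn the map handed back by load_machfile() below
// is a freshly built one that is swapped onto the task later (note the
// !vfexec condition on swap_task_map()); only for vfexec is the child task's
// existing map populated in place.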
/*
* We set these flags here; this is OK, since if we fail after
* this point, we have already destroyed the parent process anyway.
*/
task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
if (imgp->ip_flags & IMGPF_IS_64BIT) {
task_set_64bit(task, TRUE);
OSBitOrAtomic(P_LP64, &p->p_flag);
} else {
task_set_64bit(task, FALSE);
OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
}
/*
* Load the Mach-O file.
*
* NOTE: An error after this point indicates we have potentially
* destroyed or overwritten some process state while attempting an
* execve() following a vfork(), which is an unrecoverable condition.
* We send the new process an immediate SIGKILL to avoid it executing
* any instructions in the mutated address space. For true spawns,
* this is not the case, and "too late" is still not too late to
* return an error code to the parent process.
*/
/*
* Actually load the image file we previously decided to load.
*/
// Map the Mach-O file into memory by calling load_machfile()
lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
if (lret != LOAD_SUCCESS) {
error = load_return_to_errno(lret);
goto badtoolate;
}
proc_lock(p);
p->p_cputype = imgp->ip_origcputype;
p->p_cpusubtype = imgp->ip_origcpusubtype;
proc_unlock(p);
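// Carry the caller's mlock() ceiling (RLIMIT_MEMLOCK) over to the new map.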
vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
/*
* Set code-signing flags if this binary is signed, or if parent has
* requested them on exec.
*/
if (load_result.csflags & CS_VALID) {
imgp->ip_csflags |= load_result.csflags &
(CS_VALID|
CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM|
CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
} else {
imgp->ip_csflags &= ~CS_VALID;
}
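// A parent marked with CS_EXEC_SET_* forces the corresponding code-signing
// enforcement bits onto every image it execs.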
if (p->p_csflags & CS_EXEC_SET_HARD)
imgp->ip_csflags |= CS_HARD;
if (p->p_csflags & CS_EXEC_SET_KILL)
imgp->ip_csflags |= CS_KILL;
if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
imgp->ip_csflags |= CS_ENFORCEMENT;
if (p->p_csflags & CS_EXEC_SET_INSTALLER)
imgp->ip_csflags |= CS_INSTALLER;
/*
* Set up the system reserved areas in the new address space.
*/
vm_map_exec(map, task, (void *)p->p_fd->fd_rdir, cpu_type());
/*
* Close file descriptors which specify close-on-exec.
*/
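// psa_flags may carry POSIX_SPAWN_CLOEXEC_DEFAULT, which treats every
// descriptor as close-on-exec unless the spawn file actions re-opened it.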
fdexec(p, psa != NULL ? psa->psa_flags : 0);
/*
* deal with set[ug]id.
*/
error = exec_handle_sugid(imgp);
if (error) {
if (spawn || !vfexec) {
vm_map_deallocate(map);
}
goto badtoolate;
}
/*
* Commit to new map.
*
* Swap the new map for the old, which consumes our new map reference
* but leaves us responsible for the old_map reference.  That lets us
* get off the pmap associated with it, and then we can release it.
*/
if (!vfexec) {
old_map = swap_task_map(task, thread, map, !spawn);
vm_map_deallocate(old_map);
}
lret = activate_thread_state(thread, &load_result);
if (lret != KERN_SUCCESS) {
goto badtoolate;
}
/*
* deal with voucher on exec-calling thread.
*/
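// Only when exec stays on the calling thread (no new thread was created)
// must that thread's adopted Mach voucher be dropped here.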
if (imgp->ip_new_thread == NULL)
thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
/* Make sure we won't interrupt ourselves signalling a partial process */
if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
psignal(p, SIGTRAP);
if (load_result.unixproc &&
create_unix_stack(get_task_map(task),
&load_result,
p) != KERN_SUCCESS) {
error = load_return_to_errno(LOAD_NOSPACE);
goto badtoolate;
}
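// copyout() targets the current thread's address map; for vfexec/spawn the
// calling thread is still on the parent's map, so switch to the new task's
// map while the strings and the dyld slot are written out.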
if (vfexec || spawn) {
old_map = vm_map_switch(get_task_map(task));
}
if (load_result.unixproc) {
user_addr_t ap;
/*
* Copy the strings area out into the new process address
* space.
*/
ap = p->user_stack;
error = exec_copyout_strings(imgp, &ap);
if (error) {
if (vfexec || spawn)
vm_map_switch(old_map);
goto badtoolate;
}
/* Set the stack */
thread_setuserstack(thread, ap);
}
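// If the image needs dyld, reserve one pointer-sized slot on the stack and
// store the main executable's mach_header address there; dyld's entry code
// consumes it. all_image_info is the structure debuggers later use to
// enumerate loaded images.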
if (load_result.dynlinker) {
uint64_t ap;
int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
/* Adjust the stack */
ap = thread_adjuserstack(thread, -new_ptr_size);
error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
if (error) {
if (vfexec || spawn)
vm_map_switch(old_map);
goto badtoolate;
}
// dyld takes over the rest of the loading work from here
task_set_dyld_info(task, load_result.all_image_info_addr,
load_result.all_image_info_size);
}
/* Avoid immediate VM faults back into kernel */
exec_prefault_data(p, imgp, &load_result);
if (vfexec || spawn) {
vm_map_switch(old_map);
}
/* Stop profiling */
stopprofclock(p);
/*
* Reset signal state.
*/
execsigs(p, thread);
/*
* need to cancel async IO requests that can be cancelled and wait for those
* already active. MAY BLOCK!
*/
_aio_exec( p );
#if SYSV_SHM
/* FIXME: until vmspace inherit is fixed: */
if (!vfexec && p->vm_shm)
shmexec(p);
#endif
#if SYSV_SEM
/* Clean up the semaphores */
semexit(p);
#endif
/*
* Remember file name for accounting.
*/
p->p_acflag &= ~AFORK;
/*
* Set p->p_comm and p->p_name to the name passed to exec
*/
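// p_name keeps the longer form; p_comm is additionally truncated to
// MAXCOMLEN, which is what ps(1) and friends display.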
p_name_len = sizeof(p->p_name) - 1;
if (imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len)
imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
pal_dbg_set_task_name( p->task );
#if DEVELOPMENT || DEBUG
/*
* Update the pid and proc name for the importance base, if any
*/
task_importance_update_owner_info(p->task);
#endif
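// load_result.uuid was captured from the image's LC_UUID load command.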
memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
// <rdar://6598155> dtrace code cleanup needed
#if CONFIG_DTRACE
/*
* Invalidate any predicate evaluation already cached for this thread by DTrace.
* That's because we've just stored to p_comm and DTrace refers to that when it
* evaluates the "execname" special variable. uid and gid may have changed as well.
*/
dtrace_set_thread_predcache(current_thread(), 0);
/*
* Free any outstanding lazy dof entries. It is imperative we
* always call dtrace_lazy_dofs_destroy, rather than null check
* and call if !NULL. If we NULL test, during lazy dof faulting
* we can race with the faulting code and proceed from here to
* beyond the helpers cleanup. The lazy dof faulting will then
* install new helpers which no longer belong to this process!
*/
dtrace_lazy_dofs_destroy(p);
/*
* Clean up any DTrace helpers for the process.
*/
if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
(*dtrace_helpers_cleanup)(p);
}
/*
* Cleanup the DTrace provider associated with this process.
*/
proc_lock(p);
if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
(*dtrace_fasttrap_exec_ptr)(p);
}
proc_unlock(p);
#endif
if (kdebug_enable) {
long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
/*
* Collect the pathname for tracing
*/
kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
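// For vfexec/spawn the events are tagged explicitly with the new thread's
// tid, since the exec is not yet running on that thread.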
if (vfexec || spawn) {
KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE,
p->p_pid, 0, 0, 0, (uintptr_t)thread_tid(thread));
KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE,
dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
} else {
KERNEL_DEBUG_CONSTANT(TRACE_DATA_EXEC | DBG_FUNC_NONE,
p->p_pid, 0, 0, 0, 0);
KERNEL_DEBUG_CONSTANT(TRACE_STRING_EXEC | DBG_FUNC_NONE,
dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
}
}
/*
* If posix_spawned with the START_SUSPENDED flag, stop the
* process before it runs.
*/
if (imgp->ip_px_sa != NULL) {
psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
proc_lock(p);
p->p_stat = SSTOP;
proc_unlock(p);
(void) task_suspend_internal(p->task);
}
}
/*
* mark as execed, wakeup the process that vforked (if any) and tell
* it that it now has its own resources back
*/
OSBitOrAtomic(P_EXEC, &p->p_flag);
proc_resetregister(p);
if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
proc_lock(p);
p->p_lflag &= ~P_LPPWAIT;
proc_unlock(p);
wakeup((caddr_t)p->p_pptr);
}
/*
* Pay for our earlier safety; deliver the delayed signals from
* the incomplete vfexec process now that it's complete.
*/
if (vfexec && (p->p_lflag & P_LTRACED)) {
psignal_vfork(p, new_task, thread, SIGTRAP);
}
goto done;
badtoolate:
/* Don't allow child process to execute any instructions */
if (!spawn) {
if (vfexec) {
psignal_vfork(p, new_task, thread, SIGKILL);
} else {
psignal(p, SIGKILL);
}
/* We can't stop this system call at this point, so just pretend we succeeded */
error = 0;
}
done:
if (!spawn) {
/* notify only if it has not failed due to FP Key error */
if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
proc_knote(p, NOTE_EXEC);
}
/* Drop extra references for cases where we don't expect the caller to clean up */
if (vfexec || (spawn && error == 0)) {
task_deallocate(new_task);
thread_deallocate(thread);
}
if (load_result.threadstate) {
kfree(load_result.threadstate, load_result.threadstate_sz);
load_result.threadstate = NULL;
}
bad:
return(error);
}