Mach-O加载流程

了解了Mach-O的文件格式,肯定需要了解Mach-O的加载流程,既然要研究OSX,这就是必不可少的一步,那么通过阅读源码可以比较清晰的认识这一流程
用户态加载新的可执行文件时,其中一种方式是通过exec*系列函数,所以我们先从exec*说起。由于exec*系列函数都只是对系统调用execve()的封装,我们先通过一张流程图来了解整体流程

QQ20170304-185855@2x.jpg

1.1__mac_execve()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
int
__mac_execve(proc_t p, struct __mac_execve_args *uap, int32_t *retval)
{
char *bufp = NULL;
struct image_params *imgp;
......
context.vc_thread = current_thread();
context.vc_ucred = kauth_cred_proc_ref(p); /* XXX must NOT be kauth_cred_get() */
/* Allocate a big chunk for locals instead of using stack since these
* structures a pretty big.
*/
//申请一块内存用来存放imgp,vap,origavp的数据结构
MALLOC(bufp, char *, (sizeof(*imgp) + sizeof(*vap) + sizeof(*origvap)), M_TEMP, M_WAITOK | M_ZERO);
imgp = (struct image_params *) bufp;
if (bufp == NULL) {
error = ENOMEM;
goto exit_with_error;
}
vap = (struct vnode_attr *) (bufp + sizeof(*imgp));
origvap = (struct vnode_attr *) (bufp + sizeof(*imgp) + sizeof(*vap));
//初始化imgp
/* Initialize the common data in the image_params structure */
imgp->ip_user_fname = uap->fname;
imgp->ip_user_argv = uap->argp;
imgp->ip_user_envv = uap->envp;
imgp->ip_vattr = vap;
imgp->ip_origvattr = origvap;
imgp->ip_vfs_context = &context;
imgp->ip_flags = (is_64 ? IMGPF_WAS_64BIT : IMGPF_NONE) | ((p->p_flag & P_DISABLE_ASLR) ? IMGPF_DISABLE_ASLR : IMGPF_NONE);
imgp->ip_seg = (is_64 ? UIO_USERSPACE64 : UIO_USERSPACE32);
imgp->ip_mac_return = 0;
uthread = get_bsdthread_info(current_thread());
if (uthread->uu_flag & UT_VFORK) {
imgp->ip_flags |= IMGPF_VFORK_EXEC;
}
#if CONFIG_MACF
if (uap->mac_p != USER_ADDR_NULL) {
error = mac_execve_enter(uap->mac_p, imgp);
if (error) {
kauth_cred_unref(&context.vc_ucred);
goto exit_with_error;
}
}
#endif
//执行image函数
error = exec_activate_image(imgp);
//释放资源
kauth_cred_unref(&context.vc_ucred);
......
return(error);
}
  • 主要负责加载镜像前的数据初始化以及资源的相关操作,具体的加载流程在exec_activate_image()中

1.2exec_activate_image()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
static int
exec_activate_image(struct image_params *imgp)
{
struct nameidata *ndp = NULL;
const char *excpath;
int error;
int resid;
int once = 1; /* save SGUID-ness for interpreted files */
int i;
int itercount = 0;
proc_t p = vfs_context_proc(imgp->ip_vfs_context);
error = execargs_alloc(imgp);
if (error)
goto bad_notrans;
//保存程序路径
error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg, &excpath);
if (error) {
goto bad_notrans;
}
/* Use excpath, which contains the copyin-ed exec path */
DTRACE_PROC1(exec, uintptr_t, excpath);
MALLOC(ndp, struct nameidata *, sizeof(*ndp), M_TEMP, M_WAITOK | M_ZERO);
if (ndp == NULL) {
error = ENOMEM;
goto bad_notrans;
}
NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
again:
error = namei(ndp);
if (error)
goto bad_notrans;
imgp->ip_ndp = ndp; /* successful namei(); call nameidone() later */
imgp->ip_vp = ndp->ni_vp; /* if set, need to vnode_put() at some point */
/*
* Before we start the transition from binary A to binary B, make
* sure another thread hasn't started exiting the process. We grab
* the proc lock to check p_lflag initially, and the transition
* mechanism ensures that the value doesn't change after we release
* the lock.
*/
proc_lock(p);
if (p->p_lflag & P_LEXIT) {
error = EDEADLK;
proc_unlock(p);
goto bad_notrans;
}
error = proc_transstart(p, 1, 0);
proc_unlock(p);
if (error)
goto bad_notrans;
error = exec_check_permissions(imgp);
if (error)
goto bad;
/* Copy; avoid invocation of an interpreter overwriting the original */
if (once) {
once = 0;
*imgp->ip_origvattr = *imgp->ip_vattr;
}
error = vn_rdwr(UIO_READ, imgp->ip_vp, imgp->ip_vdata, PAGE_SIZE, 0,
UIO_SYSSPACE, IO_NODELOCKED,
vfs_context_ucred(imgp->ip_vfs_context),
&resid, vfs_context_proc(imgp->ip_vfs_context));
if (error)
goto bad;
if (resid) {
memset(imgp->ip_vdata + (PAGE_SIZE - resid), 0x0, resid);
}
encapsulated_binary:
/* Limit the number of iterations we will attempt on each binary */
if (++itercount > EAI_ITERLIMIT) {
error = EBADEXEC;
goto bad;
}
error = -1;
//遍历execsw来确定不同的加载函数
for(i = 0; error == -1 && execsw[i].ex_imgact != NULL; i++) {
error = (*execsw[i].ex_imgact)(imgp);
switch (error) {
/* case -1: not claimed: continue */
case -2: /* Encapsulated binary, imgp->ip_XXX set for next iteration */
goto encapsulated_binary;
case -3: /* Interpreter */
#if CONFIG_MACF
/*
* Copy the script label for later use. Note that
* the label can be different when the script is
* actually read by the interpreter.
*/
if (imgp->ip_scriptlabelp)
mac_vnode_label_free(imgp->ip_scriptlabelp);
imgp->ip_scriptlabelp = mac_vnode_label_alloc();
if (imgp->ip_scriptlabelp == NULL) {
error = ENOMEM;
break;
}
mac_vnode_label_copy(imgp->ip_vp->v_label,
imgp->ip_scriptlabelp);
/*
* Take a ref of the script vnode for later use.
*/
if (imgp->ip_scriptvp)
vnode_put(imgp->ip_scriptvp);
if (vnode_getwithref(imgp->ip_vp) == 0)
imgp->ip_scriptvp = imgp->ip_vp;
#endif
nameidone(ndp);
vnode_put(imgp->ip_vp);
imgp->ip_vp = NULL; /* already put */
imgp->ip_ndp = NULL; /* already nameidone */
/* Use excpath, which exec_shell_imgact reset to the interpreter */
NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF,
UIO_SYSSPACE, CAST_USER_ADDR_T(excpath), imgp->ip_vfs_context);
proc_transend(p, 0);
goto again;
default:
break;
}
}
/*
* Call out to allow 3rd party notification of exec.
* Ignore result of kauth_authorize_fileop call.
*/
if (error == 0 && kauth_authorize_fileop_has_listeners()) {
kauth_authorize_fileop(vfs_context_ucred(imgp->ip_vfs_context),
KAUTH_FILEOP_EXEC,
(uintptr_t)ndp->ni_vp, 0);
}
if (error == 0) {
/*
* Reset atm context from task
*/
task_atm_reset(p->task);
/*
* Reset old bank context from task
*/
task_bank_reset(p->task);
}
bad:
proc_transend(p, 0);
bad_notrans:
if (imgp->ip_strings)
execargs_free(imgp);
if (imgp->ip_ndp)
nameidone(imgp->ip_ndp);
if (ndp)
FREE(ndp, M_TEMP);
return (error);
}
  • exec_activate_image()完成了镜像加载的所有工作,主要是寻找可执行文件并把它们拷贝到内存中,再通过遍历execsw,根据可执行文件的不同类型来确定不同的加载函数;从各处的if (error)可以看出,这些加载过程一旦出错就会被终止,否则就完成整个流程的加载

1.3 exec_mach_imgact

1
2
3
4
5
6
7
8
9
struct execsw {
int (*ex_imgact)(struct image_params *);
const char *ex_name;
} execsw[] = {
{ exec_mach_imgact, "Mach-o Binary" },
{ exec_fat_imgact, "Fat Binary" },
{ exec_shell_imgact, "Interpreter Script" },
{ NULL, NULL}
}
  • 可以看出,OSX有三种可执行文件:
  1. Mach-o由exec_mach_imgact处理
  2. Fat Binary由exec_fat_imgact处理
  3. Interpreter Script由exec_shell_imgact处理
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
static int
exec_mach_imgact(struct image_params *imgp)
{
struct mach_header *mach_header = (struct mach_header *)imgp->ip_vdata;
proc_t p = vfs_context_proc(imgp->ip_vfs_context);
int error = 0;
task_t task;
task_t new_task = NULL; /* protected by vfexec */
thread_t thread;
struct uthread *uthread;
vm_map_t old_map = VM_MAP_NULL;
vm_map_t map;
load_return_t lret;
load_result_t load_result;
struct _posix_spawnattr *psa = NULL;
int spawn = (imgp->ip_flags & IMGPF_SPAWN);
int vfexec = (imgp->ip_flags & IMGPF_VFORK_EXEC);
int p_name_len;
/*
* make sure it's a Mach-O 1.0 or Mach-O 2.0 binary; the difference
* is a reserved field on the end, so for the most part, we can
* treat them as if they were identical. Reverse-endian Mach-O
* binaries are recognized but not compatible.
*/
//检测header中的magic来确定是否符合Mach-O的条件
if ((mach_header->magic == MH_CIGAM) ||
(mach_header->magic == MH_CIGAM_64)) {
error = EBADARCH;
goto bad;
}
if ((mach_header->magic != MH_MAGIC) &&
(mach_header->magic != MH_MAGIC_64)) {
error = -1;
goto bad;
}
//检测header中的文件类型且必须为可执行文件
if (mach_header->filetype != MH_EXECUTE) {
error = -1;
goto bad;
}
//检测Mach-O文件的CPU种类与版本
if (imgp->ip_origcputype != 0) {
/* Fat header previously had an idea about this thin file */
if (imgp->ip_origcputype != mach_header->cputype ||
imgp->ip_origcpusubtype != mach_header->cpusubtype) {
error = EBADARCH;
goto bad;
}
} else {
imgp->ip_origcputype = mach_header->cputype;
imgp->ip_origcpusubtype = mach_header->cpusubtype;
}
task = current_task();
thread = current_thread();
uthread = get_bsdthread_info(thread);
if ((mach_header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64)
imgp->ip_flags |= IMGPF_IS_64BIT;
/* If posix_spawn binprefs exist, respect those prefs. */
psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
if (psa != NULL && psa->psa_binprefs[0] != 0) {
int pr = 0;
for (pr = 0; pr < NBINPREFS; pr++) {
cpu_type_t pref = psa->psa_binprefs[pr];
if (pref == 0) {
/* No suitable arch in the pref list */
error = EBADARCH;
goto bad;
}
if (pref == CPU_TYPE_ANY) {
/* Jump to regular grading */
goto grade;
}
if (pref == imgp->ip_origcputype) {
/* We have a match! */
goto grade;
}
}
error = EBADARCH;
goto bad;
}
grade:
if (!grade_binary(imgp->ip_origcputype, imgp->ip_origcpusubtype & ~CPU_SUBTYPE_MASK)) {
error = EBADARCH;
goto bad;
}
/* Copy in arguments/environment from the old process */
error = exec_extract_strings(imgp);
if (error)
goto bad;
error = exec_add_apple_strings(imgp);
if (error)
goto bad;
AUDIT_ARG(argv, imgp->ip_startargv, imgp->ip_argc,
imgp->ip_endargv - imgp->ip_startargv);
AUDIT_ARG(envv, imgp->ip_endargv, imgp->ip_envc,
imgp->ip_endenvv - imgp->ip_endargv);
/*
* We are being called to activate an image subsequent to a vfork()
* operation; in this case, we know that our task, thread, and
* uthread are actually those of our parent, and our proc, which we
* obtained indirectly from the image_params vfs_context_t, is the
* new child process.
*/
if (vfexec || spawn) {
if (vfexec) {
imgp->ip_new_thread = fork_create_child(task, NULL, p, FALSE, (imgp->ip_flags & IMGPF_IS_64BIT));
if (imgp->ip_new_thread == NULL) {
error = ENOMEM;
goto bad;
}
}
/* reset local idea of thread, uthread, task */
thread = imgp->ip_new_thread;
uthread = get_bsdthread_info(thread);
task = new_task = get_threadtask(thread);
map = get_task_map(task);
} else {
map = VM_MAP_NULL;
}
/*
* We set these flags here; this is OK, since if we fail after
* this point, we have already destroyed the parent process anyway.
*/
task_set_dyld_info(task, MACH_VM_MIN_ADDRESS, 0);
if (imgp->ip_flags & IMGPF_IS_64BIT) {
task_set_64bit(task, TRUE);
OSBitOrAtomic(P_LP64, &p->p_flag);
} else {
task_set_64bit(task, FALSE);
OSBitAndAtomic(~((uint32_t)P_LP64), &p->p_flag);
}
/*
* Load the Mach-O file.
*
* NOTE: An error after this point indicates we have potentially
* destroyed or overwritten some process state while attempting an
* execve() following a vfork(), which is an unrecoverable condition.
* We send the new process an immediate SIGKILL to avoid it executing
* any instructions in the mutated address space. For true spawns,
* this is not the case, and "too late" is still not too late to
* return an error code to the parent process.
*/
/*
* Actually load the image file we previously decided to load.
*/
//把Mach-O文件映射到内存中并调用load_machfile函数
lret = load_machfile(imgp, mach_header, thread, &map, &load_result);
if (lret != LOAD_SUCCESS) {
error = load_return_to_errno(lret);
goto badtoolate;
}
proc_lock(p);
p->p_cputype = imgp->ip_origcputype;
p->p_cpusubtype = imgp->ip_origcpusubtype;
proc_unlock(p);
vm_map_set_user_wire_limit(map, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
/*
* Set code-signing flags if this binary is signed, or if parent has
* requested them on exec.
*/
if (load_result.csflags & CS_VALID) {
imgp->ip_csflags |= load_result.csflags &
(CS_VALID|
CS_HARD|CS_KILL|CS_RESTRICT|CS_ENFORCEMENT|CS_REQUIRE_LV|CS_DYLD_PLATFORM|
CS_EXEC_SET_HARD|CS_EXEC_SET_KILL|CS_EXEC_SET_ENFORCEMENT);
} else {
imgp->ip_csflags &= ~CS_VALID;
}
if (p->p_csflags & CS_EXEC_SET_HARD)
imgp->ip_csflags |= CS_HARD;
if (p->p_csflags & CS_EXEC_SET_KILL)
imgp->ip_csflags |= CS_KILL;
if (p->p_csflags & CS_EXEC_SET_ENFORCEMENT)
imgp->ip_csflags |= CS_ENFORCEMENT;
if (p->p_csflags & CS_EXEC_SET_INSTALLER)
imgp->ip_csflags |= CS_INSTALLER;
/*
* Set up the system reserved areas in the new address space.
*/
vm_map_exec(map, task, (void *)p->p_fd->fd_rdir, cpu_type());
/*
* Close file descriptors which specify close-on-exec.
*/
fdexec(p, psa != NULL ? psa->psa_flags : 0);
/*
* deal with set[ug]id.
*/
error = exec_handle_sugid(imgp);
if (error) {
if (spawn || !vfexec) {
vm_map_deallocate(map);
}
goto badtoolate;
}
/*
* Commit to new map.
*
* Swap the new map for the old, which consumes our new map reference but
* each leaves us responsible for the old_map reference. That lets us get
* off the pmap associated with it, and then we can release it.
*/
if (!vfexec) {
old_map = swap_task_map(task, thread, map, !spawn);
vm_map_deallocate(old_map);
}
lret = activate_thread_state(thread, &load_result);
if (lret != KERN_SUCCESS) {
goto badtoolate;
}
/*
* deal with voucher on exec-calling thread.
*/
if (imgp->ip_new_thread == NULL)
thread_set_mach_voucher(current_thread(), IPC_VOUCHER_NULL);
/* Make sure we won't interrupt ourself signalling a partial process */
if (!vfexec && !spawn && (p->p_lflag & P_LTRACED))
psignal(p, SIGTRAP);
if (load_result.unixproc &&
create_unix_stack(get_task_map(task),
&load_result,
p) != KERN_SUCCESS) {
error = load_return_to_errno(LOAD_NOSPACE);
goto badtoolate;
}
if (vfexec || spawn) {
old_map = vm_map_switch(get_task_map(task));
}
if (load_result.unixproc) {
user_addr_t ap;
/*
* Copy the strings area out into the new process address
* space.
*/
ap = p->user_stack;
error = exec_copyout_strings(imgp, &ap);
if (error) {
if (vfexec || spawn)
vm_map_switch(old_map);
goto badtoolate;
}
/* Set the stack */
thread_setuserstack(thread, ap);
}
if (load_result.dynlinker) {
uint64_t ap;
int new_ptr_size = (imgp->ip_flags & IMGPF_IS_64BIT) ? 8 : 4;
/* Adjust the stack */
ap = thread_adjuserstack(thread, -new_ptr_size);
error = copyoutptr(load_result.mach_header, ap, new_ptr_size);
if (error) {
if (vfexec || spawn)
vm_map_switch(old_map);
goto badtoolate;
}
//由dyld接手接下来的工作
task_set_dyld_info(task, load_result.all_image_info_addr,
load_result.all_image_info_size);
}
/* Avoid immediate VM faults back into kernel */
exec_prefault_data(p, imgp, &load_result);
if (vfexec || spawn) {
vm_map_switch(old_map);
}
/* Stop profiling */
stopprofclock(p);
/*
* Reset signal state.
*/
execsigs(p, thread);
/*
* need to cancel async IO requests that can be cancelled and wait for those
* already active. MAY BLOCK!
*/
_aio_exec( p );
#if SYSV_SHM
/* FIXME: Till vmspace inherit is fixed: */
if (!vfexec && p->vm_shm)
shmexec(p);
#endif
#if SYSV_SEM
/* Clean up the semaphores */
semexit(p);
#endif
/*
* Remember file name for accounting.
*/
p->p_acflag &= ~AFORK;
/*
* Set p->p_comm and p->p_name to the name passed to exec
*/
p_name_len = sizeof(p->p_name) - 1;
if(imgp->ip_ndp->ni_cnd.cn_namelen > p_name_len)
imgp->ip_ndp->ni_cnd.cn_namelen = p_name_len;
bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_name,
(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
p->p_name[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
if (imgp->ip_ndp->ni_cnd.cn_namelen > MAXCOMLEN)
imgp->ip_ndp->ni_cnd.cn_namelen = MAXCOMLEN;
bcopy((caddr_t)imgp->ip_ndp->ni_cnd.cn_nameptr, (caddr_t)p->p_comm,
(unsigned)imgp->ip_ndp->ni_cnd.cn_namelen);
p->p_comm[imgp->ip_ndp->ni_cnd.cn_namelen] = '\0';
pal_dbg_set_task_name( p->task );
#if DEVELOPMENT || DEBUG
/*
* Update the pid an proc name for importance base if any
*/
task_importance_update_owner_info(p->task);
#endif
memcpy(&p->p_uuid[0], &load_result.uuid[0], sizeof(p->p_uuid));
// <rdar://6598155> dtrace code cleanup needed
#if CONFIG_DTRACE
/*
* Invalidate any predicate evaluation already cached for this thread by DTrace.
* That's because we've just stored to p_comm and DTrace refers to that when it
* evaluates the "execname" special variable. uid and gid may have changed as well.
*/
dtrace_set_thread_predcache(current_thread(), 0);
/*
* Free any outstanding lazy dof entries. It is imperative we
* always call dtrace_lazy_dofs_destroy, rather than null check
* and call if !NULL. If we NULL test, during lazy dof faulting
* we can race with the faulting code and proceed from here to
* beyond the helpers cleanup. The lazy dof faulting will then
* install new helpers which no longer belong to this process!
*/
dtrace_lazy_dofs_destroy(p);
/*
* Clean up any DTrace helpers for the process.
*/
if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
(*dtrace_helpers_cleanup)(p);
}
/*
* Cleanup the DTrace provider associated with this process.
*/
proc_lock(p);
if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
(*dtrace_fasttrap_exec_ptr)(p);
}
proc_unlock(p);
#endif
if (kdebug_enable) {
long dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;
/*
* Collect the pathname for tracing
*/
kdbg_trace_string(p, &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);
if (vfexec || spawn) {
KERNEL_DEBUG_CONSTANT1(TRACE_DATA_EXEC | DBG_FUNC_NONE,
p->p_pid ,0,0,0, (uintptr_t)thread_tid(thread));
KERNEL_DEBUG_CONSTANT1(TRACE_STRING_EXEC | DBG_FUNC_NONE,
dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, (uintptr_t)thread_tid(thread));
} else {
KERNEL_DEBUG_CONSTANT(TRACE_DATA_EXEC | DBG_FUNC_NONE,
p->p_pid ,0,0,0,0);
KERNEL_DEBUG_CONSTANT(TRACE_STRING_EXEC | DBG_FUNC_NONE,
dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
}
}
/*
* If posix_spawned with the START_SUSPENDED flag, stop the
* process before it runs.
*/
if (imgp->ip_px_sa != NULL) {
psa = (struct _posix_spawnattr *) imgp->ip_px_sa;
if (psa->psa_flags & POSIX_SPAWN_START_SUSPENDED) {
proc_lock(p);
p->p_stat = SSTOP;
proc_unlock(p);
(void) task_suspend_internal(p->task);
}
}
/*
* mark as execed, wakeup the process that vforked (if any) and tell
* it that it now has its own resources back
*/
OSBitOrAtomic(P_EXEC, &p->p_flag);
proc_resetregister(p);
if (p->p_pptr && (p->p_lflag & P_LPPWAIT)) {
proc_lock(p);
p->p_lflag &= ~P_LPPWAIT;
proc_unlock(p);
wakeup((caddr_t)p->p_pptr);
}
/*
* Pay for our earlier safety; deliver the delayed signals from
* the incomplete vfexec process now that it's complete.
*/
if (vfexec && (p->p_lflag & P_LTRACED)) {
psignal_vfork(p, new_task, thread, SIGTRAP);
}
goto done;
badtoolate:
/* Don't allow child process to execute any instructions */
if (!spawn) {
if (vfexec) {
psignal_vfork(p, new_task, thread, SIGKILL);
} else {
psignal(p, SIGKILL);
}
/* We can't stop this system call at this point, so just pretend we succeeded */
error = 0;
}
done:
if (!spawn) {
/* notify only if it has not failed due to FP Key error */
if ((p->p_lflag & P_LTERM_DECRYPTFAIL) == 0)
proc_knote(p, NOTE_EXEC);
}
/* Drop extra references for cases where we don't expect the caller to clean up */
if (vfexec || (spawn && error == 0)) {
task_deallocate(new_task);
thread_deallocate(thread);
}
if (load_result.threadstate) {
kfree(load_result.threadstate, load_result.threadstate_sz);
load_result.threadstate = NULL;
}
bad:
return(error);
}
  • 这里主要对可执行文件的一些基本信息作了检测,如header,CPU,imgp等,然后把Mach-O文件映射到内存中,调用load_machfile()函数,由dyld加载

1.4 load_machfile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
load_return_t
load_machfile(
struct image_params *imgp,
struct mach_header *header,
thread_t thread,
vm_map_t *mapp,
load_result_t *result
)
{
struct vnode *vp = imgp->ip_vp;
off_t file_offset = imgp->ip_arch_offset;
off_t macho_size = imgp->ip_arch_size;
off_t file_size = imgp->ip_vattr->va_data_size;
vm_map_t new_map = *mapp;
pmap_t pmap = 0; /* protected by create_map */
vm_map_t map;
load_result_t myresult;
load_return_t lret;
boolean_t create_map = FALSE;
boolean_t enforce_hard_pagezero = TRUE;
int spawn = (imgp->ip_flags & IMGPF_SPAWN);
task_t task = current_task();
proc_t p = current_proc();
mach_vm_offset_t aslr_offset = 0;
mach_vm_offset_t dyld_aslr_offset = 0;
kern_return_t kret;
if (macho_size > file_size) {
return(LOAD_BADMACHO);
}
if (new_map == VM_MAP_NULL) {
create_map = TRUE;
}
/*
* If we are spawning, we have created backing objects for the process
* already, which include non-lazily creating the task map. So we
* are going to switch out the task map with one appropriate for the
* bitness of the image being loaded.
*/
if (spawn) {
create_map = TRUE;
}
if (create_map) {
task_t ledger_task;
if (imgp->ip_new_thread) {
ledger_task = get_threadtask(imgp->ip_new_thread);
} else {
ledger_task = task;
}
pmap = pmap_create(get_task_ledger(ledger_task),
(vm_map_size_t) 0,
((imgp->ip_flags & IMGPF_IS_64BIT) != 0));
pal_switch_pmap(thread, pmap, imgp->ip_flags & IMGPF_IS_64BIT);
map = vm_map_create(pmap,
0,
vm_compute_max_offset(((imgp->ip_flags & IMGPF_IS_64BIT) == IMGPF_IS_64BIT)),
TRUE);
} else
map = new_map;
#if (__ARM_ARCH_7K__ >= 2) && defined(PLATFORM_WatchOS)
/* enforce 16KB alignment for watch targets with new ABI */
vm_map_set_page_shift(map, SIXTEENK_PAGE_SHIFT);
#endif /* __arm64__ */
#ifndef CONFIG_ENFORCE_SIGNED_CODE
/* This turns off faulting for executable pages, which allows
* to circumvent Code Signing Enforcement. The per process
* flag (CS_ENFORCEMENT) is not set yet, but we can use the
* global flag.
*/
if ( !cs_enforcement(NULL) && (header->flags & MH_ALLOW_STACK_EXECUTION) )
vm_map_disable_NX(map);
#endif
/* Forcibly disallow execution from data pages on even if the arch
* normally permits it. */
////将内存设置为不可执行,用来防止产生溢出漏洞
if ((header->flags & MH_NO_HEAP_EXECUTION) && !(imgp->ip_flags & IMGPF_ALLOW_DATA_EXEC))
vm_map_disallow_data_exec(map);
/*
* Compute a random offset for ASLR, and an independent random offset for dyld.
*/
//计算ASLR的偏移量
if (!(imgp->ip_flags & IMGPF_DISABLE_ASLR)) {
uint64_t max_slide_pages;
max_slide_pages = vm_map_get_max_aslr_slide_pages(map);
aslr_offset = random();
aslr_offset %= max_slide_pages;
aslr_offset <<= vm_map_page_shift(map);
dyld_aslr_offset = random();
dyld_aslr_offset %= max_slide_pages;
dyld_aslr_offset <<= vm_map_page_shift(map);
}
if (!result)
result = &myresult;
*result = load_result_null;
//解析Mach-O文件
lret = parse_machfile(vp, map, thread, header, file_offset, macho_size,
0, (int64_t)aslr_offset, (int64_t)dyld_aslr_offset, result);
if (lret != LOAD_SUCCESS) {
if (create_map) {
vm_map_deallocate(map); /* will lose pmap reference too */
}
return(lret);
}
#if __x86_64__
/*
* On x86, for compatibility, don't enforce the hard page-zero restriction for 32-bit binaries.
*/
if ((imgp->ip_flags & IMGPF_IS_64BIT) == 0) {
enforce_hard_pagezero = FALSE;
}
#endif
/*
* Check to see if the page zero is enforced by the map->min_offset.
*/
if (enforce_hard_pagezero &&
(vm_map_has_hard_pagezero(map, 0x1000) == FALSE)) {
{
if (create_map) {
vm_map_deallocate(map); /* will lose pmap reference too */
}
return (LOAD_BADMACHO);
}
}
if (create_map) {
/*
* If this is an exec, then we are going to destroy the old
* task, and it's correct to halt it; if it's spawn, the
* task is not yet running, and it makes no sense.
*/
if (!spawn) {
/*
* Mark the task as halting and start the other
* threads towards terminating themselves. Then
* make sure any threads waiting for a process
* transition get informed that we are committed to
* this transition, and then finally complete the
* task halting (wait for threads and then cleanup
* task resources).
*
* NOTE: task_start_halt() makes sure that no new
* threads are created in the task during the transition.
* We need to mark the workqueue as exiting before we
* wait for threads to terminate (at the end of which
* we no longer have a prohibition on thread creation).
*
* Finally, clean up any lingering workqueue data structures
* that may have been left behind by the workqueue threads
* as they exited (and then clean up the work queue itself).
*/
kret = task_start_halt(task);
if (kret != KERN_SUCCESS) {
vm_map_deallocate(map); /* will lose pmap reference too */
return (LOAD_FAILURE);
}
proc_transcommit(p, 0);
workqueue_mark_exiting(p);
task_complete_halt(task);
workqueue_exit(p);
kqueue_dealloc(p->p_wqkqueue);
p->p_wqkqueue = NULL;
}
*mapp = map;
}
return(LOAD_SUCCESS);
}
int macho_printf = 0;
#define MACHO_PRINTF(args) \
do { \
if (macho_printf) { \
printf args; \
} \
} while (0)
  • 主要解析了Mach-O文件,设置内存不可执行,防止了产生溢出漏洞,还设置了ASLR偏移

1.5 parse_machfile

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
static
load_return_t
parse_machfile(
struct vnode *vp,
vm_map_t map,
thread_t thread,
struct mach_header *header,
off_t file_offset,
off_t macho_size,
int depth,
int64_t aslr_offset,
int64_t dyld_aslr_offset,
load_result_t *result
)
{
uint32_t ncmds;
struct load_command *lcp;
struct dylinker_command *dlp = 0;
integer_t dlarchbits = 0;
void * control;
load_return_t ret = LOAD_SUCCESS;
caddr_t addr;
void * kl_addr;
vm_size_t size,kl_size;
size_t offset;
size_t oldoffset; /* for overflow check */
int pass;
proc_t p = current_proc(); /* XXXX */
int error;
int resid = 0;
size_t mach_header_sz = sizeof(struct mach_header);
boolean_t abi64;
boolean_t got_code_signatures = FALSE;
int64_t slide = 0;
if (header->magic == MH_MAGIC_64 ||
header->magic == MH_CIGAM_64) {
mach_header_sz = sizeof(struct mach_header_64);
}
/*
* Break infinite recursion
*/
if (depth > 1) {
return(LOAD_FAILURE);
}
depth++;
/*
* Check to see if right machine type.
*/
if (((cpu_type_t)(header->cputype & ~CPU_ARCH_MASK) != (cpu_type() & ~CPU_ARCH_MASK)) ||
!grade_binary(header->cputype,
header->cpusubtype & ~CPU_SUBTYPE_MASK))
return(LOAD_BADARCH);
abi64 = ((header->cputype & CPU_ARCH_ABI64) == CPU_ARCH_ABI64);
switch (header->filetype) {
case MH_EXECUTE:
if (depth != 1) {
return (LOAD_FAILURE);
}
break;
case MH_DYLINKER:
if (depth != 2) {
return (LOAD_FAILURE);
}
break;
default:
return (LOAD_FAILURE);
}
/*
* Get the pager for the file.
*/
control = ubc_getobject(vp, UBC_FLAGS_NONE);
/*
* Map portion that must be accessible directly into
* kernel's map.
*/
if ((off_t)(mach_header_sz + header->sizeofcmds) > macho_size)
return(LOAD_BADMACHO);
/*
* Round size of Mach-O commands up to page boundry.
*/
size = round_page(mach_header_sz + header->sizeofcmds);
if (size <= 0)
return(LOAD_BADMACHO);
/*
* Map the load commands into kernel memory.
*/
addr = 0;
kl_size = size;
kl_addr = kalloc(size);
addr = (caddr_t)kl_addr;
if (addr == NULL)
return(LOAD_NOSPACE);
error = vn_rdwr(UIO_READ, vp, addr, size, file_offset,
UIO_SYSSPACE, 0, kauth_cred_get(), &resid, p);
if (error) {
/*
 * NOTE(review): this is an excerpt from the MIDDLE of parse_machfile()
 * (XNU, bsd/kern/mach_loader.c). The function begins before and continues
 * after this fragment, so several locals (kl_addr, resid, addr, header,
 * result, depth, ...) are declared outside of what is visible here.
 */
/* Header/load-command read failed: free the kernel buffer, report I/O error. */
if (kl_addr)
kfree(kl_addr, kl_size);
return(LOAD_IOERROR);
}
if (resid) {
/* We must be able to read in as much as the mach_header indicated */
if (kl_addr)
kfree(kl_addr, kl_size);
return(LOAD_BADMACHO);
}
/*
* For PIE and dyld, slide everything by the ASLR offset.
*/
if ((header->flags & MH_PIE) || (header->filetype == MH_DYLINKER)) {
slide = aslr_offset;
}
/*
* Scan through the commands, processing each one as necessary.
* We parse in four passes (0-3) through the headers:
* 0: determine if TEXT and DATA boundary can be page-aligned
* 1: thread state, uuid, code signature
* 2: segments
* 3: dyld, encryption, check entry point
*/
for (pass = 0; pass <= 3; pass++) {
if (pass == 0) {
/* see if we need to adjust the slide to re-align... */
/* no re-alignment needed on X86_64 or ARM32 kernel */
continue;
} else if (pass == 1) {
/* nothing pass-1-specific to set up in this excerpt */
}
/*
* Check that the entry point is contained in an executable segment.
* validentry is presumably set by load_main()/load_unixthread() below
* (their bodies are outside this fragment — TODO confirm); if neither
* produced a valid entry, reset the thread state and fail the load.
*/
if ((pass == 3) && (!result->using_lcmain && result->validentry == 0)) {
thread_state_initialize(thread);
ret = LOAD_FAILURE;
break;
}
/*
* Loop through each of the load_commands indicated by the
* Mach-O header; if an absurd value is provided, we just
* run off the end of the reserved section by incrementing
* the offset too far, so we are implicitly fail-safe.
*/
offset = mach_header_sz;
ncmds = header->ncmds;
while (ncmds--) {
/*
* Get a pointer to the command.
*/
lcp = (struct load_command *)(addr + offset);
oldoffset = offset;
offset += lcp->cmdsize;
/*
* Perform prevalidation of the struct load_command
* before we attempt to use its contents. Invalid
* values are ones which result in an overflow, or
* which can not possibly be valid commands, or which
* straddle or exist past the reserved section at the
* start of the image.
*/
if (oldoffset > offset ||
lcp->cmdsize < sizeof(struct load_command) ||
offset > header->sizeofcmds + mach_header_sz) {
ret = LOAD_BADMACHO;
break;
}
/*
* Act on struct load_command's for which kernel
* intervention is required.
*/
switch(lcp->cmd) {
/* Load the segment's data and map it into the process address space. */
case LC_SEGMENT:
if (pass == 0) {
break;
}
if (pass != 2)
break;
if (abi64) {
/*
* Having an LC_SEGMENT command for the
* wrong ABI is invalid <rdar://problem/11021230>
*/
ret = LOAD_BADMACHO;
break;
}
ret = load_segment(lcp,
header->filetype,
control,
file_offset,
macho_size,
vp,
map,
slide,
result);
break;
case LC_SEGMENT_64:
if (pass != 2)
break;
if (!abi64) {
/*
* Having an LC_SEGMENT_64 command for the
* wrong ABI is invalid <rdar://problem/11021230>
*/
ret = LOAD_BADMACHO;
break;
}
ret = load_segment(lcp,
header->filetype,
control,
file_offset,
macho_size,
vp,
map,
slide,
result);
break;
/* Set up the initial Unix thread state (pre-LC_MAIN style binaries). */
case LC_UNIXTHREAD:
if (pass != 1)
break;
ret = load_unixthread(
(struct thread_command *) lcp,
thread,
slide,
result);
break;
/* Entry point of the main executable; only honored for the
 * top-level image (depth == 1). */
case LC_MAIN:
if (pass != 1)
break;
if (depth != 1)
break;
ret = load_main(
(struct entry_point_command *) lcp,
thread,
slide,
result);
break;
/* Remember the dynamic linker (dyld) command for the top-level image;
 * a second LC_LOAD_DYLINKER (or one at depth > 1) fails the load.
 * dlp is presumably consumed after the scan, outside this fragment. */
case LC_LOAD_DYLINKER:
if (pass != 3)
break;
if ((depth == 1) && (dlp == 0)) {
dlp = (struct dylinker_command *)lcp;
dlarchbits = (header->cputype & CPU_ARCH_MASK);
} else {
ret = LOAD_FAILURE;
}
break;
/* Record the image's UUID (top-level image, pass 1 only). */
case LC_UUID:
if (pass == 1 && depth == 1) {
ret = load_uuid((struct uuid_command *) lcp,
(char *)addr + mach_header_sz + header->sizeofcmds,
result);
}
break;
case LC_CODE_SIGNATURE:
/* CODE SIGNING */
if (pass != 1)
break;
/* pager -> uip ->
load signatures & store in uip
set VM object "signed_pages"
*/
ret = load_code_signature(
(struct linkedit_data_command *) lcp,
vp,
file_offset,
macho_size,
header->cputype,
result);
if (ret != LOAD_SUCCESS) {
printf("proc %d: load code signature error %d "
"for file \"%s\"\n",
p->p_pid, ret, vp->v_name);
/*
* Allow injections to be ignored on devices w/o enforcement enabled
*/
if (!cs_enforcement(NULL))
ret = LOAD_SUCCESS; /* ignore error */
} else {
got_code_signatures = TRUE;
}
if (got_code_signatures) {
/* Validate the initial pages of the image against the
 * just-loaded signature, one PAGE_SIZE chunk at a time. */
unsigned tainted = CS_VALIDATE_TAINTED;
boolean_t valid = FALSE;
struct cs_blob *blobs;
vm_size_t off = 0;
if (cs_debug > 10)
printf("validating initial pages of %s\n", vp->v_name);
blobs = ubc_get_cs_blobs(vp);
while (off < size && ret == LOAD_SUCCESS) {
tainted = CS_VALIDATE_TAINTED;
valid = cs_validate_page(blobs,
NULL,
file_offset + off,
addr + off,
&tainted);
if (!valid || (tainted & CS_VALIDATE_TAINTED)) {
if (cs_debug)
printf("CODE SIGNING: %s[%d]: invalid initial page at offset %lld validated:%d tainted:%d csflags:0x%x\n",
vp->v_name, p->p_pid, (long long)(file_offset + off), valid, tainted, result->csflags);
/* Fail the load only when enforcement is on, either globally
 * or via the process's own csflags; otherwise just drop CS_VALID. */
if (cs_enforcement(NULL) ||
(result->csflags & (CS_HARD|CS_KILL|CS_ENFORCEMENT))) {
ret = LOAD_FAILURE;
}
result->csflags &= ~CS_VALID;
}
off += PAGE_SIZE;
}
}
break;
#if CONFIG_CODE_DECRYPTION
case LC_ENCRYPTION_INFO:
case LC_ENCRYPTION_INFO_64:
if (pass != 3)
break;
/* Set up decryption for protected (encrypted) binaries. */
ret = set_code_unprotect(
(struct encryption_info_command *) lcp,
addr, map, slide, vp, file_offset,
header->cputype, header->cpusubtype);
if (ret != LOAD_SUCCESS) {
printf("proc %d: set_code_unprotect() error %d "
"for file \"%s\"\n",
p->p_pid, ret, vp->v_name);
/*
* Don't let the app run if it's
* encrypted but we failed to set up the
* decrypter. If the keys are missing it will
* return LOAD_DECRYPTFAIL.
*/
if (ret == LOAD_DECRYPTFAIL) {
/* failed to load due to missing FP keys */
proc_lock(p);
p->p_lflag |= P_LTERM_DECRYPTFAIL;
proc_unlock(p);
}
psignal(p, SIGKILL);
}
break;
#endif
default:
/* Other commands are ignored by the kernel */
ret = LOAD_SUCCESS;
break;
}
if (ret != LOAD_SUCCESS)
break;
}
if (ret != LOAD_SUCCESS)
break;
}
if (ret == LOAD_SUCCESS) {
if (! got_code_signatures) {
if (cs_enforcement(NULL)) {
/* Enforcement requires a signature; none embedded -> fail. */
ret = LOAD_FAILURE;
} else {
/*
* No embedded signatures: look for detached by taskgated,
* this is only done on OSX, on embedded platforms we expect everything
* to be have embedded signatures.
*/
struct cs_blob *blob;
blob = ubc_cs_blob_get(vp, -1, file_offset);
if (blob != NULL) {
unsigned int cs_flag_data = blob->csb_flags;
if(0 != ubc_cs_generation_check(vp)) {
if (0 != ubc_cs_blob_revalidate(vp, blob, 0)) {
/* clear out the flag data if revalidation fails */
cs_flag_data = 0;
result->csflags &= ~CS_VALID;
}
}
/* get flags to be applied to the process */
result->csflags |= cs_flag_data;
}
}
}
}
  • 根据 load_command 类型的不同,调用相应的加载处理函数(如 load_segment、load_main、load_code_signature 等)

以上只是比较粗略的了解了Mach-O的加载流程,之后还需理解的更透彻,如有不足的地方还希望可以多多指正^_^
参考:

文章目录
  1. 1.1 __mac_execve()
  2. 1.2 exec_activate_image()
  3. 1.3 exec_mach_imgact
  4. 1.4 load_machfile
  5. 1.5 parse_machfile