最終的な成績

Bでした

課題に関するファイル

課題に関するファイルはGitHubに置かれている

課題の提出方法に関する注意点

この授業中の問題は、
    Subject: Report on Compiler consturction Lecture Exercise 1.1

などというSubjectのメールにして、kono@ie.u-ryukyu.ac.jp まで送ること。

と書かれているが、consturctionはスペルミスであり、スペルミスを訂正した状態と訂正しない状態のどちらで提出すればいいかわからない

ちなみに、著者はスペル訂正なし、スペル訂正ありの両方のバージョンのメールを送っている

1.1 LLVM

この問題を解く

sample.cという名前の、以下のような内容のファイルを作成した

extern int printf(const char *,...);
#define TYPE int
TYPE f(TYPE a, TYPE b) {
    return a + b;
}
int main() 
{
    TYPE a = 1;
    TYPE b = 2;
    printf("%x = %x + %x \n",f(a,b),a,b);
    return 0;
}

(1) cpp

   clang -E

での出力を調べる。
変換されている部分はどこか。 printf を protoptype ではなく #include <stdio.h> で定義した時はどうなるか。

$ clang -E sample.cを実行した

# 1 "sample.c"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 368 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "sample.c" 2
extern int printf(const char *,...);

int f(int a, int b) {
    return a + b;
}
int main()
{
    int a = 1;
    int b = 2;
    printf("%x = %x + %x \n",f(a,b),a,b);
    return 0;
}

#define TYPE intによって「TYPE」が「int」にマクロ展開されている
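sample.cには#includeがないためヘッダファイルの展開は行われず、先頭に# 1 "sample.c"のような行マーカーが追加されているだけである（#includeがある場合は、その位置にインクルードファイルの内容が展開される）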

sample2.cというファイル名で中身が以下のファイルを用意した

printf を prototype ではなく #include <stdio.h> で定義したファイルである

#include <stdio.h>
#define TYPE int
TYPE f(TYPE a, TYPE b) {
    return a + b;
}
int main() 
{
    TYPE a = 1;
    TYPE b = 2;
    printf("%x = %x + %x \n",f(a,b),a,b);
    return 0;
}

$ clang -E sample2.cを実行した

出力は、長すぎたので割愛

出力が長くなったのは、#include <stdio.h>でstdio.hというヘッダファイルがインクルードされ、そこに含まれるprintfなどの標準入出力関数のプロトタイプ宣言によって、「printf」関数に関する詳細な情報が展開されているのが原因

(2) アセンブラ

   clang -S  -O0

で出力されるアセンブラについて調べる。
   clang -S  -O

についても調べる。
関数f はどうなっているか。

$ clang -S -O0 sample.c

でアセンブラを出力し、sample.sというファイルに書き込む

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _f                              ## -- Begin function f
    .p2align    4, 0x90
_f:                                     ## @f
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    movl    %edi, -4(%rbp)
    movl    %esi, -8(%rbp)
    movl    -4(%rbp), %eax
    addl    -8(%rbp), %eax
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    subq    $16, %rsp
    movl    $0, -4(%rbp)
    movl    $1, -8(%rbp)
    movl    $2, -12(%rbp)
    movl    -8(%rbp), %edi
    movl    -12(%rbp), %esi
    callq   _f
    movl    %eax, %esi
    movl    -8(%rbp), %edx
    movl    -12(%rbp), %ecx
    leaq    L_.str(%rip), %rdi
    movb    $0, %al
    callq   _printf
    xorl    %eax, %eax
    addq    $16, %rsp
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .section    __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
    .asciz  "%x = %x + %x \n"

.subsections_via_symbols

C言語のソースコードsample.cがアセンブラ言語（x86_64アセンブラ）に変換されている。

f関数は、レジスタ%ediと%esiから引数aとbを取り出し、それらをメモリアドレス-4(%rbp)と-8(%rbp)に保存する。次に、-4(%rbp)と-8(%rbp)を加算し、その結果をレジスタ%eaxに格納する。最後に、%eaxに格納された値を返すために、retqで関数からのリターンを行う。

$ mv sample.s sample.O0.sでsample.sをsample.O0.sというファイル名にする

$ clang -S -O sample.cで最適化レベルを1にしている

O0は最適化レベルを0（最小）に設定し、最適化を行わない。このため、生成されたアセンブラコードは実行速度が遅い可能性があるが、デバッグやトラブルシューティングには役立つことがある

Oは最適化レベルを1に設定し、最適化を行う。このため、生成されたアセンブラコードは実行速度が速い可能性があるが、デバッグやトラブルシューティングには不向きである

$ mv sample.s sample.O.sを実行し、sample.O0.sとファイル名を区別できるようにした

sample.O.sの中身は以下のようになっている

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _f                              ## -- Begin function f
    .p2align    4, 0x90
_f:                                     ## @f
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
                                        ## kill: def $esi killed $esi def $rsi
                                        ## kill: def $edi killed $edi def $rdi
    leal    (%rdi,%rsi), %eax
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    movl    $1, %edi
    movl    $2, %esi
    callq   _f
    leaq    L_.str(%rip), %rdi
    movl    %eax, %esi
    movl    $1, %edx
    movl    $2, %ecx
    xorl    %eax, %eax
    callq   _printf
    xorl    %eax, %eax
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .section    __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
    .asciz  "%x = %x + %x \n"

.subsections_via_symbols

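-O0オプション（最適化なし）で出力したアセンブラの関数fは、引数%ediと%esiを一度スタック（-4(%rbp)と-8(%rbp)）に保存してから読み出し、addlで加算している。一方、-Oオプションで最適化したアセンブラの関数fは、スタックへの保存と読み出しがなくなり、leal (%rdi,%rsi), %eaxの1命令で加算している。計算結果は同じだが、実装は異なる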

(3) LLVM byte code

   clang  -emit-llvm -S 

LLVM バイトコードの出力が得られることを確認せよ。
アセンブラとの対応を示せ。

$ clang -emit-llvm -S sample.cでLLVM バイトコードをsample.llに出力した

出力は以下のようなもの

; ModuleID = 'sample.c'
source_filename = "sample.c"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx12.0.0"

@.str = private unnamed_addr constant [15 x i8] c"%x = %x + %x \0A\00", align 1

; Function Attrs: noinline nounwind optnone ssp uwtable
define i32 @f(i32 %0, i32 %1) #0 {
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  store i32 %0, i32* %3, align 4
  store i32 %1, i32* %4, align 4
  %5 = load i32, i32* %3, align 4
  %6 = load i32, i32* %4, align 4
  %7 = add nsw i32 %5, %6
  ret i32 %7
}

; Function Attrs: noinline nounwind optnone ssp uwtable
define i32 @main() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  store i32 0, i32* %1, align 4
  store i32 1, i32* %2, align 4
  store i32 2, i32* %3, align 4
  %4 = load i32, i32* %2, align 4
  %5 = load i32, i32* %3, align 4
  %6 = call i32 @f(i32 %4, i32 %5)
  %7 = load i32, i32* %2, align 4
  %8 = load i32, i32* %3, align 4
  %9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i64 0, i64 0), i32 %6, i32 %7, i32 %8)
  ret i32 0
}

declare i32 @printf(i8*, ...) #1

attributes #0 = { noinline nounwind optnone ssp uwtable "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "darwin-stkchk-strong-link" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "probe-stack"="___chkstk_darwin" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" }

!llvm.module.flags = !{!0, !1, !2}
!llvm.ident = !{!3}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 12, i32 0]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{i32 7, !"PIC Level", i32 2}
!3 = !{!"Apple clang version 13.0.0 (clang-1300.0.29.3)"}

関数fの実装に限定して、アセンブラとLLVM バイトコードの対応を表にして説明する

アセンブラ	LLVM バイトコード	説明
_f:	define i32 @f(i32 %0, i32 %1) #0 {	関数fの定義
pushq %rbp	（対応なし）	フレームポインタを退避
movq %rsp, %rbp	（対応なし）	フレームポインタをセット
（命令なし：-4(%rbp) と -8(%rbp) を使用）	%3 = alloca i32, align 4 / %4 = alloca i32, align 4	引数を保存するスタック領域の確保
movl %edi, -4(%rbp)	store i32 %0, i32* %3, align 4	第一引数を格納
movl %esi, -8(%rbp)	store i32 %1, i32* %4, align 4	第二引数を格納
movl -4(%rbp), %eax	%5 = load i32, i32* %3, align 4	第一引数をレジスタにロード
addl -8(%rbp), %eax	%6 = load i32, i32* %4, align 4 / %7 = add nsw i32 %5, %6	第二引数をロードして加算実行
popq %rbp	（対応なし）	フレームポインタを復元
retq	ret i32 %7	関数からの返却
.cfi_endproc	}	関数の終了

(4) a.out

出力される a.out を otool を使って調べる。

$ clang sample.cを実行して、a.outというファイルを出力した

$ otool -tv a.outを実行した

a.out:
(__TEXT,__text) section
_f:
0000000100003f20        pushq   %rbp
0000000100003f21        movq    %rsp, %rbp
0000000100003f24        movl    %edi, -0x4(%rbp)
0000000100003f27        movl    %esi, -0x8(%rbp)
0000000100003f2a        movl    -0x4(%rbp), %eax
0000000100003f2d        addl    -0x8(%rbp), %eax
0000000100003f30        popq    %rbp
0000000100003f31        retq
0000000100003f32        nopw    %cs:(%rax,%rax)
0000000100003f3c        nopl    (%rax)
_main:
0000000100003f40        pushq   %rbp
0000000100003f41        movq    %rsp, %rbp
0000000100003f44        subq    $0x10, %rsp
0000000100003f48        movl    $0x0, -0x4(%rbp)
0000000100003f4f        movl    $0x1, -0x8(%rbp)
0000000100003f56        movl    $0x2, -0xc(%rbp)
0000000100003f5d        movl    -0x8(%rbp), %edi
0000000100003f60        movl    -0xc(%rbp), %esi
0000000100003f63        callq   0x100003f20
0000000100003f68        movl    %eax, %esi
0000000100003f6a        movl    -0x8(%rbp), %edx
0000000100003f6d        movl    -0xc(%rbp), %ecx
0000000100003f70        leaq    0x2f(%rip), %rdi
0000000100003f77        movb    $0x0, %al
0000000100003f79        callq   0x100003f86
0000000100003f7e        xorl    %eax, %eax
0000000100003f80        addq    $0x10, %rsp
0000000100003f84        popq    %rbp
0000000100003f85        retq

各命令のメモリアドレスと対応するアセンブラが一緒に出力される

ARMアセンブラ

  -arch arm を付けて、ARMのアセンブラの出力を調べよ
  -arch i386 
  -arch x86_64 
   clang -print-targets

arm

$ clang -arch arm -S sample.cを実行し、armのアセンブラをsample.sに出力する。次に、$ mv sample.s sample.arm.sを実行して、ファイル名を変更する

sample.arm.sの中身

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .syntax unified
    .globl  _f                              @ -- Begin function f
    .p2align    2
    .code   32                              @ @f
_f:
@ %bb.0:
    sub sp, sp, #8
    str r0, [sp, #4]
    str r1, [sp]
    ldr r0, [sp, #4]
    ldr r1, [sp]
    add r0, r0, r1
    add sp, sp, #8
    bx  lr
                                        @ -- End function
    .globl  _main                           @ -- Begin function main
    .p2align    2
    .code   32                              @ @main
_main:
@ %bb.0:
    push    {r7, lr}
    mov r7, sp
    sub sp, sp, #16
    mov r0, #0
    str r0, [sp]                        @ 4-byte Spill
    str r0, [r7, #-4]
    mov r0, #1
    str r0, [sp, #8]
    mov r0, #2
    str r0, [sp, #4]
    ldr r0, [sp, #8]
    ldr r1, [sp, #4]
    bl  _f
    mov r1, r0
    ldr r2, [sp, #8]
    ldr r3, [sp, #4]
    ldr r0, LCPI1_0
LPC1_0:
    add r0, pc, r0
    bl  _printf
                                        @ kill: def $r1 killed $r0
    ldr r0, [sp]                        @ 4-byte Reload
    mov sp, r7
    pop {r7, lr}
    bx  lr
    .p2align    2
@ %bb.1:
    .data_region
LCPI1_0:
    .long   L_.str-(LPC1_0+8)
    .end_data_region
                                        @ -- End function
    .section    __TEXT,__cstring,cstring_literals
L_.str:                                 @ @.str
    .asciz  "%x = %x + %x \n"

.subsections_via_symbols

i386

$ clang -arch i386 -S sample.cを実行し、i386のアセンブラをsample.sに出力する。次に、$ mv sample.s sample.i386.sを実行して、ファイル名を変更する

sample.i386.sの中身

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _f                              ## -- Begin function f
    .p2align    4, 0x90
_f:                                     ## @f
    .cfi_startproc
## %bb.0:
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset %ebp, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register %ebp
    movl    12(%ebp), %eax
    movl    8(%ebp), %eax
    movl    8(%ebp), %eax
    addl    12(%ebp), %eax
    popl    %ebp
    retl
    .cfi_endproc
                                        ## -- End function
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset %ebp, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register %ebp
    pushl   %esi
    subl    $36, %esp
    .cfi_offset %esi, -12
    calll   L1$pb
L1$pb:
    popl    %eax
    movl    %eax, -20(%ebp)                 ## 4-byte Spill
    movl    $0, -8(%ebp)
    movl    $1, -12(%ebp)
    movl    $2, -16(%ebp)
    movl    -12(%ebp), %ecx
    movl    -16(%ebp), %eax
    movl    %ecx, (%esp)
    movl    %eax, 4(%esp)
    calll   _f
    movl    %eax, %edx
    movl    -20(%ebp), %eax                 ## 4-byte Reload
    leal    L_.str-L1$pb(%eax), %esi
    movl    -12(%ebp), %ecx
    movl    -16(%ebp), %eax
    movl    %esi, (%esp)
    movl    %edx, 4(%esp)
    movl    %ecx, 8(%esp)
    movl    %eax, 12(%esp)
    calll   _printf
    xorl    %eax, %eax
    addl    $36, %esp
    popl    %esi
    popl    %ebp
    retl
    .cfi_endproc
                                        ## -- End function
    .section    __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
    .asciz  "%x = %x + %x \n"

.subsections_via_symbols

x86_64

$ clang -arch x86_64 -S sample.cを実行し、x86_64のアセンブラをsample.sに出力する。次に、$ mv sample.s sample.x86_64.sを実行して、ファイル名を変更する

sample.x86_64.sの中身

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _f                              ## -- Begin function f
    .p2align    4, 0x90
_f:                                     ## @f
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    movl    %edi, -4(%rbp)
    movl    %esi, -8(%rbp)
    movl    -4(%rbp), %eax
    addl    -8(%rbp), %eax
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    subq    $16, %rsp
    movl    $0, -4(%rbp)
    movl    $1, -8(%rbp)
    movl    $2, -12(%rbp)
    movl    -8(%rbp), %edi
    movl    -12(%rbp), %esi
    callq   _f
    movl    %eax, %esi
    movl    -8(%rbp), %edx
    movl    -12(%rbp), %ecx
    leaq    L_.str(%rip), %rdi
    movb    $0, %al
    callq   _printf
    xorl    %eax, %eax
    addq    $16, %rsp
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .section    __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
    .asciz  "%x = %x + %x \n"

.subsections_via_symbols

clang -print-targets

$ clang -print-targetsを実行するとclangコンパイラにサポートされているターゲットアーキテクチャのリストが出力される

  Registered Targets:
    aarch64    - AArch64 (little endian)
    aarch64_32 - AArch64 (little endian ILP32)
    aarch64_be - AArch64 (big endian)
    arm        - ARM
    arm64      - ARM64 (little endian)
    arm64_32   - ARM64 (little endian ILP32)
    armeb      - ARM (big endian)
    thumb      - Thumb
    thumbeb    - Thumb (big endian)
    x86        - 32-bit X86: Pentium-Pro and above
    x86-64     - 64-bit X86: EM64T and AMD64

2.1

この問題を解く

以下のprogram check_endian.c がある。
    int check = 0x12345678;
    main()
    {
    char i, *ptr;
    
    ptr = (char *)&check; 
    i = ptr[1];
    return i;
    }

このprogramをcompileしたassemblerを、i386, emt64 のCPUで表示させて見よ。また、gdb で i にどのような値が入るかを確認せよ。そのCPUは、Little-Endian か Big-Endian かを答えよ。また、 trace の結果を、確認せよ。
Endian の変換はどのような時に必要になるか。どのようにすれば実現できるか?

Unix には、Builtin のEndianの変換関数がある。それを探し出せ。また、その実装がどうなっているかを調べよ。(ヒント: man -k を使う)

check_endian.cというファイル名で中身が以下のようなファイルを作る

int check = 0x12345678;

int main()
{
    char i, *ptr;
    ptr = (char *)&check;
    i = ptr[1];
    return i;
}

compileしたassemblerを、i386, emt64 のCPUで表示

$ clang -arch i386 -S check_endian.c && mv check_endian.s check_endian.i386.sを実行して、check_endian.i386.sというファイルにi386のアセンブラを出力する

check_endian.i386.sの中身

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset %ebp, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register %ebp
    subl    $12, %esp
    calll   L0$pb
L0$pb:
    popl    %eax
    leal    _check-L0$pb(%eax), %eax
    movl    $0, -4(%ebp)
    movl    %eax, -12(%ebp)
    movl    -12(%ebp), %eax
    movb    1(%eax), %al
    movb    %al, -5(%ebp)
    movsbl  -5(%ebp), %eax
    addl    $12, %esp
    popl    %ebp
    retl
    .cfi_endproc
                                        ## -- End function
    .section    __DATA,__data
    .globl  _check                          ## @check
    .p2align    2
_check:
    .long   305419896                       ## 0x12345678

.subsections_via_symbols

$ clang -arch x86_64 -S check_endian.c && mv check_endian.s check_endian.x86_64.sを実行して、check_endian.x86_64.sというファイルにemt64のアセンブラを出力する

check_endian.x86_64.sの中身

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    leaq    _check(%rip), %rax
    movl    $0, -4(%rbp)
    movq    %rax, -16(%rbp)
    movq    -16(%rbp), %rax
    movb    1(%rax), %al
    movb    %al, -5(%rbp)
    movsbl  -5(%rbp), %eax
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .section    __DATA,__data
    .globl  _check                          ## @check
    .p2align    2
_check:
    .long   305419896                       ## 0x12345678

.subsections_via_symbols

gdb で i にどのような値が入るかを確認

このようなファイル構成を作るためにファイルを移動、作成する

.
├── c_env
│   ├── Dockerfile
│   ├── check_endian.c
│   ├── check_endian.i386.s
│   └── check_endian.x86_64.s
└── docker-compose.yml

docker-compose.yml

services:
  c_env:
    build: ./c_env/
    container_name: c_env
    security_opt:
      - seccomp:unconfined
    volumes:
      - "./c_env:/workplace"
    tty: true

c_env/Dockerfile

FROM ubuntu:22.04
RUN mkdir workplace &&\
    apt-get update &&\
    apt-get -y install lldb gdb cmake manpages-dev
WORKDIR /workplace/

Docker Desktopを起動する

$ docker-compose up -d

$ docker-compose exec c_env bash

$ gcc -m64 -g -O0 -o check_endian check_endian.cでアーキテクチャをx86_64で指定して、コンパイルする

$ gdb check_endianを実行した

(gdb) break check_endian.c:8で8行目にbreakpointを設置

(gdb) runで実行した

(gdb) info locals iを実行したら、i = 86 'V'と出力された。

CPUは、Little-Endian か Big-Endian か

(gdb) x/20b &checkで「check」変数に格納されている値を20バイト分のバイナリ値として表示した

0x555555558010 <check>: 120     86      52      18      0       0       0       0
0x555555558018: 0       0       0       0       0       0       0       0
0x555555558020: 0       0       0       0

最初の8バイト（0x555555558010から）に格納されている値は、 120 86 52 18 0 0 0 0だった

120, 86, 52, 18は16進数でそれぞれ0x78, 0x56, 0x34, 0x12であり、0x12345678の最下位バイト0x78が最も低いアドレス（0x555555558010）に格納されている。little-endianでは下位バイトから順に低いメモリアドレスへ格納されるので、このCPUはLittle-Endianである

trace の結果

(gdb) exitで一回抜ける。$ gdb check_endianでもう一度gdbに入る

(gdb) tbreak mainで一時ブレークポイントを設置し、(gdb) runで実行し、(gdb) backtraceで、関数の呼び出し順や、どの関数からどの関数へと呼び出されたのかなど、現在の停止位置までの呼び出し履歴（バックトレース）を一覧表示している

#0  main () at check_endian.c:6

Endian の変換

Endian の変換はどのような時に必要になるか。どのようにすれば実現できるか?

Endian変換は、バイトオーダーが異なるシステム間でのデータのやりとりや、データの保存・読み込みなどで必要になる

例: ネットワークでバイナリを送る場合

C言語では、「htons」や「htonl」などの関数を使ってEndian変換を実現できる。

コマンドでは、「xxd」や「od」などのコマンドを使って、ファイルの内容を16進数形式で表示し、手作業でEndian変換を行うこともできる。

UNIXのbuiltinのEndianの変換関数の場所と実装

endian_conversion.cというファイル名で以下のような内容のファイルを用意する

#include <stdio.h>
#include <arpa/inet.h>

int main() {
    unsigned short x = 12345;
    unsigned short y = htons(x);

    printf("x = %d\n", x);
    printf("y = %d\n", y);

    return 0;
}

$ clang -E endian_conversion.c | grep arpa/inetを実行

# 1 "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/arpa/inet.h" 1 3 4
# 68 "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/arpa/inet.h" 3 4
# 69 "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/arpa/inet.h" 2 3 4
# 70 "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/arpa/inet.h" 2 3 4
# 72 "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/arpa/inet.h" 2 3 4

/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/arpa/inet.hを見る

htonsの実装がない、代わりに#include <machine/endian.h>という行を見つけた

/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/machine/endian.hを見る

まだ、htonsの実装がない、代わりに#include "i386/endian.h"という行を見つけた

/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/i386/endian.hを見る

まだ、htonsの実装がない、代わりに#include <sys/_endian.h>という行を見つけた

/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include/sys/_endian.hを見る

#if defined(lint)

__BEGIN_DECLS
__uint16_t      ntohs(__uint16_t);
__uint16_t      htons(__uint16_t);
__uint32_t      ntohl(__uint32_t);
__uint32_t      htonl(__uint32_t);
__END_DECLS

上の抜粋はdefined(lint)の場合の関数プロトタイプ宣言である。通常のビルドでは、同じファイル内の別の条件分岐でhtonsがマクロとして定義されていた

3.1 intel64 (emt64) のアセンブラ

この問題を解く

3.1の課題ページのurlがhttps://ie.u-ryukyu.ac.jp/~kono/lecture/compiler/ex/005になっているが、https://ie.u-ryukyu.ac.jp/~kono/lecture/compiler/ex/005.htmlに修正したら、見れた。メールを出すときにもその旨を伝えておくといいかもしれない。

課題ページにあるtest1.cをダウンロードするか、自前でtest1.cを用意する

extern int printf(const char *,...);

unsigned char a[] = {
      0x01,
      0x02,
      0x03,
      0x04,
      0x05,
      0x06,
      0x07,
      0x08,
      0x55,
      0x12,
};

long
test(unsigned char *a, long j)
{
   return j;
}

int
main() 
{
    long x = 0;
    x = test(a,x);
    printf("x = %lx\n", x);
    return 0;
}

test1.sと a.outを用意する

$ clang -S test1.cでアセンブラを出力しtest1.sに書き込む

$ clang test1.sでa.outを用意する

CPU の endian を x/20x と x/20b を使って確認する

$ lldb a.out

(lldb) b test

(lldb) run

Process 52594 launched: 'a.out' (x86_64)
Process 52594 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x0000000100003f20 a.out`test
a.out`test:
->  0x100003f20 <+0>: pushq  %rbp
    0x100003f21 <+1>: movq   %rsp, %rbp
    0x100003f24 <+4>: movq   %rdi, -0x8(%rbp)
    0x100003f28 <+8>: movq   %rsi, -0x10(%rbp)
Target 0: (a.out) stopped.

(lldb) x/20x &a

0x100008010: 0x04030201 0x08070605 0x00001255 0x00000000
0x100008020: 0x00000000 0x00000000 0x00000000 0x00000000
0x100008030: 0x00000000 0x00000000 0x00000000 0x00000000
0x100008040: 0x00000000 0x00000000 0x00000000 0x00000000
0x100008050: 0x00000000 0x00000000 0x00000000 0x00000000

(lldb) x/20b &a

0x100008010: 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08
0x100008018: 0x55 0x12 0x00 0x00 0x00 0x00 0x00 0x00
0x100008020: 0x00 0x00 0x00 0x00

レジスタの値を調べる

(lldb) p $rdi
(unsigned long) $2 = 4295000080
(lldb) p $rsi
(unsigned long) $3 = 0

lldbからログアウトする(ctrl-d)

test1.s を書き換えて、さまざまなアセンブラ命令を試す

書き換えずにコピーを用意してアセンブラ命令を出す

$ cp test1.s test2.sでtest1.sの中身をコピーしたtest2.sを作成した

以下のように編集した

test2.s

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _test                           ## -- Begin function test
    .p2align    4, 0x90
_test:                                  ## @test
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    movq    %rdi, -8(%rbp)
    movq    %rsi, -16(%rbp)
    movq    (%rdi),%rax
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    subq    $16, %rsp
    movl    $0, -4(%rbp)
    movq    $0, -16(%rbp)
    movq    -16(%rbp), %rsi
    leaq    _a(%rip), %rdi
    callq   _test
    movq    %rax, -16(%rbp)
    movq    -16(%rbp), %rsi
    leaq    L_.str(%rip), %rdi
    movb    $0, %al
    callq   _printf
    xorl    %eax, %eax
    addq    $16, %rsp
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .section    __DATA,__data
    .globl  _a                              ## @a
_a:
    .ascii  "\001\002\003\004\005\006\007\bU\022"

    .section    __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
    .asciz  "x = %lx\n"

.subsections_via_symbols

test1.sとtest2.sの差分、上がtest1.sで下がtest2.s

13,16c13,16
<    movq    %rdi, -8(%rbp)
<    movq    %rsi, -16(%rbp)
<    movq    -16(%rbp), %rax
<    popq    %rbp
---
>    movq    %rdi, -8(%rbp)
>    movq    %rsi, -16(%rbp)
>    movq    (%rdi),%rax
>    popq    %rbp

$ mv a.out a.1.outでa.outのtest1.sバージョンのバックアップを取った

$ clang test2.sでa.outを生成した

lldbで以下のような作業をした

$ lldb a.out
(lldb) target create "a.out"
Current executable set to 'a.out' (x86_64).
(lldb) b test
Breakpoint 1: where = a.out`test, address = 0x0000000100003f20
(lldb) process launch
Process 53418 launched: 'a.out' (x86_64)
Process 53418 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x0000000100003f20 a.out`test
a.out`test:
->  0x100003f20 <+0>: pushq  %rbp
    0x100003f21 <+1>: movq   %rsp, %rbp
    0x100003f24 <+4>: movq   %rdi, -0x8(%rbp)
    0x100003f28 <+8>: movq   %rsi, -0x10(%rbp)
Target 0: (a.out) stopped.
(lldb) stepi
Process 53418 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f21 a.out`test + 1
a.out`test:
->  0x100003f21 <+1>:  movq   %rsp, %rbp
    0x100003f24 <+4>:  movq   %rdi, -0x8(%rbp)
    0x100003f28 <+8>:  movq   %rsi, -0x10(%rbp)
    0x100003f2c <+12>: movq   (%rdi), %rax
Target 0: (a.out) stopped.
(lldb) stepi
Process 53418 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f24 a.out`test + 4
a.out`test:
->  0x100003f24 <+4>:  movq   %rdi, -0x8(%rbp)
    0x100003f28 <+8>:  movq   %rsi, -0x10(%rbp)
    0x100003f2c <+12>: movq   (%rdi), %rax
    0x100003f2f <+15>: popq   %rbp
Target 0: (a.out) stopped.
(lldb) stepi
Process 53418 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f28 a.out`test + 8
a.out`test:
->  0x100003f28 <+8>:  movq   %rsi, -0x10(%rbp)
    0x100003f2c <+12>: movq   (%rdi), %rax
    0x100003f2f <+15>: popq   %rbp
    0x100003f30 <+16>: retq
Target 0: (a.out) stopped.
(lldb) stepi
Process 53418 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f2c a.out`test + 12
a.out`test:
->  0x100003f2c <+12>: movq   (%rdi), %rax
    0x100003f2f <+15>: popq   %rbp
    0x100003f30 <+16>: retq
    0x100003f31 <+17>: nopw   %cs:(%rax,%rax)
Target 0: (a.out) stopped.
(lldb) stepi
Process 53418 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f2f a.out`test + 15
a.out`test:
->  0x100003f2f <+15>: popq   %rbp
    0x100003f30 <+16>: retq
    0x100003f31 <+17>: nopw   %cs:(%rax,%rax)
    0x100003f3b <+27>: nopl   (%rax,%rax)
Target 0: (a.out) stopped.
(lldb) p (void*) $rax
(void *) $0 = 0x0807060504030201
(lldb) p (void*) $ah
(void *) $1 = 0x0000000000000002
(lldb) p (void*) $al
(void *) $2 = 0x0000000000000001
(lldb) p (void*) $eax
(void *) $3 = 0x0000000004030201
(lldb) p (void*) $rbx
(void *) $4 = 0x00000001000c0060

作業のログをもとにmovq (%rdi),%raxを実行した時点の各レジスタの値を表にした。

レジスタ	格納されている値
%rax	0x0807060504030201
%ah	0x0000000000000002
%al	0x0000000000000001
%eax	0x0000000004030201
%rbx	0x00000001000c0060

test1.s を書き換えて、さまざまなアセンブラ命令を試す(2)

$ cp test1.s test3.sでtest1.sの中身をコピーしたtest3.sを作成した

以下のように編集した

test3.s

 .section    __TEXT,__text,regular,pure_instructions
    .build_version macos, 12, 0 sdk_version 12, 0
    .globl  _test                           ## -- Begin function test
    .p2align    4, 0x90
_test:                                  ## @test
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    (%rdi), %rax
    movq    2(%rdi), %rax
    movq    2(%rdi), %rbx
    movq   %rdi, %rbx
    addq    $0x4, %rbx
    movl    (%rbx), %eax
    incq    %rbx
    movq    (%rbx),%rax
    movq    $0x4, %rax
    movl    4(%rbx,%rax),%ecx
    leaq    4(%rbx,%rax),%rcx
    movb    2(%rbx),%al
    movsbl  2(%rbx),%eax
    movsbl  3(%rbx),%eax
    movsbq  3(%rbx),%rax
    leaq    8(%rbx,%rax),%rcx
    leaq    8(%rbx,%rax,2),%rcx
    ret
    .cfi_endproc
                                        ## -- End function
    .globl  _main                           ## -- Begin function main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## %bb.0:
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register %rbp
    subq    $16, %rsp
    movl    $0, -4(%rbp)
    movq    $0, -16(%rbp)
    movq    -16(%rbp), %rsi
    leaq    _a(%rip), %rdi
    callq   _test
    movq    %rax, -16(%rbp)
    movq    -16(%rbp), %rsi
    leaq    L_.str(%rip), %rdi
    movb    $0, %al
    callq   _printf
    xorl    %eax, %eax
    addq    $16, %rsp
    popq    %rbp
    retq
    .cfi_endproc
                                        ## -- End function
    .section    __DATA,__data
    .globl  _a                              ## @a
_a:
    .ascii  "\001\002\003\004\005\006\007\bU\022"

    .section    __TEXT,__cstring,cstring_literals
L_.str:                                 ## @.str
    .asciz  "x = %lx\n"

.subsections_via_symbols

test1.sとtest3.sの差分、上がtest1.sで下がtest3.s

11,17c11,28
<    movq    %rsp, %rbp
<    .cfi_def_cfa_register %rbp
<    movq    %rdi, -8(%rbp)
<    movq    %rsi, -16(%rbp)
<    movq    -16(%rbp), %rax
<    popq    %rbp
<    retq
---
>    movq    (%rdi), %rax
>    movq    2(%rdi), %rax
>    movq    2(%rdi), %rbx
>    movq   %rdi, %rbx
>    addq    $0x4, %rbx
>    movl    (%rbx), %eax
>    incq    %rbx
>    movq    (%rbx),%rax
>    movq    $0x4, %rax
>    movl    4(%rbx,%rax),%ecx
>    leaq    4(%rbx,%rax),%rcx
>    movb    2(%rbx),%al
>    movsbl  2(%rbx),%eax
>    movsbl  3(%rbx),%eax
>    movsbq  3(%rbx),%rax
>    leaq    8(%rbx,%rax),%rcx
>    leaq    8(%rbx,%rax,2),%rcx
>    ret

$ mv a.out a.2.outでa.outのtest2.sバージョンのバックアップを取った

$ clang test3.sでa.outを生成した

lldbで以下のような作業をした

$ lldb a.out
(lldb) target create "a.out"
Current executable set to 'a.out' (x86_64).
(lldb) b test
Breakpoint 1: where = a.out`test, address = 0x0000000100003ef0
(lldb) process launch
Process 53662 launched: 'a.out' (x86_64)
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x0000000100003ef0 a.out`test
a.out`test:
->  0x100003ef0 <+0>: pushq  %rbp
    0x100003ef1 <+1>: movq   (%rdi), %rax
    0x100003ef4 <+4>: movq   0x2(%rdi), %rax
    0x100003ef8 <+8>: movq   0x2(%rdi), %rbx
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003ef1 a.out`test + 1
a.out`test:
->  0x100003ef1 <+1>:  movq   (%rdi), %rax
    0x100003ef4 <+4>:  movq   0x2(%rdi), %rax
    0x100003ef8 <+8>:  movq   0x2(%rdi), %rbx
    0x100003efc <+12>: movq   %rdi, %rbx
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003ef4 a.out`test + 4
a.out`test:
->  0x100003ef4 <+4>:  movq   0x2(%rdi), %rax
    0x100003ef8 <+8>:  movq   0x2(%rdi), %rbx
    0x100003efc <+12>: movq   %rdi, %rbx
    0x100003eff <+15>: addq   $0x4, %rbx
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003ef8 a.out`test + 8
a.out`test:
->  0x100003ef8 <+8>:  movq   0x2(%rdi), %rbx
    0x100003efc <+12>: movq   %rdi, %rbx
    0x100003eff <+15>: addq   $0x4, %rbx
    0x100003f03 <+19>: movl   (%rbx), %eax
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003efc a.out`test + 12
a.out`test:
->  0x100003efc <+12>: movq   %rdi, %rbx
    0x100003eff <+15>: addq   $0x4, %rbx
    0x100003f03 <+19>: movl   (%rbx), %eax
    0x100003f05 <+21>: incq   %rbx
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003eff a.out`test + 15
a.out`test:
->  0x100003eff <+15>: addq   $0x4, %rbx
    0x100003f03 <+19>: movl   (%rbx), %eax
    0x100003f05 <+21>: incq   %rbx
    0x100003f08 <+24>: movq   (%rbx), %rax
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f03 a.out`test + 19
a.out`test:
->  0x100003f03 <+19>: movl   (%rbx), %eax
    0x100003f05 <+21>: incq   %rbx
    0x100003f08 <+24>: movq   (%rbx), %rax
    0x100003f0b <+27>: movq   $0x4, %rax
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f05 a.out`test + 21
a.out`test:
->  0x100003f05 <+21>: incq   %rbx
    0x100003f08 <+24>: movq   (%rbx), %rax
    0x100003f0b <+27>: movq   $0x4, %rax
    0x100003f12 <+34>: movl   0x4(%rbx,%rax), %ecx
Target 0: (a.out) stopped.
(lldb) p (void*) $eax
(void *) $0 = 0x0000000008070605
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f08 a.out`test + 24
a.out`test:
->  0x100003f08 <+24>: movq   (%rbx), %rax
    0x100003f0b <+27>: movq   $0x4, %rax
    0x100003f12 <+34>: movl   0x4(%rbx,%rax), %ecx
    0x100003f16 <+38>: leaq   0x4(%rbx,%rax), %rcx
Target 0: (a.out) stopped.
(lldb) p (void*) $rbx
(void *) $0 = 0x0000000100008015
(lldb) p (void*) $rax
(void *) $1 = 0x0000000008070605
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f0b a.out`test + 27
a.out`test:
->  0x100003f0b <+27>: movq   $0x4, %rax
    0x100003f12 <+34>: movl   0x4(%rbx,%rax), %ecx
    0x100003f16 <+38>: leaq   0x4(%rbx,%rax), %rcx
    0x100003f1b <+43>: movb   0x2(%rbx), %al
Target 0: (a.out) stopped.
(lldb) p (void*) $rax
(void *) $1 = 0x0000001255080706
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f12 a.out`test + 34
a.out`test:
->  0x100003f12 <+34>: movl   0x4(%rbx,%rax), %ecx
    0x100003f16 <+38>: leaq   0x4(%rbx,%rax), %rcx
    0x100003f1b <+43>: movb   0x2(%rbx), %al
    0x100003f1e <+46>: movsbl 0x2(%rbx), %eax
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f16 a.out`test + 38
a.out`test:
->  0x100003f16 <+38>: leaq   0x4(%rbx,%rax), %rcx
    0x100003f1b <+43>: movb   0x2(%rbx), %al
    0x100003f1e <+46>: movsbl 0x2(%rbx), %eax
    0x100003f22 <+50>: movsbl 0x3(%rbx), %eax
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f1b a.out`test + 43
a.out`test:
->  0x100003f1b <+43>: movb   0x2(%rbx), %al
    0x100003f1e <+46>: movsbl 0x2(%rbx), %eax
    0x100003f22 <+50>: movsbl 0x3(%rbx), %eax
    0x100003f26 <+54>: movsbq 0x3(%rbx), %rax
Target 0: (a.out) stopped.
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f1e a.out`test + 46
a.out`test:
->  0x100003f1e <+46>: movsbl 0x2(%rbx), %eax
    0x100003f22 <+50>: movsbl 0x3(%rbx), %eax
    0x100003f26 <+54>: movsbq 0x3(%rbx), %rax
    0x100003f2b <+59>: leaq   0x8(%rbx,%rax), %rcx
Target 0: (a.out) stopped.
(lldb) p (void*) $eax
(void *) $2 = 0x0000000000000008
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f22 a.out`test + 50
a.out`test:
->  0x100003f22 <+50>: movsbl 0x3(%rbx), %eax
    0x100003f26 <+54>: movsbq 0x3(%rbx), %rax
    0x100003f2b <+59>: leaq   0x8(%rbx,%rax), %rcx
    0x100003f30 <+64>: leaq   0x8(%rbx,%rax,2), %rcx
Target 0: (a.out) stopped.
(lldb) p (void*) $eax
(void *) $3 = 0x0000000000000008
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f26 a.out`test + 54
a.out`test:
->  0x100003f26 <+54>: movsbq 0x3(%rbx), %rax
    0x100003f2b <+59>: leaq   0x8(%rbx,%rax), %rcx
    0x100003f30 <+64>: leaq   0x8(%rbx,%rax,2), %rcx
    0x100003f35 <+69>: retq
Target 0: (a.out) stopped.
(lldb) p (void*) $eax
(void *) $4 = 0x0000000000000055
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f2b a.out`test + 59
a.out`test:
->  0x100003f2b <+59>: leaq   0x8(%rbx,%rax), %rcx
    0x100003f30 <+64>: leaq   0x8(%rbx,%rax,2), %rcx
    0x100003f35 <+69>: retq
    0x100003f36 <+70>: nopw   %cs:(%rax,%rax)
Target 0: (a.out) stopped.
(lldb) p (void*) $rax
(void *) $5 = 0x0000000000000055
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f30 a.out`test + 64
a.out`test:
->  0x100003f30 <+64>: leaq   0x8(%rbx,%rax,2), %rcx
    0x100003f35 <+69>: retq
    0x100003f36 <+70>: nopw   %cs:(%rax,%rax)

a.out`main:
    0x100003f40 <+0>:  pushq  %rbp
Target 0: (a.out) stopped.
(lldb) p (void*) $rcx
(void *) $6 = 0x0000000100008072
(lldb) stepi
Process 53662 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = instruction step into
    frame #0: 0x0000000100003f35 a.out`test + 69
a.out`test:
->  0x100003f35 <+69>: retq
    0x100003f36 <+70>: nopw   %cs:(%rax,%rax)

a.out`main:
    0x100003f40 <+0>:  pushq  %rbp
    0x100003f41 <+1>:  movq   %rsp, %rbp
Target 0: (a.out) stopped.
(lldb) p (void*) $rcx
(void *) $7 = 0x00000001000080c7

作業ログをもとに課題ページで問われている質問とその解答の表を導出した

質問	解答
movl (%rbx),%eaxを実行した時の%eaxの値は?	0x0000000008070605
incq %rbxを実行してmovq (%rbx),%raxを実行した時の%raxの値は?	0x0000001255080706
movl 4(%rbx,%rax),%ecxを実行した時の%ecxレジスタの値がロードするメモリのアドレスは?	0x000000010000801d(%rbx + %rax + 4 = 0x100008015 + 0x4 + 0x4)
movb 2(%rbx),%alを実行した時の%eaxの値は?	0x0000000000000008
movsbl 2(%rbx),%eaxを実行した時の%eaxの値は?	0x0000000000000008
movsbl 3(%rbx),%eaxを実行した時の%eaxの値は?	0x0000000000000055
movsbq 3(%rbx),%raxを実行した時の%raxの値は?	0x0000000000000055
leaq 8(%rbx,%rax),%rcxを実行した時の%rcxの値は?	0x0000000100008072
leaq 8(%rbx,%rax,2),%rcxを実行した時の%rcxの値は?	0x00000001000080c7

4.1

この問題を解く

以下の式を手で木に変換してみよ。さらに、これをスタックを使って計算するintel64の命令に落としてみよ。examples のコンパイラを使った結果と比較してみよ。また、gdb で実際にどのような動作をするかを調べてみよ。
3-(4-2)
0+(1+(3-2))-(0+(1+(2-3)))

問題11.1

この問題を解く

記号処理とは文字などで表されたデータを処理することである。これに対して、音声解析や画像処理、ロボットの動作などは、非記号処理と言える。非記号処理におけるコンパイラの役割について、1000-2000字程度で考察せよ。

サノメモ

コンパイラ構成論

最終的な成績

課題に関するファイル

課題の提出方法に関する注意点

1.1 LLVM

(1) cpp

(2) アセンブラ

(3) LLVM byte code

(4) a.out

ARMアセンブラ

2.1

compileしたassemblerを、i386, emt64 のCPUで表示

gdb で i にどのような値が入るかを確認

CPUは、Little-Endian か Big-Endian か

trace の結果

Endian の変換

UNIXのbuiltinのEndianの変換関数の場所と実装

3.1 intel64 (emt64) のアセンブラ

test1.sと a.outを用意する

CPU の endian を x/20x と x/20b を使って確認する

test1.s を書き換えて、さまざまなアセンブラ命令を試す

test1.s を書き換えて、さまざまなアセンブラ命令を試す(2)

4.1

問題11.1