0x2B | ~0x2B - Hamlet

111,111,111 x 111,111,111 = 12,345,678,987,654,321 - Useless Fact

the brown-dragon blog

Pointers vs Arrays

2009-08-30

One small point that often gets overlooked in writing C/C++ programs is the difference between pointers and arrays.

Syntactically, pointers and arrays are very similar and, in the (very common) case of function parameters, they are actually defined to be identical in the C99 standard.

So it is not surprising that we don't often consider the difference between pointers and arrays. However, in the spirit of unnecessary (and probably useless) optimization, keeping the difference in mind is a good thing.

For example:

    int main (int argc, char **argv)
and
    int main (int argc, char *argv[])

are identical but

pva-arr.c

/* Array variant: the string literal initialises a local array, so the
 * element is read straight out of that array at a fixed index. */
char foo ()
{
    char  arr[] = "hello world!"; /* Using ARRAY */
    return arr [7];               /* index the array itself; yields 'o' */
}

and

pva-ptr.c

/* Pointer variant: `ptr` is a local pointer variable initialised with
 * the address of the string literal; the function returns the character
 * at index 7 of that string. */
char foo ()
{
        char *ptr   = "hello world!"; /* Using POINTER */
    return ptr [7]; /* the pointer's stored value is read first, then indexed */
}

are not!

We can easily see this from the disassembly - both GNU's gcc and Microsoft's cl generate different code for them like so:

pva-gcc.out

;(pva-arr gcc 3.4.4 output)            |;(pva-ptr gcc 3.4.4 output)
                                       +
    .file   "dpta.c"                   |    .file   "dpta.c"
    .section .rdata,"dr"               |    .section .rdata,"dr"
LC0:                                   |LC0:
    .ascii "hello world!\0"            |    .ascii "hello world!\0"
    .text                              |    .text
.globl _foo                            |.globl _foo
    .def _foo; .scl 2; .type 32; .endef|    .def _foo; .scl 2; .type 32; .endef
_foo:                                  |_foo:
    pushl   %ebp                       |    pushl   %ebp
    movl    %esp, %ebp                 |    movl    %esp, %ebp
    subl    $40, %esp                  |    subl    $40, %esp
    movl    $LC0, -12(%ebp)            |    movl    $LC0, -12(%ebp)
    movl    LC0, %eax                  |    movl    LC0, %eax
    movl    %eax, -40(%ebp)            |    movl    %eax, -40(%ebp)
    movl    LC0+4, %eax                |    movl    LC0+4, %eax
    movl    %eax, -36(%ebp)            |    movl    %eax, -36(%ebp)
    movl    LC0+8, %eax                |    movl    LC0+8, %eax
    movl    %eax, -32(%ebp)            |    movl    %eax, -32(%ebp)
    movzbl  LC0+12, %eax               |    movzbl  LC0+12, %eax
    movb    %al, -28(%ebp)             |    movb    %al, -28(%ebp)
                                       |
                                       |
                                       |    movl    -12(%ebp), %eax
;EXTRA INSTRUCTIONS =============>>    |    addl    $7, %eax
                                       |
                                       |
    movsbl  -33(%ebp),%eax             |    movsbl  (%eax),%eax
    leave                              |    leave
    ret                                |    ret

pva-msvc.out

;(pva-arr.c cl 13.10.3077 output)         |;(pva-ptr.c cl 13.10.3077 output)
                                          +
    TITLE   dpta.c                        |    TITLE   dpta.c
    .386P                                 |    .386P
include listing.inc                       |include listing.inc
if @Version gt 510                        |if @Version gt 510
.model FLAT                               |.model FLAT
else                                      |else
_TEXT   SEGMENT PARA USE32 PUBLIC 'CODE'  |_TEXT   SEGMENT PARA USE32 PUBLIC 'CODE'
_TEXT   ENDS                              |_TEXT   ENDS
_DATA   SEGMENT DWORD USE32 PUBLIC 'DATA' |_DATA   SEGMENT DWORD USE32 PUBLIC 'DATA'
_DATA   ENDS                              |_DATA   ENDS
CONST   SEGMENT DWORD USE32 PUBLIC 'CONST'|CONST   SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST   ENDS                              |CONST   ENDS
_BSS    SEGMENT DWORD USE32 PUBLIC 'BSS'  |_BSS    SEGMENT DWORD USE32 PUBLIC 'BSS'
_BSS    ENDS                              |_BSS    ENDS
$$SYMBOLS   SEGMENT BYTE USE32 'DEBSYM'   |$$SYMBOLS   SEGMENT BYTE USE32 'DEBSYM'
$$SYMBOLS   ENDS                          |$$SYMBOLS   ENDS
_TLS    SEGMENT DWORD USE32 PUBLIC 'TLS'  |_TLS    SEGMENT DWORD USE32 PUBLIC 'TLS'
_TLS    ENDS                              |_TLS    ENDS
FLAT    GROUP _DATA, CONST, _BSS          |FLAT    GROUP _DATA, CONST, _BSS
    ASSUME  CS: FLAT, DS: FLAT, SS: FLAT  |    ASSUME  CS: FLAT, DS: FLAT, SS: FLAT
endif                                     |endif
                                          |
INCLUDELIB LIBC                           |INCLUDELIB LIBC
INCLUDELIB OLDNAMES                       |INCLUDELIB OLDNAMES
                                          |
_DATA   SEGMENT                           |_DATA   SEGMENT
$SG471  DB  'hello world!', 00H           |$SG471  DB  'hello world!', 00H
    ORG $+3                               |    ORG $+3
$SG473  DB  'hello world!', 00H           |$SG473  DB  'hello world!', 00H
_DATA   ENDS                              |_DATA   ENDS
PUBLIC  _foo                              |PUBLIC  _foo
_TEXT   SEGMENT                           |_TEXT   SEGMENT
_ptr$ = -20                               |_ptr$ = -20
_arr$ = -16                               |_arr$ = -16
_foo    PROC NEAR                         |_foo    PROC NEAR
    push    ebp                           |    push    ebp
    mov ebp, esp                          |    mov ebp, esp
    sub esp, 20                           |    sub esp, 20
    mov DWORD PTR _ptr$[ebp], OFFSET FLAT>|    mov DWORD PTR _ptr$[ebp], OFFSET FLAT:$SG471
    mov eax, DWORD PTR $SG473             |    mov eax, DWORD PTR $SG473
    mov DWORD PTR _arr$[ebp], eax         |    mov DWORD PTR _arr$[ebp], eax
    mov ecx, DWORD PTR $SG473+4           |    mov ecx, DWORD PTR $SG473+4
    mov DWORD PTR _arr$[ebp+4], ecx       |    mov DWORD PTR _arr$[ebp+4], ecx
    mov edx, DWORD PTR $SG473+8           |    mov edx, DWORD PTR $SG473+8
    mov DWORD PTR _arr$[ebp+8], edx       |    mov DWORD PTR _arr$[ebp+8], edx
    mov al, BYTE PTR $SG473+12            |    mov al, BYTE PTR $SG473+12
    mov BYTE PTR _arr$[ebp+12], al        |    mov BYTE PTR _arr$[ebp+12], al
                                          |
                                          |
; EXTRA INSTRUCTION  ============>>       |    mov ecx, DWORD PTR _ptr$[ebp]
                                          |
                                          |
    mov al, BYTE PTR _arr$[ebp+7]         |    mov al, BYTE PTR [ecx+7]
    mov esp, ebp                          |    mov esp, ebp
    pop ebp                               |    pop ebp
    ret 0                                 |    ret 0
_foo    ENDP                              |_foo    ENDP
_TEXT   ENDS                              |_TEXT   ENDS
END                                       |END

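As expected, the array version is marginally more efficient than the pointer version: the array element sits at a fixed offset in the stack frame and can be read directly, whereas the pointer version must first load the pointer's value and only then read the character it points to. Another minor point to keep in the back of your mind.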

Other Posts

(ordered by Tags then Date)