// src/libc.c

#include "global.h"
#include <stddef.h>

// Size in bytes of the word used by the word-at-a-time fast paths.
#define LBLOCKSIZE (sizeof(long))

// Nonzero if the unsigned long X contains a zero (NUL) byte.
// The 0x01... and 0x80... masks are derived from the width of
// unsigned long (~0UL / 0xFF is 0x01 repeated in every byte), so every
// byte of X is tested whatever sizeof(long) is.
#define CONTAINSNULL(X) \
    (((X) - (~0UL / 0xFF)) & ~(X) & ((~0UL / 0xFF) * 0x80))

// Nonzero if pointer X is not aligned on a "long" boundary.
// X is parenthesized so that the cast applies to the whole argument
// expression (e.g. UNALIGNED(p + 1)), not just its first operand.
#define UNALIGNED(X) ((long)(X) & (LBLOCKSIZE - 1))

void *memcpy(void *dst0, const void *src0, size_t len0)
{
    char *dst = dst0;
    const char *src = src0;
    long *aligned_dst;
    const long *aligned_src;
    unsigned int len = len0;

    // If the size is small, or either src or dst is unaligned,
    // then go to the byte copy loop. This should be rare.
    if (len >= 16 && !(UNALIGNED(src) | UNALIGNED(dst)))
    {
        aligned_dst = (long *)dst;
        aligned_src = (long *)src;

        // Copy 4X long words at a time if possible.
        while (len >= 16)
        {
            *aligned_dst++ = *aligned_src++;
            *aligned_dst++ = *aligned_src++;
            *aligned_dst++ = *aligned_src++;
            *aligned_dst++ = *aligned_src++;
            len -= 16;
        }

        // Copy one long word at a time if possible
        while (len >= 4)
        {
            *aligned_dst++ = *aligned_src++;
            len -= 4;
        }

        dst = (char *)aligned_dst;
        src = (char *)aligned_src;
    }

    // Pick up any remaining bytes with a byte copier.
    while (len--)
        *dst++ = *src++;

    return dst0;
}

// Sets the first n bytes at m to the byte value of c and returns m.
void *memset(void *m, int c, size_t n)
{
    // Bytes are written as unsigned char so that converting c is
    // well defined for every int value.
    unsigned char *s = (unsigned char *)m;

    // If the size is small or m is unaligned,
    // then go straight to the byte loop.
    if (n >= LBLOCKSIZE && !UNALIGNED(m))
    {
        // We know that n is large and m is word-aligned.
        unsigned long *aligned_addr = (unsigned long *)m;
        unsigned long buffer;
        size_t i;

        // Store c into each char sized location in buffer so that
        // we can set large blocks quickly.
        c &= 0xFF;
        if (LBLOCKSIZE == 4)
        {
            buffer = (c << 8) | c;
            buffer |= (buffer << 16);
        }
        else
        {
            buffer = 0;
            for (i = 0; i < LBLOCKSIZE; i++)
                buffer = (buffer << 8) | c;
        }

        // Four words per iteration while enough bytes remain.
        while (n >= LBLOCKSIZE * 4)
        {
            *aligned_addr++ = buffer;
            *aligned_addr++ = buffer;
            *aligned_addr++ = buffer;
            *aligned_addr++ = buffer;
            n -= LBLOCKSIZE * 4;
        }

        // Then one word at a time.
        while (n >= LBLOCKSIZE)
        {
            *aligned_addr++ = buffer;
            n -= LBLOCKSIZE;
        }

        s = (unsigned char *)aligned_addr;
    }

    // Pick up the remainder with a bytewise loop.
    while (n--)
        *s++ = (unsigned char)c;

    return m;
}

// Compares the NUL-terminated strings s1 and s2.
// Returns zero when they are equal; otherwise returns the difference
// between the first pair of differing bytes, each read as unsigned char.
int strcmp(const char *s1, const char *s2)
{
    const unsigned char *p1;
    const unsigned char *p2;

    // Word-at-a-time fast path, taken only when both strings start on
    // a long boundary.
    if ((UNALIGNED(s1) | UNALIGNED(s2)) == 0)
    {
        const unsigned long *w1 = (const unsigned long *)s1;
        const unsigned long *w2 = (const unsigned long *)s2;

        for (; *w1 == *w2; w1++, w2++)
        {
            // The words match; if this one holds the terminator,
            // the strings are equal.
            if (CONTAINSNULL(*w1))
                return 0;
        }

        // The first differing word is examined byte by byte below.
        s1 = (const char *)w1;
        s2 = (const char *)w2;
    }

    p1 = (const unsigned char *)s1;
    p2 = (const unsigned char *)s2;

    // Advance while the bytes agree and the terminator is not reached.
    while (*p1 != 0 && *p1 == *p2)
    {
        p1++;
        p2++;
    }

    return *p1 - *p2;
}