-
Notifications
You must be signed in to change notification settings - Fork 0
/
lz77.c
118 lines (95 loc) · 3.51 KB
/
lz77.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <glib.h> // Dynamic arrays
#define L_LOOKAHEAD 3
#define L_SEARCH 5
const char* const original = "in ulm und um ulm wachsen ulmen."; // Input string
char *search = NULL, *lookahead = NULL; // Window pointers
char *m_search = NULL, *m_lookahead = NULL; // Match pointers
short s_search = 1; // Search window size
typedef struct {
short start;
short length;
char first_mismatch;
} reference;
int main() {
// Pointer to input
char *input = original;
// Initialize dynamic symbol array
GArray *symbols = g_array_new(false, false, sizeof(reference));
printf("A,L,Z\n");
printf("-----\n");
while (*(input + s_search) != '\0') {
// Set window pointers
search = input;
lookahead = input + s_search;
// Print status info
printf("Search: [");
for (int i = 0; i < s_search; i++) {
printf("%c", *(search + i));
}
printf("], lookahead: [");
for (int i = 0; i < L_LOOKAHEAD; i++) {
if (*(lookahead + i) == '\0') break;
printf("%c", *(lookahead + i));
}
printf("]\n");
// Initialize variables for substring matching
char *match = NULL;
reference matchref = {0, 0, *lookahead};
// Find longest match in search window
for (int i = 0; i < s_search; i++) {
// If the first char matches, see how long the match is.
if (*(search + i) == *lookahead) {
// Store reference to current item in search part
match = search + i;
// Move pointers ahead in both windows until no more matches occur
do {
m_search = match;
m_lookahead = lookahead;
if (*m_search != *m_lookahead) {
break;
}
while (*(++m_search) == *(++m_lookahead) && *m_lookahead != '\0') {
// Advance pointers until no more matches occur
}
if (m_search - match >= matchref.length) {
matchref.start = lookahead - match;
matchref.length = m_search - match;
matchref.first_mismatch = *m_lookahead;
}
} while (++match != lookahead);
}
}
printf("%i,%i,%c\n", matchref.start, matchref.length, matchref.first_mismatch);
g_array_append_val(symbols, matchref);
if (matchref.length > 0) {
// output
if (s_search + matchref.length + 1 <= L_SEARCH) {
s_search += matchref.length + 1;
} else {
input += matchref.length + 1;
}
} else {
// output
if (s_search < L_SEARCH) {
s_search++;
} else {
input++;
}
}
}
// Print normal and encoded version
printf("\nOriginal version: \"%s\"\n", original);
printf("Original length: %zu\n", strlen(original));
printf("Compressed version: \"");
reference *symbol = NULL;
for (int i = 0; i < symbols->len; i++) {
symbol = &g_array_index(symbols, reference, i);
printf("%i%i%c", symbol->start, symbol->length, symbol->first_mismatch);
}
printf("\"\n");
printf("Compressed length: %i", symbols->len * 3);
printf(" (%f%%)\n", (float)(symbols->len * 3) / strlen(original) * 100);
}