#include <sys/queue.h>
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include <unistd.h>
#include <assert.h>
#include <arraylist.h>
#include <diff_main.h>
#include "diff_internal.h"
#include "diff_debug.h"
inline enum diff_chunk_type
diff_chunk_type(const struct diff_chunk *chunk)
{
if (!chunk->left_count && !chunk->right_count)
return CHUNK_EMPTY;
if (!chunk->solved)
return CHUNK_ERROR;
if (!chunk->right_count)
return CHUNK_MINUS;
if (!chunk->left_count)
return CHUNK_PLUS;
if (chunk->left_count != chunk->right_count)
return CHUNK_ERROR;
return CHUNK_SAME;
}
static int
read_at(FILE *f, off_t at_pos, unsigned char *buf, size_t len)
{
ssize_t r;
if (fseeko(f, at_pos, SEEK_SET) == -1)
return errno;
r = fread(buf, sizeof(char), len, f);
if ((r == 0 || r < len) && ferror(f))
return EIO;
if (r != len)
return EIO;
return 0;
}
static int
buf_cmp(const unsigned char *left, size_t left_len,
const unsigned char *right, size_t right_len,
bool ignore_whitespace)
{
int cmp;
if (ignore_whitespace) {
int il = 0, ir = 0;
while (il < left_len && ir < right_len) {
unsigned char cl = left[il];
unsigned char cr = right[ir];
if (isspace((unsigned char)cl) && il < left_len) {
il++;
continue;
}
if (isspace((unsigned char)cr) && ir < right_len) {
ir++;
continue;
}
if (cl > cr)
return 1;
if (cr > cl)
return -1;
il++;
ir++;
}
while (il < left_len) {
unsigned char cl = left[il++];
if (!isspace((unsigned char)cl))
return 1;
}
while (ir < right_len) {
unsigned char cr = right[ir++];
if (!isspace((unsigned char)cr))
return -1;
}
return 0;
}
cmp = memcmp(left, right, MIN(left_len, right_len));
if (cmp)
return cmp;
if (left_len == right_len)
return 0;
return (left_len > right_len) ? 1 : -1;
}
int
diff_atom_cmp(int *cmp,
const struct diff_atom *left,
const struct diff_atom *right)
{
off_t remain_left, remain_right;
int flags = (left->root->diff_flags | right->root->diff_flags);
bool ignore_whitespace = (flags & DIFF_FLAG_IGNORE_WHITESPACE);
if (!left->len && !right->len) {
*cmp = 0;
return 0;
}
if (!ignore_whitespace) {
if (!right->len) {
*cmp = 1;
return 0;
}
if (!left->len) {
*cmp = -1;
return 0;
}
}
if (left->at != NULL && right->at != NULL) {
*cmp = buf_cmp(left->at, left->len, right->at, right->len,
ignore_whitespace);
return 0;
}
remain_left = left->len;
remain_right = right->len;
while (remain_left > 0 || remain_right > 0) {
const size_t chunksz = 8192;
unsigned char buf_left[chunksz], buf_right[chunksz];
const uint8_t *p_left, *p_right;
off_t n_left, n_right;
int r;
if (!remain_right) {
*cmp = 1;
return 0;
}
if (!remain_left) {
*cmp = -1;
return 0;
}
n_left = MIN(chunksz, remain_left);
n_right = MIN(chunksz, remain_right);
if (left->at == NULL) {
r = read_at(left->root->f,
left->pos + (left->len - remain_left),
buf_left, n_left);
if (r) {
*cmp = 0;
return r;
}
p_left = buf_left;
} else {
p_left = left->at + (left->len - remain_left);
}
if (right->at == NULL) {
r = read_at(right->root->f,
right->pos + (right->len - remain_right),
buf_right, n_right);
if (r) {
*cmp = 0;
return r;
}
p_right = buf_right;
} else {
p_right = right->at + (right->len - remain_right);
}
r = buf_cmp(p_left, n_left, p_right, n_right,
ignore_whitespace);
if (r) {
*cmp = r;
return 0;
}
remain_left -= n_left;
remain_right -= n_right;
}
*cmp = 0;
return 0;
}
int
diff_atom_same(bool *same,
const struct diff_atom *left,
const struct diff_atom *right)
{
int cmp;
int r;
if (left->hash != right->hash) {
*same = false;
return 0;
}
r = diff_atom_cmp(&cmp, left, right);
if (r) {
*same = true;
return r;
}
*same = (cmp == 0);
return 0;
}
static struct diff_chunk *
diff_state_add_solved_chunk(struct diff_state *state,
const struct diff_chunk *chunk)
{
diff_chunk_arraylist_t *result;
struct diff_chunk *new_chunk;
enum diff_chunk_type last_t;
enum diff_chunk_type new_t;
struct diff_chunk *last;
result = &state->result->chunks;
last_t = result->len ? diff_chunk_type(&result->head[result->len - 1])
: CHUNK_EMPTY;
new_t = diff_chunk_type(chunk);
debug("ADD %s chunk #%u:\n", chunk->solved ? "solved" : "UNSOLVED",
result->len);
debug("L\n");
debug_dump_atoms(&state->left, chunk->left_start, chunk->left_count);
debug("R\n");
debug_dump_atoms(&state->right, chunk->right_start, chunk->right_count);
if (result->len) {
last = &result->head[result->len - 1];
assert(chunk->left_start
== last->left_start + last->left_count);
assert(chunk->right_start
== last->right_start + last->right_count);
}
if (new_t == last_t) {
new_chunk = &result->head[result->len - 1];
new_chunk->left_count += chunk->left_count;
new_chunk->right_count += chunk->right_count;
debug(" - added chunk touches previous one of same type, joined:\n");
debug("L\n");
debug_dump_atoms(&state->left, new_chunk->left_start, new_chunk->left_count);
debug("R\n");
debug_dump_atoms(&state->right, new_chunk->right_start, new_chunk->right_count);
} else if (last_t == CHUNK_PLUS && new_t == CHUNK_MINUS) {
enum diff_chunk_type prev_last_t =
result->len > 1 ?
diff_chunk_type(&result->head[result->len - 2])
: CHUNK_EMPTY;
if (prev_last_t == CHUNK_MINUS) {
new_chunk = &result->head[result->len - 2];
new_chunk->left_count += chunk->left_count;
new_chunk->right_count += chunk->right_count;
debug(" - added minus-chunk follows plus-chunk,"
" put before that plus-chunk and joined"
" with preceding minus-chunk:\n");
debug("L\n");
debug_dump_atoms(&state->left, new_chunk->left_start, new_chunk->left_count);
debug("R\n");
debug_dump_atoms(&state->right, new_chunk->right_start, new_chunk->right_count);
} else {
ARRAYLIST_INSERT(new_chunk, *result, result->len - 1);
if (!new_chunk)
return NULL;
*new_chunk = *chunk;
last = &result->head[result->len - 1];
new_chunk->right_start = last->right_start;
debug(" - added minus-chunk follows plus-chunk,"
" put before that plus-chunk\n");
}
last = &result->head[result->len - 1];
last->left_start = new_chunk->left_start
+ new_chunk->left_count;
} else {
ARRAYLIST_ADD(new_chunk, *result);
if (!new_chunk)
return NULL;
*new_chunk = *chunk;
}
return new_chunk;
}
struct diff_chunk *
diff_state_add_chunk(struct diff_state *state, bool solved,
struct diff_atom *left_start, unsigned int left_count,
struct diff_atom *right_start, unsigned int right_count)
{
struct diff_chunk *new_chunk;
struct diff_chunk chunk = {
.solved = solved,
.left_start = left_start,
.left_count = left_count,
.right_start = right_start,
.right_count = right_count,
};
if (!solved || state->temp_result.len) {
debug("ADD %s chunk to temp result:\n",
chunk.solved ? "solved" : "UNSOLVED");
debug("L\n");
debug_dump_atoms(&state->left, left_start, left_count);
debug("R\n");
debug_dump_atoms(&state->right, right_start, right_count);
ARRAYLIST_ADD(new_chunk, state->temp_result);
if (!new_chunk)
return NULL;
*new_chunk = chunk;
return new_chunk;
}
return diff_state_add_solved_chunk(state, &chunk);
}
static void
diff_data_init_root(struct diff_data *d, FILE *f, const uint8_t *data,
unsigned long long len, int diff_flags)
{
*d = (struct diff_data){
.f = f,
.pos = 0,
.data = data,
.len = len,
.root = d,
.diff_flags = diff_flags,
};
}
void
diff_data_init_subsection(struct diff_data *d, struct diff_data *parent,
struct diff_atom *from_atom, unsigned int atoms_count)
{
struct diff_atom *last_atom;
debug("diff_data %p parent %p from_atom %p atoms_count %u\n",
d, parent, from_atom, atoms_count);
debug(" from_atom ");
debug_dump_atom(parent, NULL, from_atom);
if (atoms_count == 0) {
*d = (struct diff_data){
.f = NULL,
.pos = 0,
.data = NULL,
.len = 0,
.root = parent->root,
.atoms.head = NULL,
.atoms.len = atoms_count,
};
return;
}
last_atom = from_atom + atoms_count - 1;
*d = (struct diff_data){
.f = NULL,
.pos = from_atom->pos,
.data = from_atom->at,
.len = (last_atom->pos + last_atom->len) - from_atom->pos,
.root = parent->root,
.atoms.head = from_atom,
.atoms.len = atoms_count,
};
debug("subsection:\n");
debug_dump(d);
}
void
diff_data_free(struct diff_data *diff_data)
{
if (!diff_data)
return;
if (diff_data->atoms.allocated)
ARRAYLIST_FREE(diff_data->atoms);
}
int
diff_algo_none(const struct diff_algo_config *algo_config,
struct diff_state *state)
{
debug("\n** %s\n", __func__);
debug("left:\n");
debug_dump(&state->left);
debug("right:\n");
debug_dump(&state->right);
debug_dump_myers_graph(&state->left, &state->right, NULL, NULL, 0, NULL,
0);
struct diff_atom *l = state->left.atoms.head;
unsigned int l_len = state->left.atoms.len;
struct diff_atom *r = state->right.atoms.head;
unsigned int r_len = state->right.atoms.len;
unsigned int equal_atoms_start = 0;
unsigned int equal_atoms_end = 0;
unsigned int l_idx = 0;
unsigned int r_idx = 0;
while (equal_atoms_start < l_len
&& equal_atoms_start < r_len) {
int err;
bool same;
err = diff_atom_same(&same, &l[equal_atoms_start],
&r[equal_atoms_start]);
if (err)
return err;
if (!same)
break;
equal_atoms_start++;
}
while (equal_atoms_end < (l_len - equal_atoms_start)
&& equal_atoms_end < (r_len - equal_atoms_start)) {
int err;
bool same;
err = diff_atom_same(&same, &l[l_len - 1 - equal_atoms_end],
&r[r_len - 1 - equal_atoms_end]);
if (err)
return err;
if (!same)
break;
equal_atoms_end++;
}
if (equal_atoms_start) {
if (!diff_state_add_chunk(state, true,
l, equal_atoms_start,
r, equal_atoms_start))
return ENOMEM;
l_idx += equal_atoms_start;
r_idx += equal_atoms_start;
}
if (equal_atoms_start + equal_atoms_end < l_len) {
unsigned int add_len = l_len - equal_atoms_start - equal_atoms_end;
if (!diff_state_add_chunk(state, true,
&l[l_idx], add_len,
&r[r_idx], 0))
return ENOMEM;
l_idx += add_len;
}
if (equal_atoms_start + equal_atoms_end < r_len) {
unsigned int add_len = r_len - equal_atoms_start - equal_atoms_end;
if (!diff_state_add_chunk(state, true,
&l[l_idx], 0,
&r[r_idx], add_len))
return ENOMEM;
r_idx += add_len;
}
if (equal_atoms_end) {
if (!diff_state_add_chunk(state, true,
&l[l_idx], equal_atoms_end,
&r[r_idx], equal_atoms_end))
return ENOMEM;
}
return DIFF_RC_OK;
}
static int
diff_run_algo(const struct diff_algo_config *algo_config,
struct diff_state *state)
{
int rc;
if (!algo_config || !algo_config->impl
|| !state->recursion_depth_left
|| !state->left.atoms.len || !state->right.atoms.len) {
debug("Fall back to diff_algo_none():%s%s%s\n",
(!algo_config || !algo_config->impl) ? " no-cfg" : "",
(!state->recursion_depth_left) ? " max-depth" : "",
(!state->left.atoms.len || !state->right.atoms.len)?
" trivial" : "");
return diff_algo_none(algo_config, state);
}
ARRAYLIST_FREE(state->temp_result);
ARRAYLIST_INIT(state->temp_result, DIFF_RESULT_ALLOC_BLOCKSIZE);
rc = algo_config->impl(algo_config, state);
switch (rc) {
case DIFF_RC_USE_DIFF_ALGO_FALLBACK:
debug("Got DIFF_RC_USE_DIFF_ALGO_FALLBACK (%p)\n",
algo_config->fallback_algo);
rc = diff_run_algo(algo_config->fallback_algo, state);
goto return_rc;
case DIFF_RC_OK:
break;
default:
goto return_rc;
}
int i;
for (i = 0; i < state->temp_result.len; i++) {
struct diff_chunk *c = &state->temp_result.head[i];
if (c->solved) {
diff_state_add_solved_chunk(state, c);
continue;
}
struct diff_state inner_state = {
.result = state->result,
.recursion_depth_left = state->recursion_depth_left - 1,
.kd_buf = state->kd_buf,
.kd_buf_size = state->kd_buf_size,
};
diff_data_init_subsection(&inner_state.left, &state->left,
c->left_start, c->left_count);
diff_data_init_subsection(&inner_state.right, &state->right,
c->right_start, c->right_count);
rc = diff_run_algo(algo_config->inner_algo, &inner_state);
state->kd_buf = inner_state.kd_buf;
state->kd_buf_size = inner_state.kd_buf_size;
if (rc != DIFF_RC_OK)
goto return_rc;
}
rc = DIFF_RC_OK;
return_rc:
ARRAYLIST_FREE(state->temp_result);
return rc;
}
int
diff_atomize_file(struct diff_data *d,
const struct diff_config *config,
FILE *f, const uint8_t *data, off_t len, int diff_flags)
{
if (!config->atomize_func)
return EINVAL;
diff_data_init_root(d, f, data, len, diff_flags);
return config->atomize_func(config->atomize_func_data, d);
}
struct diff_result *
diff_main(const struct diff_config *config, struct diff_data *left,
struct diff_data *right)
{
struct diff_result *result = malloc(sizeof(struct diff_result));
if (!result)
return NULL;
*result = (struct diff_result){};
result->left = left;
result->right = right;
struct diff_state state = {
.result = result,
.recursion_depth_left = config->max_recursion_depth ?
config->max_recursion_depth : UINT_MAX,
.kd_buf = NULL,
.kd_buf_size = 0,
};
diff_data_init_subsection(&state.left, left,
left->atoms.head,
left->atoms.len);
diff_data_init_subsection(&state.right, right,
right->atoms.head,
right->atoms.len);
result->rc = diff_run_algo(config->algo, &state);
free(state.kd_buf);
return result;
}
void
diff_result_free(struct diff_result *result)
{
if (!result)
return;
ARRAYLIST_FREE(result->chunks);
free(result);
}
int
diff_result_contains_printable_chunks(struct diff_result *result)
{
struct diff_chunk *c;
enum diff_chunk_type t;
int i;
for (i = 0; i < result->chunks.len; i++) {
c = &result->chunks.head[i];
t = diff_chunk_type(c);
if (t == CHUNK_MINUS || t == CHUNK_PLUS)
return 1;
}
return 0;
}