* Copyright (C) the libgit2 contributors. All rights reserved.
* This file is part of libgit2, distributed under the GNU GPL v2 with
* a Linking Exception. For full terms see the included COPYING file.
#ifndef INCLUDE_git_diff_h__
#define INCLUDE_git_diff_h__
* @brief Git tree and file differencing routines.
* Flags for diff options. A combination of these flags can be passed
* in via the `flags` value in the `git_diff_options`.
/** Normal diff, the default */
* Options controlling which files will be in the diff
/** Reverse the sides of the diff */
GIT_DIFF_REVERSE = (1u << 0),
/** Include ignored files in the diff */
GIT_DIFF_INCLUDE_IGNORED = (1u << 1),
/** Even with GIT_DIFF_INCLUDE_IGNORED, an entire ignored directory
* will be marked with only a single entry in the diff; this flag
* adds all files under the directory as IGNORED entries, too.
GIT_DIFF_RECURSE_IGNORED_DIRS = (1u << 2),
/** Include untracked files in the diff */
GIT_DIFF_INCLUDE_UNTRACKED = (1u << 3),
/** Even with GIT_DIFF_INCLUDE_UNTRACKED, an entire untracked
* directory will be marked with only a single entry in the diff
* (a la what core Git does in `git status`); this flag adds *all*
* files under untracked directories as UNTRACKED entries, too.
GIT_DIFF_RECURSE_UNTRACKED_DIRS = (1u << 4),
/** Include unmodified files in the diff */
GIT_DIFF_INCLUDE_UNMODIFIED = (1u << 5),
/** Normally, a type change between files will be converted into a
* DELETED record for the old and an ADDED record for the new; this
* option enables the generation of TYPECHANGE delta records.
GIT_DIFF_INCLUDE_TYPECHANGE = (1u << 6),
/** Even with GIT_DIFF_INCLUDE_TYPECHANGE, blob->tree changes still
* generally show as a DELETED blob. This flag tries to correctly
* label blob->tree transitions as TYPECHANGE records with new_file's
* mode set to tree. Note: the tree SHA will not be available.
GIT_DIFF_INCLUDE_TYPECHANGE_TREES = (1u << 7),
/** Ignore file mode changes */
GIT_DIFF_IGNORE_FILEMODE = (1u << 8),
/** Treat all submodules as unmodified */
GIT_DIFF_IGNORE_SUBMODULES = (1u << 9),
/** Use case insensitive filename comparisons */
GIT_DIFF_IGNORE_CASE = (1u << 10),
/** May be combined with `GIT_DIFF_IGNORE_CASE` to specify that a file
* that has changed case will be returned as an add/delete pair.
GIT_DIFF_INCLUDE_CASECHANGE = (1u << 11),
/** If the pathspec is set in the diff options, this flag indicates
* that the paths will be treated as literal paths instead of
* fnmatch patterns. Each path in the list must either be a full
* path to a file or a directory. (A trailing slash indicates that
* the path will _only_ match a directory). If a directory is
* specified, all children will be included.
GIT_DIFF_DISABLE_PATHSPEC_MATCH = (1u << 12),
/** Disable updating of the `binary` flag in delta records. This is
* useful when iterating over a diff if you don't need hunk and data
* callbacks and want to avoid having to load the file completely.
GIT_DIFF_SKIP_BINARY_CHECK = (1u << 13),
/** When diff finds an untracked directory, to match the behavior of
* core Git, it scans the contents for IGNORED and UNTRACKED files.
* If *all* contents are IGNORED, then the directory is IGNORED; if
* any contents are not IGNORED, then the directory is UNTRACKED.
* This is extra work that may not matter in many cases. This flag
* turns off that scan and immediately labels an untracked directory
* as UNTRACKED (changing the behavior to not match core Git).
GIT_DIFF_ENABLE_FAST_UNTRACKED_DIRS = (1u << 14),
/** When diff finds a file in the working directory with stat
* information different from the index, but the OID ends up being the
* same, write the correct stat information into the index. Note:
* without this flag, diff will always leave the index untouched.
GIT_DIFF_UPDATE_INDEX = (1u << 15),
/** Include unreadable files in the diff */
GIT_DIFF_INCLUDE_UNREADABLE = (1u << 16),
/** Include unreadable files in the diff, reported as UNTRACKED */
GIT_DIFF_INCLUDE_UNREADABLE_AS_UNTRACKED = (1u << 17),
* Options controlling how output will be generated
/** Use a heuristic that takes indentation and whitespace into account
* which generally can produce better diffs when dealing with ambiguous
* diff hunks.
GIT_DIFF_INDENT_HEURISTIC = (1u << 18),
/** Treat all files as text, disabling binary attributes & detection */
GIT_DIFF_FORCE_TEXT = (1u << 20),
/** Treat all files as binary, disabling text diffs */
GIT_DIFF_FORCE_BINARY = (1u << 21),
/** Ignore all whitespace */
GIT_DIFF_IGNORE_WHITESPACE = (1u << 22),
/** Ignore changes in amount of whitespace */
GIT_DIFF_IGNORE_WHITESPACE_CHANGE = (1u << 23),
/** Ignore whitespace at end of line */
GIT_DIFF_IGNORE_WHITESPACE_EOL = (1u << 24),
/** When generating patch text, include the content of untracked
* files. This automatically turns on GIT_DIFF_INCLUDE_UNTRACKED but
* it does not turn on GIT_DIFF_RECURSE_UNTRACKED_DIRS. Add that
* flag if you want the content of every single UNTRACKED file.
GIT_DIFF_SHOW_UNTRACKED_CONTENT = (1u << 25),
/** When generating output, include the names of unmodified files if
* they are included in the git_diff. Normally these are skipped in
* the formats that list files (e.g. name-only, name-status, raw).
* Even with this, these will not be included in patch format.
GIT_DIFF_SHOW_UNMODIFIED = (1u << 26),
/** Use the "patience diff" algorithm */
GIT_DIFF_PATIENCE = (1u << 28),
/** Take extra time to find minimal diff */
GIT_DIFF_MINIMAL = (1u << 29),
/** Include the necessary deflate / delta information so that `git-apply`
* can apply given diff information to binary files.
GIT_DIFF_SHOW_BINARY = (1u << 30),
* The diff object that contains all individual file deltas.
* A `diff` represents the cumulative list of differences between two
* snapshots of a repository (possibly filtered by a set of file name
* patterns).
* Calculating diffs is generally done in two phases: building a list of
* diffs then traversing it. This makes it easier to share logic across
* the various types of diffs (tree vs tree, workdir vs index, etc.), and
* also allows you to insert optional diff post-processing phases,
* such as rename detection, in between the steps. When you are done with
* a diff object, it must be freed.
* This is an opaque structure which will be allocated by one of the diff
* generator functions below (such as `git_diff_tree_to_tree`). You are
* responsible for releasing the object memory when done, using the
* `git_diff_free()` function.
typedef struct git_diff git_diff; /**< opaque handle; release with `git_diff_free()` */
* Flags for the delta object and the file objects on each side.
* These flags are used for both the `flags` value of the `git_diff_delta`
* and the flags for the `git_diff_file` objects representing the old and
* new sides of the delta. Values outside of this public range should be
* considered reserved for internal or future use.
GIT_DIFF_FLAG_BINARY = (1u << 0), /**< file(s) treated as binary data */
GIT_DIFF_FLAG_NOT_BINARY = (1u << 1), /**< file(s) treated as text data */
GIT_DIFF_FLAG_VALID_ID = (1u << 2), /**< `id` value is known correct */
GIT_DIFF_FLAG_EXISTS = (1u << 3), /**< file exists at this side of the delta */
* What type of change is described by a git_diff_delta?
* `GIT_DELTA_RENAMED` and `GIT_DELTA_COPIED` will only show up if you run
* `git_diff_find_similar()` on the diff object.
* `GIT_DELTA_TYPECHANGE` only shows up given `GIT_DIFF_INCLUDE_TYPECHANGE`
* in the option flags (otherwise type changes will be split into ADDED /
* DELETED pairs).
GIT_DELTA_UNMODIFIED = 0, /**< no changes */
GIT_DELTA_ADDED = 1, /**< entry does not exist in old version */
GIT_DELTA_DELETED = 2, /**< entry does not exist in new version */
GIT_DELTA_MODIFIED = 3, /**< entry content changed between old and new */
GIT_DELTA_RENAMED = 4, /**< entry was renamed between old and new */
GIT_DELTA_COPIED = 5, /**< entry was copied from another old entry */
GIT_DELTA_IGNORED = 6, /**< entry is ignored item in workdir */
GIT_DELTA_UNTRACKED = 7, /**< entry is untracked item in workdir */
GIT_DELTA_TYPECHANGE = 8, /**< type of entry changed between old and new */
GIT_DELTA_UNREADABLE = 9, /**< entry is unreadable */
GIT_DELTA_CONFLICTED = 10, /**< entry in the index is conflicted */
* Description of one side of a delta.
* Although this is called a "file", it could represent a file, a symbolic
* link, a submodule commit id, or even a tree (although that only if you
* are tracking type changes or ignored/untracked directories).
* The `id` is the `git_oid` of the item. If the entry represents an
* absent side of a diff (e.g. the `old_file` of a `GIT_DELTA_ADDED` delta),
* then the oid will be zeroes.
* `path` is the NUL-terminated path to the entry relative to the working
* directory of the repository.
* `size` is the size of the entry in bytes.
* `flags` is a combination of the `git_diff_flag_t` values.
* `mode` is, roughly, the stat() `st_mode` value for the item. This will
* be restricted to one of the `git_filemode_t` values.
* The `id_abbrev` represents the known length of the `id` field, when
* converted to a hex string. It is generally `GIT_OID_HEXSZ`, unless this
* delta was created from reading a patch file, in which case it may be
* abbreviated to something reasonable, like 7 characters.
* Description of changes to one entry.
* A `delta` is a file pair with an old and new revision. The old version
* may be absent if the file was just created and the new version may be
* absent if the file was deleted. A diff is mostly just a list of deltas.
* When iterating over a diff, this will be passed to most callbacks and
* you can use the contents to understand exactly what has changed.
* The `old_file` represents the "from" side of the diff and the `new_file`
* represents the "to" side of the diff. What those mean depends on the
* function that was used to generate the diff and will be documented below.
* You can also use the `GIT_DIFF_REVERSE` flag to flip it around.
* Although the two sides of the delta are named "old_file" and "new_file",
* they actually may correspond to entries that represent a file, a symbolic
* link, a submodule commit id, or even a tree (if you are tracking type
* changes or ignored/untracked directories).
* Under some circumstances, in the name of efficiency, not all fields will
* be filled in, but we generally try to fill in as much as possible. One
* example is that the "flags" field may not have either the `BINARY` or the
* `NOT_BINARY` flag set to avoid examining file contents if you do not pass
* in hunk and/or line callbacks to the diff foreach iteration function. It
* will just use the git attributes for those files.
* The similarity score is zero unless you call `git_diff_find_similar()`
* which does a similarity analysis of files in the diff. Use that
* function to do rename and copy detection, and to split heavily modified
* files in add/delete pairs. After that call, deltas with a status of
* GIT_DELTA_RENAMED or GIT_DELTA_COPIED will have a similarity score
* between 0 and 100 indicating how similar the old and new sides are.
* If you ask `git_diff_find_similar` to find heavily modified files to
* break, but to not *actually* break the records, then GIT_DELTA_MODIFIED
* records may have a non-zero similarity score if the self-similarity is
* below the split threshold. To display this value like core Git, invert
* the score (a la `printf("M%03d", 100 - delta->similarity)`).
uint32_t flags; /**< git_diff_flag_t values */
uint16_t similarity; /**< for RENAMED and COPIED, value 0-100 */
uint16_t nfiles; /**< number of files in this delta */
* Diff notification callback function.
* The callback will be called for each file, just before the `git_diff_delta`
* gets inserted into the diff.
* - returns < 0, the diff process will be aborted.
* - returns > 0, the delta will not be inserted into the diff, but the
* diff process continues.
* - returns 0, the delta is inserted into the diff, and the diff process
* continues.
typedef int GIT_CALLBACK(git_diff_notify_cb)(
const git_diff *diff_so_far,
const git_diff_delta *delta_to_add,
const char *matched_pathspec,
* Diff progress callback.
* Called before each file comparison.
* @param diff_so_far The diff being generated.
* @param old_path The path to the old file or NULL.
* @param new_path The path to the new file or NULL.
* @return Non-zero to abort the diff.
typedef int GIT_CALLBACK(git_diff_progress_cb)(
const git_diff *diff_so_far,
* Structure describing options about how the diff should be executed.
* Setting all values of the structure to zero will yield the default
* values. Similarly, passing NULL for the options structure will
* give the defaults. The default values are marked below.
unsigned int version; /**< version for the struct */
* A combination of `git_diff_option_t` values above.
* Defaults to GIT_DIFF_NORMAL
/* options controlling which files are in the diff */
/** Overrides the submodule ignore setting for all submodules in the diff. */
git_submodule_ignore_t ignore_submodules;
* An array of paths / fnmatch patterns to constrain diff.
* All paths are included by default.
* An optional callback function, notifying the consumer of changes to
* the diff as new deltas are added.
git_diff_notify_cb notify_cb;
* An optional callback function, notifying the consumer of which files
* are being examined as the diff is generated.
git_diff_progress_cb progress_cb;
/** The payload to pass to the callback functions. */
/* options controlling how the diff text is generated */
* The number of unchanged lines that define the boundary of a hunk
* (and to display before and after). Defaults to 3.
* The maximum number of unchanged lines between hunk boundaries before
* the hunks will be merged into one. Defaults to 0.
uint32_t interhunk_lines;
* The abbreviation length to use when formatting object ids.
* Defaults to the value of 'core.abbrev' from the config, or 7 if unset.
* A size (in bytes) above which a blob will be marked as binary
* automatically; pass a negative value to disable.
* The virtual "directory" prefix for old file names in hunk headers.
* The virtual "directory" prefix for new file names in hunk headers.
/* The current version of the diff options structure; pass this as the
 * `version` argument of `git_diff_options_init`. */
#define GIT_DIFF_OPTIONS_VERSION 1
/* Stack initializer for diff options. Alternatively, use
* `git_diff_options_init` for programmatic initialization.
/*
 * Values are positional, following the member order documented for
 * `git_diff_options`: the struct version, no flags (0),
 * GIT_SUBMODULE_IGNORE_UNSPECIFIED as the submodule override, an empty
 * pathspec ({NULL,0}), no notify or progress callback, a NULL payload,
 * and 3 context lines (the documented default). Members that are not
 * listed are zero-initialized by the compiler.
 */
#define GIT_DIFF_OPTIONS_INIT \
{GIT_DIFF_OPTIONS_VERSION, 0, GIT_SUBMODULE_IGNORE_UNSPECIFIED, {NULL,0}, NULL, NULL, NULL, 3}
* Initialize git_diff_options structure
* Initializes a `git_diff_options` with default values. Equivalent to creating
* an instance with GIT_DIFF_OPTIONS_INIT.
* @param opts The `git_diff_options` struct to initialize.
* @param version The struct version; pass `GIT_DIFF_OPTIONS_VERSION`.
* @return Zero on success; -1 on failure.
GIT_EXTERN(int) git_diff_options_init(
* When iterating over a diff, callback that will be made per file.
* @param delta A pointer to the delta data for the file
* @param progress Goes from 0 to 1 over the diff
* @param payload User-specified pointer from foreach function
typedef int GIT_CALLBACK(git_diff_file_cb)(
const git_diff_delta *delta,
/* NOTE(review): presumably the capacity, in bytes, of the buffer that
 * holds a hunk header's text; its user is not visible here -- confirm. */
#define GIT_DIFF_HUNK_HEADER_SIZE 128
* When producing a binary diff, the binary data returned will be
* either the deflated full ("literal") contents of the file, or
* the deflated binary delta between the two sides (whichever is
* smaller).
/** There is no binary delta. */
/** The binary data is the literal contents of the file. */
/** The binary data is the delta from one side to the other. */
/** The contents of one of the files in a binary diff. */
/** The type of binary data for this file. */
/** The binary data, deflated. */
/** The length of the binary data. */
/** The length of the binary data after inflation. */