pocketlang/cli/thirdparty/cwalk/cwalk.c

1440 lines
48 KiB
C
Raw Normal View History

2021-05-29 02:53:46 +08:00
#include <assert.h>
#include <ctype.h>
/* Modified By : https://www.github.com/ThakeeNathees */
#include "cwalk.h"
/* -------------------------------------------------- */
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
/**
* Path style used by this file, initialised at compile time from the target
* platform: the windows style when one of the Win32 macros below is defined,
* the unix style otherwise.
*
* NOTE(review): in a preprocessor expression `&&` binds tighter than `||`, so
* the `!defined(__CYGWIN__)` test only qualifies `defined(__WIN32)` and does
* not apply to WIN32 or _WIN32. Confirm whether that is intended.
*/
#if defined(WIN32) || defined(_WIN32) || \
defined(__WIN32) && !defined(__CYGWIN__)
static enum cwk_path_style path_style = CWK_STYLE_WINDOWS;
#else
static enum cwk_path_style path_style = CWK_STYLE_UNIX;
#endif
/**
* Separator characters accepted by each path style. The table is indexed with
* the active path style; the entry order follows the per-entry comments below.
* The windows entry lists both the backslash and the slash, the unix entry
* only the slash. When a separator is written to the output, the first
* character of the active entry is used.
*/
static const char* separators[] = {
"\\/", // CWK_STYLE_WINDOWS
"/" // CWK_STYLE_UNIX
};
/**
* A joined path represents several path strings which are logically
* concatenated, but not (necessarily) stored in contiguous memory. It allows
* the segment iterators in this file to walk over all of them as if they were
* one single path.
*/
struct cwk_segment_joined
{
// The segment the iteration currently points at.
struct cwk_segment segment;
// The path strings being joined; the array is terminated by a NULL entry.
const char** paths;
// Index into `paths` of the path the current segment was taken from.
size_t path_index;
};
/**
 * Copies up to `length` bytes of `str` into `buffer`, starting at `position`
 * and never writing at or beyond `buffer_size`.
 *
 * The copy is truncated if the remaining space is too small and skipped
 * entirely if `position` is at or past the end of the buffer. No '\0' is
 * appended here; termination is done separately by the callers. memmove() is
 * used, so `str` is allowed to overlap with `buffer`.
 *
 * @param buffer The output buffer.
 * @param buffer_size The total size of the output buffer in bytes.
 * @param position The offset in the buffer at which writing starts.
 * @param str The bytes to write (does not need to be null-terminated).
 * @param length The number of bytes of `str` to write.
 * @return Always `length`, which is the amount that would have been written
 * if everything had fit into the buffer.
 */
static size_t cwk_path_output_sized(char* buffer, size_t buffer_size,
  size_t position, const char* str, size_t length)
{
  size_t available, amount_written;

  // Nothing can be written once the position is at or past the end of the
  // buffer.
  if (position >= buffer_size) {
    return length;
  }

  // Clamp the amount to the space that is left. We compare against the
  // remaining space instead of computing "position + length", because that
  // sum can wrap around for very large lengths and would then defeat the
  // bounds check.
  available = buffer_size - position;
  amount_written = length < available ? length : available;

  if (amount_written > 0) {
    memmove(&buffer[position], str, amount_written);
  }

  // Report the theoretical length, not the (possibly truncated) amount.
  return length;
}
// Writes the "current directory" segment (".") at the given position and
// returns the number of characters this segment occupies. The segment is the
// same for every path style.
static size_t cwk_path_output_current(char* buffer, size_t buffer_size,
  size_t position)
{
  static const char current_segment[] = ".";

  return cwk_path_output_sized(buffer, buffer_size, position, current_segment,
    sizeof(current_segment) - 1);
}
// Writes the "parent directory" segment ("..") at the given position and
// returns the number of characters this segment occupies. The segment is the
// same for every path style.
static size_t cwk_path_output_back(char* buffer, size_t buffer_size,
  size_t position)
{
  static const char back_segment[] = "..";

  return cwk_path_output_sized(buffer, buffer_size, position, back_segment,
    sizeof(back_segment) - 1);
}
static size_t cwk_path_output_separator(char* buffer, size_t buffer_size,
size_t position)
{
// We output a separator, which is a single character.
return cwk_path_output_sized(buffer, buffer_size, position,
separators[path_style], 1);
}
// Writes the single dot which introduces a file extension at the given
// position and returns the number of characters it occupies.
static size_t cwk_path_output_dot(char* buffer, size_t buffer_size,
  size_t position)
{
  static const char extension_dot[] = ".";

  return cwk_path_output_sized(buffer, buffer_size, position, extension_dot,
    sizeof(extension_dot) - 1);
}
// Writes a null-terminated string at the given position. The string is
// measured first and then forwarded to the sized output function, whose
// result is returned.
static size_t cwk_path_output(char* buffer, size_t buffer_size, size_t position,
  const char* str)
{
  return cwk_path_output_sized(buffer, buffer_size, position, str,
    strlen(str));
}
// Places the terminating '\0' into the output buffer. The terminator goes to
// `pos` if that is inside the buffer, otherwise to the last byte of the
// buffer. A buffer of size zero is left untouched.
static void cwk_path_terminate_output(char* buffer, size_t buffer_size,
  size_t pos)
{
  size_t terminator_index;

  if (buffer_size == 0) {
    return;
  }

  terminator_index = pos < buffer_size ? pos : buffer_size - 1;
  buffer[terminator_index] = '\0';
}
/**
 * Compares at most `n` characters of two strings according to the active path
 * style: case sensitively for the unix style, case insensitively otherwise.
 *
 * @param first The first string.
 * @param second The second string.
 * @param n The maximum number of characters to compare.
 * @return true if the first `n` characters are considered equal, or if both
 * strings end at the same position before `n` characters were compared.
 */
static bool cwk_path_is_string_equal(const char* first, const char* second,
  size_t n)
{
  // If the path style is UNIX, we will compare case sensitively. This can be
  // done easily using strncmp.
  if (path_style == CWK_STYLE_UNIX) {
    return strncmp(first, second, n) == 0;
  }

  // However, if this is windows we will have to compare case insensitively.
  // Since there is no standard method to do that we will have to do it on our
  // own.
  while (*first && *second && n > 0) {
    // The characters are converted to unsigned char before they are handed to
    // tolower(), since passing a negative value (which a plain char may hold
    // for bytes above 0x7F) is undefined behavior.
    if (tolower((unsigned char)*first++) != tolower((unsigned char)*second++)) {
      return false;
    }

    --n;
  }

  // We can consider the string to be equal if we either reached n == 0 or both
  // cursors point to a null character.
  return n == 0 || (*first == '\0' && *second == '\0');
}
// Returns a pointer to the next "stop" at or after `c`, which is either the
// first separator or the terminating '\0' of the string.
static const char* cwk_path_find_next_stop(const char* c)
{
  for (; *c != '\0'; ++c) {
    if (cwk_path_is_separator(c)) {
      break;
    }
  }

  return c;
}
// Walks backwards from `c` towards `begin` and returns the previous "stop".
// If a separator is found, the character right after it is returned;
// otherwise the position at which the walk ended is returned.
static const char* cwk_path_find_previous_stop(const char* begin, const char* c)
{
  const char* cursor;

  for (cursor = c; cursor > begin; --cursor) {
    if (cwk_path_is_separator(cursor)) {
      // The stop is the first character after the separator, not the
      // separator itself.
      return cursor + 1;
    }
  }

  // We ended up at the beginning without finding a separator on the way. The
  // first character may still be a separator itself.
  return cwk_path_is_separator(cursor) ? cursor + 1 : cursor;
}
// Initializes `segment` with the first segment found in `segments`, treating
// the string as if it had no root. `path` is only remembered in the segment.
// Returns false (leaving an empty segment at the start of `segments`) if the
// string is empty or consists of separators only.
static bool cwk_path_get_first_segment_without_root(const char* path,
  const char* segments, struct cwk_segment* segment)
{
  const char* cursor = segments;
  const char* stop;

  // Start out with an empty segment located at the beginning. This is what
  // the caller gets if we can not find any segment.
  segment->path = path;
  segment->segments = segments;
  segment->begin = segments;
  segment->end = segments;
  segment->size = 0;

  // An empty string does not have any segment.
  if (*cursor == '\0') {
    return false;
  }

  // Jump over all leading separators. If the string ends while we are doing
  // that, there is no segment either.
  while (cwk_path_is_separator(cursor)) {
    if (*++cursor == '\0') {
      return false;
    }
  }

  // The segment starts here and reaches up to the next stop.
  stop = cwk_path_find_next_stop(cursor);
  segment->begin = cursor;
  segment->end = stop;
  segment->size = (size_t)(stop - cursor);

  return true;
}
// Positions `segment` on the last segment of `path`, treating the path as if
// it had no root. Returns false if the path has no segment at all.
static bool cwk_path_get_last_segment_without_root(const char* path,
  struct cwk_segment* segment)
{
  bool found;

  // Begin at the first segment, assuming that there is no root.
  found = cwk_path_get_first_segment_without_root(path, path, segment);
  if (found) {
    // Advance for as long as there is another segment; the loop stops with
    // `segment` positioned on the last one.
    while (cwk_path_get_next_segment(segment)) {
      continue;
    }
  }

  return found;
}
// Positions the joined segment on the first segment which can be found in the
// NULL-terminated `paths` array. Returns false if none of the paths has a
// segment; the path index then refers to the terminating NULL entry.
static bool cwk_path_get_first_segment_joined(const char** paths,
  struct cwk_segment_joined* sj)
{
  sj->paths = paths;

  // Try one path after the other, starting with the first one, and stop at
  // the first path which yields a segment.
  for (sj->path_index = 0; paths[sj->path_index] != NULL; ++sj->path_index) {
    if (cwk_path_get_first_segment(paths[sj->path_index], &sj->segment)) {
      return true;
    }
  }

  return false;
}
// Advances the joined segment to the next segment, moving on to the following
// paths if the current path has no further segment. Returns false once all
// paths are exhausted.
static bool cwk_path_get_next_segment_joined(struct cwk_segment_joined* sj)
{
  const char* candidate;

  // There is nothing left if we already are behind the last path.
  if (sj->paths[sj->path_index] == NULL) {
    return false;
  }

  // The simple case: the current path still has another segment.
  if (cwk_path_get_next_segment(&sj->segment)) {
    return true;
  }

  // The current path is exhausted, so we look for the next path which has a
  // segment. We move at least one path further. The paths we visit here are
  // not the first one, so anything which looks like a root is treated as part
  // of the segments.
  for (;;) {
    candidate = sj->paths[++sj->path_index];
    if (candidate == NULL) {
      return false;
    }

    if (cwk_path_get_first_segment_without_root(candidate, candidate,
          &sj->segment)) {
      return true;
    }
  }
}
// Moves the joined segment back to the previous segment, stepping into the
// preceding paths if the current path has no earlier segment. Returns false
// once the beginning of the first path is reached.
static bool cwk_path_get_previous_segment_joined(struct cwk_segment_joined* sj)
{
  const char* candidate;
  bool found;

  // Without any path there is no initialized segment, and therefore no
  // previous segment either.
  if (sj->paths[0] == NULL) {
    return false;
  }

  // The simple case: the current path has a previous segment.
  if (cwk_path_get_previous_segment(&sj->segment)) {
    return true;
  }

  // Otherwise we walk back through the preceding paths until one of them
  // provides a last segment, or until there are no paths left.
  while (sj->path_index > 0) {
    --sj->path_index;
    candidate = sj->paths[sj->path_index];

    // Only the very first path may carry a root; all other paths are treated
    // as plain segments.
    if (sj->path_index == 0) {
      found = cwk_path_get_last_segment(candidate, &sj->segment);
    } else {
      found = cwk_path_get_last_segment_without_root(candidate, &sj->segment);
    }

    if (found) {
      return true;
    }
  }

  return false;
}
// Decides whether the back segment ("..") at the current position of `sj`
// will be dropped during normalization. This walks `sj` backwards, so the
// caller has to hand in a copy if it wants to keep its position.
static bool cwk_path_segment_back_will_be_removed(struct cwk_segment_joined* sj)
{
  // The balance of the segments in front of us: every normal segment adds
  // one, every back segment takes one away. Current segments do not count.
  int balance = 0;

  while (cwk_path_get_previous_segment_joined(sj)) {
    switch (cwk_path_get_segment_type(&sj->segment)) {
    case CWK_NORMAL:
      // As soon as the balance becomes positive there is a normal segment
      // which our back segment cancels out, so it will be removed.
      if (++balance > 0) {
        return true;
      }
      break;
    case CWK_BACK:
      // Another back segment needs a normal segment of its own before ours
      // can be cancelled out.
      --balance;
      break;
    default:
      break;
    }
  }

  // We reached the beginning without a positive balance, so the segment
  // stays.
  return false;
}
// Decides whether the normal segment at the current position of `sj` will be
// dropped during normalization because a later back segment ("..") cancels it
// out. This walks `sj` forwards, so the caller has to hand in a copy if it
// wants to keep its position.
static bool cwk_path_segment_normal_will_be_removed(
  struct cwk_segment_joined* sj)
{
  // The number of normal segments after us which still have to be cancelled
  // out before a back segment can reach our own segment. Current segments do
  // not count.
  int depth = 0;

  while (cwk_path_get_next_segment_joined(sj)) {
    switch (cwk_path_get_segment_type(&sj->segment)) {
    case CWK_NORMAL:
      ++depth;
      break;
    case CWK_BACK:
      // Once the depth becomes negative this back segment hits our segment,
      // which will be removed then.
      if (--depth < 0) {
        return true;
      }
      break;
    default:
      break;
    }
  }

  // We reached the end without a negative depth, so the segment stays.
  return false;
}
static bool
cwk_path_segment_will_be_removed(const struct cwk_segment_joined* sj,
bool absolute)
{
enum cwk_segment_type type;
struct cwk_segment_joined sjc;
// We copy the joined path so we don't need to modify it.
sjc = *sj;
// First we check whether this is a CWK_CURRENT or CWK_BACK segment, since
// those will always be dropped.
type = cwk_path_get_segment_type(&sj->segment);
if (type == CWK_CURRENT) {
return true;
} else if (type == CWK_BACK && absolute) {
return true;
} else if (type == CWK_BACK) {
return cwk_path_segment_back_will_be_removed(&sjc);
} else {
return cwk_path_segment_normal_will_be_removed(&sjc);
}
}
// Advances `sj` past every segment which will be removed by normalization.
// Returns true if `sj` ends up on a segment which stays, and false if the end
// of the paths was reached first.
static bool
cwk_path_segment_joined_skip_invisible(struct cwk_segment_joined* sj,
  bool absolute)
{
  for (;;) {
    if (!cwk_path_segment_will_be_removed(sj, absolute)) {
      return true;
    }

    if (!cwk_path_get_next_segment_joined(sj)) {
      return false;
    }
  }
}
/**
* Determines the length of the root of a windows style path.
*
* As implemented below, the reported root is one of:
*  - nothing (length 0) for an empty path or a path without any of the
*    following prefixes,
*  - a single leading separator (length 1),
*  - a device prefix, i.e. two separators, a '?' or '.', and a separator
*    (length 4),
*  - a network (UNC) prefix: two separators, the server name, the separators
*    after it, the share name and one trailing separator if there is one,
*  - any character followed by a ':' (length 2), plus the separator after it
*    if there is one (length 3).
*
* @param path The null-terminated path which will be inspected.
* @param length Receives the number of characters which make up the root.
*/
static void cwk_path_get_root_windows(const char* path, size_t* length)
{
const char* c;
bool is_device_path;
// We can not determine the root if this is an empty string. So we set the
// length to zero and cancel the whole thing.
c = path;
*length = 0;
if (!*c) {
return;
}
// Now we have to verify whether this is a windows network path (UNC), which
// we will consider our root.
if (cwk_path_is_separator(c)) {
++c;
// Check whether the path starts with a single separator, which means this
// is not a network path - just a normal path starting with a separator.
if (!cwk_path_is_separator(c)) {
// Okay, this is not a network path but we still use the separator as a
// root.
++(*length);
return;
}
// A device path is a path which starts with "\\." or "\\?". So, this is a
// network or device path. Skip the second separator. Now we need to
// determine whether this is a device path. We might advance one character
// here if the server name starts with a '?' or a '.', but that's fine
// since we will search for a separator afterwards anyway.
++c;
is_device_path = (*c == '?' || *c == '.') && cwk_path_is_separator(++c);
if (is_device_path) {
// That's a device path, and the root must be either "\\.\" or "\\?\"
// which is 4 characters long. (at least that's how Windows
// GetFullPathName behaves.)
*length = 4;
return;
}
// We will grab anything up to the next stop. The next stop might be a '\0'
// or another separator. That will be the server name.
c = cwk_path_find_next_stop(c);
// If this is a separator and not the end of the string we will have to
// include it (and any separators directly following it). A '\0' is never a
// separator here, so we will not move past the end of the string.
while (cwk_path_is_separator(c)) {
++c;
}
// We are now skipping the shared folder name, which will end after the
// next stop.
c = cwk_path_find_next_stop(c);
// Then there might be a separator at the end. We will include that as well,
// it will mark the path as absolute.
if (cwk_path_is_separator(c)) {
++c;
}
// Finally, calculate the size of the root.
*length = (size_t)(c - path);
return;
}
// The path does not start with a separator. Move to the second character and
// check whether this is a colon. The first character is not inspected any
// further at this point.
if (*++c == ':') {
*length = 2;
// Now check whether this is a backslash (or slash). If it is not, we could
// assume that the next character is a '\0' if it is a valid path. However,
// we will not assume that - since ':' is not valid in a path it must be a
// mistake by the caller then. We will try to understand it anyway.
if (cwk_path_is_separator(++c)) {
*length = 3;
}
}
}
// Determines the length of the root of a unix style path. The root is the
// leading separator, so the length is 1 if the path starts with a separator
// and 0 if it does not.
static void cwk_path_get_root_unix(const char* path, size_t* length)
{
  *length = cwk_path_is_separator(path) ? 1 : 0;
}
// Checks whether a root of the given length makes the path absolute. That is
// the case if there is a root and its last character is a separator.
static bool cwk_path_is_root_absolute(const char* path, size_t length)
{
  const char* last_root_character;

  if (length > 0) {
    last_root_character = path + (length - 1);
    return cwk_path_is_separator(last_root_character);
  }

  // Without a root the path can not be absolute.
  return false;
}
/**
* Joins all paths of the NULL-terminated `paths` array and writes the
* normalized result to `buffer`.
*
* The root is taken from the first path only and copied unchanged. Segments
* which are removed by normalization are skipped, and the remaining ones are
* written with a single separator between them. The output is truncated to
* `buffer_size` and always terminated, unless `buffer_size` is zero.
*
* @param paths The NULL-terminated array of paths; paths[0] is read
* unconditionally.
* @param buffer The output buffer.
* @param buffer_size The size of the output buffer.
* @return The length the complete result has, without the terminator and
* regardless of any truncation.
*/
static size_t cwk_path_join_and_normalize_multiple(const char** paths,
char* buffer, size_t buffer_size)
{
size_t pos;
bool absolute, has_segment_output;
struct cwk_segment_joined sj;
// We initialize the position after the root, which should get us started.
cwk_path_get_root(paths[0], &pos);
// Determine whether the path is absolute or not. We need that to determine
// later on whether we can remove superfluous "../" or not.
absolute = cwk_path_is_root_absolute(paths[0], pos);
// First copy the root to the output. We will not modify the root.
cwk_path_output_sized(buffer, buffer_size, 0, paths[0], pos);
// So we just grab the first segment. If none of the paths has a segment, the
// root we copied above (which may be empty) is the whole result and we only
// have to terminate it.
// NOTE(review): with an empty root this yields an empty string, since the
// "." fallback further below is skipped - confirm that this is intended.
if (!cwk_path_get_first_segment_joined(paths, &sj)) {
goto done;
}
// Let's assume that we don't have any segment output for now. We will toggle
// this flag once there is some output.
has_segment_output = false;
do {
// Check whether we have to drop this segment because of resolving a
// relative path or because it is a CWK_CURRENT segment. The "continue"
// proceeds with the loop condition, which moves on to the next segment.
if (cwk_path_segment_will_be_removed(&sj, absolute)) {
continue;
}
// We add a separator if we previously wrote a segment. The last segment
// must not have a trailing separator. This must happen before the segment
// output, since we would override the null terminating character with
// reused buffers if this was done afterwards.
if (has_segment_output) {
pos += cwk_path_output_separator(buffer, buffer_size, pos);
}
// Remember that we have segment output. This is necessary since we might
// have segments but they are all removed.
has_segment_output = true;
// Write out the segment but keep in mind that we need to follow the
// buffer size limitations. That's why we use the path output functions
// here.
pos += cwk_path_output_sized(buffer, buffer_size, pos, sj.segment.begin,
sj.segment.size);
} while (cwk_path_get_next_segment_joined(&sj));
// Check whether nothing at all has been written, neither a root nor a
// segment.
if (!has_segment_output && pos == 0) {
// This may happen if the path is relative and all segments have been
// removed. We can not have an empty output - an empty output means we stay
// in the current directory. So we will output a ".".
assert(absolute == false);
pos += cwk_path_output_current(buffer, buffer_size, pos);
}
// We must append a '\0' in any case, unless the buffer size is zero. If the
// buffer size is zero we can not write anything at all.
done:
cwk_path_terminate_output(buffer, buffer_size, pos);
// And finally let our caller know about the total size of the normalized
// path.
return pos;
}
/**
 * Generates an absolute path from a base and a (usually relative) path and
 * writes the normalized result to `buffer`.
 *
 * If `path` is absolute on its own, the base is ignored and only `path` is
 * normalized. Otherwise `path` is appended to `base`; if `base` is not
 * absolute, a single separator is placed in front of it as a fake root.
 *
 * @param base The base path the relative path is resolved against.
 * @param path The path which will be made absolute.
 * @param buffer The output buffer.
 * @param buffer_size The size of the output buffer.
 * @return The length of the complete result, regardless of any truncation.
 */
size_t cwk_path_get_absolute(const char* base, const char* path, char* buffer,
  size_t buffer_size)
{
  size_t i;
  const char* paths[4];

  // If the submitted path is not relative the base path becomes irrelevant.
  // We will only normalize the submitted path instead. This has to be decided
  // before a fake root is considered: the join function takes the root from
  // the first entry only, so a fake root in front of the path would replace
  // the real root of the path (for instance a drive or a network share).
  if (cwk_path_is_absolute(path)) {
    paths[0] = path;
    paths[1] = NULL;
    return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
  }

  // The base should be an absolute path if the caller is using the API
  // correctly. However, it might not be, and in that case we will put a fake
  // root at the beginning.
  i = 0;
  if (!cwk_path_is_absolute(base)) {
    paths[i++] = path_style == CWK_STYLE_WINDOWS ? "\\" : "/";
  }

  // Append the relative path to the base path and normalize it. The result
  // will be a new absolute path.
  paths[i++] = base;
  paths[i++] = path;
  paths[i] = NULL;

  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
/**
 * Advances both joined segments in lockstep for as long as their visible
 * segments are equal.
 *
 * @param bsj The joined segment of the base path; it is advanced.
 * @param osj The joined segment of the other path; it is advanced.
 * @param absolute Whether the paths are absolute, which influences which
 * segments are invisible.
 * @param base_available Receives whether the base path still has a segment
 * at the point where the iteration stopped.
 * @param other_available Receives whether the other path still has a segment
 * at the point where the iteration stopped.
 */
static void cwk_path_skip_segments_until_diverge(struct cwk_segment_joined* bsj,
  struct cwk_segment_joined* osj, bool absolute, bool* base_available,
  bool* other_available)
{
  // Now looping over all segments until they start to diverge. A path may
  // diverge if two segments are not equal or if one path reaches the end.
  do {
    // Check whether there is anything available after we skip everything which
    // is invisible. We do that for both paths, since we want to let the caller
    // know which path has some trailing segments after they diverge.
    *base_available = cwk_path_segment_joined_skip_invisible(bsj, absolute);
    *other_available = cwk_path_segment_joined_skip_invisible(osj, absolute);

    // We are done if one or both of those paths reached the end. They either
    // diverge or both reached the end - but in both cases we can not continue
    // here.
    if (!*base_available || !*other_available) {
      break;
    }

    // Compare the content of both segments. We are done if they are not equal,
    // since they diverge. The sizes have to be compared as well: the string
    // comparison only looks at the first "size" characters, so without the
    // size check a segment would be considered equal to any longer segment
    // which merely starts with it (e.g. "abc" and "abcd").
    if (bsj->segment.size != osj->segment.size ||
        !cwk_path_is_string_equal(bsj->segment.begin, osj->segment.begin,
          bsj->segment.size)) {
      break;
    }

    // We keep going until one of those segments reached the end. The next
    // segment might be invisible, but we will check for that in the beginning
    // of the loop once again.
    *base_available = cwk_path_get_next_segment_joined(bsj);
    *other_available = cwk_path_get_next_segment_joined(osj);
  } while (*base_available && *other_available);
}
/**
* Generates a relative path which leads from `base_directory` to `path` and
* writes it to `buffer`.
*
* The output is empty if the two paths do not share the same root. Otherwise
* it consists of one back segment for every remaining visible segment of the
* base, followed by the remaining visible segments of the target, or of a
* single current segment if nothing remains on either side.
*
* @param base_directory The directory the result is relative to.
* @param path The target path.
* @param buffer The output buffer.
* @param buffer_size The size of the output buffer.
* @return The length of the complete result, regardless of any truncation.
*/
size_t cwk_path_get_relative(const char* base_directory, const char* path,
char* buffer, size_t buffer_size)
{
size_t pos, base_root_length, path_root_length;
bool absolute, base_available, other_available, has_output;
const char* base_paths[2], * other_paths[2];
struct cwk_segment_joined bsj, osj;
pos = 0;
// First we compare the roots of those two paths. If the roots are not equal
// we can't continue, since there is no way to get a relative path from
// different roots. In that case the output is an empty string.
cwk_path_get_root(base_directory, &base_root_length);
cwk_path_get_root(path, &path_root_length);
if (base_root_length != path_root_length ||
!cwk_path_is_string_equal(base_directory, path, base_root_length)) {
cwk_path_terminate_output(buffer, buffer_size, pos);
return pos;
}
// Verify whether this is an absolute path. We need to know that since we can
// remove all back-segments if it is.
absolute = cwk_path_is_root_absolute(base_directory, base_root_length);
// Initialize our joined segments. This will allow us to use the internal
// functions to skip until diverge and invisible. We only have one path in
// them though.
base_paths[0] = base_directory;
base_paths[1] = NULL;
other_paths[0] = path;
other_paths[1] = NULL;
cwk_path_get_first_segment_joined(base_paths, &bsj);
cwk_path_get_first_segment_joined(other_paths, &osj);
// Okay, now we skip until the segments diverge. We don't have anything to do
// with the segments which are equal.
cwk_path_skip_segments_until_diverge(&bsj, &osj, absolute, &base_available,
&other_available);
// Assume there is no output until we have got some. We will need this
// information later on to remove the trailing separator or alternatively
// output a current-segment.
has_output = false;
// So if we still have some segments left in the base path we will now output
// a back segment for all of them.
if (base_available) {
do {
// Skip any invisible segment. We don't care about those and we don't need
// to navigate back because of them.
if (!cwk_path_segment_joined_skip_invisible(&bsj, absolute)) {
break;
}
// Toggle the flag if we have output. We need to remember that, since we
// want to remove the trailing separator.
has_output = true;
// Output the back segment and a separator. No need to worry about the
// superfluous separator since it will be removed later on.
pos += cwk_path_output_back(buffer, buffer_size, pos);
pos += cwk_path_output_separator(buffer, buffer_size, pos);
} while (cwk_path_get_next_segment_joined(&bsj));
}
// And if we have some segments available of the target path we will output
// all of those.
if (other_available) {
do {
// Again, skip any invisible segments since we don't need to navigate into
// them.
if (!cwk_path_segment_joined_skip_invisible(&osj, absolute)) {
break;
}
// Toggle the flag if we have output. We need to remember that, since we
// want to remove the trailing separator.
has_output = true;
// Output the current segment and a separator. No need to worry about the
// superfluous separator since it will be removed later on.
pos += cwk_path_output_sized(buffer, buffer_size, pos, osj.segment.begin,
osj.segment.size);
pos += cwk_path_output_separator(buffer, buffer_size, pos);
} while (cwk_path_get_next_segment_joined(&osj));
}
// If we have some output by now we will have to remove the trailing
// separator. We simply do that by moving back one character. The terminate
// output function will then place the '\0' on this position. Otherwise, if
// there is no output, we will have to output a "current directory", since the
// target path points to the base path.
if (has_output) {
--pos;
} else {
pos += cwk_path_output_current(buffer, buffer_size, pos);
}
// Finally, we can terminate the output - which means we place a '\0' at the
// current position or at the end of the buffer.
cwk_path_terminate_output(buffer, buffer_size, pos);
return pos;
}
// Joins two paths and writes the normalized result to the buffer. Returns the
// length of the complete result, regardless of any truncation.
size_t cwk_path_join(const char* path_a, const char* path_b, char* buffer,
  size_t buffer_size)
{
  // The internal function expects a NULL-terminated list of paths, so we wrap
  // the two paths into one and let it do the hard work.
  const char* paths[3] = { path_a, path_b, NULL };

  return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
// Joins all paths of the NULL-terminated `paths` array and writes the
// normalized result to the buffer. Returns whatever the internal join and
// normalize function returns.
size_t cwk_path_join_multiple(const char** paths, char* buffer,
size_t buffer_size)
{
// We can just call the internal join and normalize function for this one,
// since it will handle everything.
return cwk_path_join_and_normalize_multiple(paths, buffer, buffer_size);
}
// Determines the length of the root of the path, using the implementation
// which belongs to the active path style.
void cwk_path_get_root(const char* path, size_t* length)
{
  if (path_style != CWK_STYLE_WINDOWS) {
    cwk_path_get_root_unix(path, length);
    return;
  }

  cwk_path_get_root_windows(path, length);
}
// Replaces the root of the path with `new_root` and writes the result to the
// buffer. Returns the length of the complete result, regardless of any
// truncation.
size_t cwk_path_change_root(const char* path, const char* new_root,
  char* buffer, size_t buffer_size)
{
  size_t old_root_length, replacement_length, whole_length, rest_length;
  const char* rest;

  // Measure everything up front: the root which gets replaced, its
  // replacement and the whole path.
  cwk_path_get_root(path, &old_root_length);
  replacement_length = strlen(new_root);
  whole_length = strlen(path);

  // The rest is everything which follows the old root.
  rest = &path[old_root_length];
  rest_length = whole_length - old_root_length;

  // The order matters here. The source path and the buffer may overlap, so
  // the rest has to be moved to its final place before the new root is
  // written in front of it. Otherwise the root could overwrite the rest.
  cwk_path_output_sized(buffer, buffer_size, replacement_length, rest,
    rest_length);
  cwk_path_output_sized(buffer, buffer_size, 0, new_root, replacement_length);

  // Terminate the output behind the new root and the rest.
  cwk_path_terminate_output(buffer, buffer_size,
    rest_length + replacement_length);

  return rest_length + replacement_length;
}
// Determines whether the path is absolute, which is decided by its root.
bool cwk_path_is_absolute(const char* path)
{
size_t length;
// We grab the length of the root of the path, which is zero if the path has
// no root.
cwk_path_get_root(path, &length);
// Now we can determine whether the root is absolute or not.
return cwk_path_is_root_absolute(path, length);
}
// Determines whether the path is relative. This is the exact opposite of
// cwk_path_is_absolute().
bool cwk_path_is_relative(const char* path)
{
// The path is relative if it is not absolute.
return !cwk_path_is_absolute(path);
}
void cwk_path_get_basename(const char* path, const char** basename,
size_t* length)
{
struct cwk_segment segment;
// We get the last segment of the path. The last segment will contain the
// basename if there is any. If there are no segments we will set the basename
// to NULL and the length to 0.
if (!cwk_path_get_last_segment(path, &segment)) {
*basename = NULL;
*length = 0;
return;
}
// Now we can just output the segment contents, since that's our basename.
// There might be trailing separators after the basename, but the size does
// not include those.
*basename = segment.begin;
*length = segment.size;
}
// Replaces the basename of the path with `new_basename` and writes the result
// to the buffer. If the path has no segment, the new basename is placed right
// behind the root. Returns the length of the complete result, regardless of
// any truncation.
size_t cwk_path_change_basename(const char* path, const char* new_basename,
  char* buffer, size_t buffer_size)
{
  struct cwk_segment segment;
  size_t pos, root_size, new_basename_size;

  // The common case: there is a last segment, and replacing it is exactly
  // what the change segment function does.
  if (cwk_path_get_last_segment(path, &segment)) {
    return cwk_path_change_segment(&segment, new_basename, buffer,
      buffer_size);
  }

  // There is no segment in this path, so we create one. The root is written
  // first and stays untouched.
  cwk_path_get_root(path, &root_size);
  pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);

  // Drop the separators in front of the new basename.
  while (cwk_path_is_separator(new_basename)) {
    ++new_basename;
  }

  // Measure what is left and drop the separators at its end as well.
  new_basename_size = strlen(new_basename);
  while (new_basename_size > 0 &&
         cwk_path_is_separator(&new_basename[new_basename_size - 1])) {
    --new_basename_size;
  }

  // The trimmed basename goes right behind the root.
  pos += cwk_path_output_sized(buffer, buffer_size, pos, new_basename,
    new_basename_size);

  cwk_path_terminate_output(buffer, buffer_size, pos);
  return pos;
}
void cwk_path_get_dirname(const char* path, size_t* length)
{
struct cwk_segment segment;
// We get the last segment of the path. The last segment will contain the
// basename if there is any. If there are no segments we will set the length
// to 0.
if (!cwk_path_get_last_segment(path, &segment)) {
*length = 0;
return;
}
// We can now return the length from the beginning of the string up to the
// beginning of the last segment.
*length = (size_t)(segment.begin - path);
}
/**
 * Finds the extension of the path, which is the part of the last segment
 * beginning at its last dot (the dot is included).
 *
 * @param path The path which will be inspected.
 * @param extension Receives a pointer to the dot of the extension. It is left
 * untouched if no extension is found.
 * @param length Receives the length of the extension, including the dot. It
 * is left untouched if no extension is found.
 * @return true if the last segment contains a dot, false if there is no
 * segment or no dot in it.
 */
bool cwk_path_get_extension(const char* path, const char** extension,
  size_t* length)
{
  struct cwk_segment segment;
  const char* c;

  // We get the last segment of the path. The last segment will contain the
  // extension if there is any.
  if (!cwk_path_get_last_segment(path, &segment)) {
    return false;
  }

  // Now we search for a dot within the segment. If there is a dot, we consider
  // the rest of the segment the extension. We do this from the end towards the
  // beginning, since we want to find the last dot. The cursor is decremented
  // before it is inspected, so it never moves in front of the beginning of
  // the segment (which might be the beginning of the string). The character
  // at the end of the segment itself is a separator or the '\0', never a dot,
  // so it does not have to be inspected.
  c = segment.end;
  while (c > segment.begin) {
    --c;
    if (*c == '.') {
      // Okay, we found an extension. We can stop looking now.
      *extension = c;
      *length = (size_t)(segment.end - c);
      return true;
    }
  }

  // We couldn't find any extension.
  return false;
}
bool cwk_path_has_extension(const char* path)
{
  // Tells whether cwk_path_get_extension finds an extension in 'path'. The
  // location and size it reports are not needed here and are discarded.
  const char* ignored_extension;
  size_t ignored_length;
  bool found;

  found = cwk_path_get_extension(path, &ignored_extension, &ignored_length);
  return found;
}
size_t cwk_path_change_extension(const char* path, const char* new_extension,
char* buffer, size_t buffer_size)
{
struct cwk_segment segment;
const char* c, * old_extension;
size_t pos, root_size, trail_size, new_extension_size;
// First we try to get the last segment. We may only have a root without any
// segments, in which case we will create one.
if (!cwk_path_get_last_segment(path, &segment)) {
// So there is no segment in this path. First we grab the root and output
// that. We are not going to modify the root in any way. If there is no
// root, this will end up with a root size 0, and nothing will be written.
cwk_path_get_root(path, &root_size);
pos = cwk_path_output_sized(buffer, buffer_size, 0, path, root_size);
// Add a dot if the submitted value doesn't have any.
if (*new_extension != '.') {
pos += cwk_path_output_dot(buffer, buffer_size, pos);
}
// And finally terminate the output and return the total size of the path.
pos += cwk_path_output(buffer, buffer_size, pos, new_extension);
cwk_path_terminate_output(buffer, buffer_size, pos);
return pos;
}
// Now we seek the old extension in the last segment, which we will replace
// with the new one. If there is no old extension, it will point to the end of
// the segment.
old_extension = segment.end;
for (c = segment.begin; c < segment.end; ++c) {
if (*c == '.') {
old_extension = c;
}
}
pos = cwk_path_output_sized(buffer, buffer_size, 0, segment.path,
(size_t)(old_extension - segment.path));
// If the new extension starts with a dot, we will skip that dot. We always
// output exactly one dot before the extension. If the extension contains
// multiple dots, we will output those as part of the extension.
if (*new_extension == '.') {
++new_extension;
}
// We calculate the size of the new extension, including the dot, in order to
// output the trail - which is any part of the path coming after the
// extension. We must output this first, since the buffer may overlap with the
// submitted path - and it would be overridden by longer extensions.
new_extension_size = strlen(new_extension) + 1;
trail_size = cwk_path_output(buffer, buffer_size, pos + new_extension_size,
segment.end);
// Finally we output the dot and the new extension. The new extension itself
// doesn't contain the dot anymore, so we must output that first.
pos += cwk_path_output_dot(buffer, buffer_size, pos);
pos += cwk_path_output(buffer, buffer_size, pos, new_extension);
// Now we terminate the output with a null-terminating character, but before
// we do that we must add the size of the trail to the position which we
// output before.
pos += trail_size;
cwk_path_terminate_output(buffer, buffer_size, pos);
// And the position is our output size now.
return pos;
}
size_t cwk_path_normalize(const char* path, char* buffer, size_t buffer_size)
{
  // Normalizing a single path is handled by the multi-path routine: we hand
  // it a NULL-terminated list whose only entry is the submitted path and
  // return whatever it returns.
  const char* single[2];

  single[1] = NULL;
  single[0] = path;

  return cwk_path_join_and_normalize_multiple(single, buffer, buffer_size);
}
size_t cwk_path_get_intersection(const char* path_base, const char* path_other)
{
  // Returns the number of leading characters of 'path_base' which form the
  // portion shared with 'path_other'. Segments which
  // cwk_path_segment_joined_skip_invisible skips are not compared. Zero is
  // returned if the roots differ.
  bool absolute;
  size_t base_root_length, other_root_length;
  const char* end;
  const char* paths_base[2], * paths_other[2];
  struct cwk_segment_joined base, other;

  // We first compare the two roots. We just return zero if they are not equal.
  // The root lengths must match as well: the string comparison only looks at
  // 'base_root_length' characters, so without the length check a base path
  // with a shorter (or empty) root would match any other root that merely
  // starts with it - e.g. a relative base against an absolute other path.
  cwk_path_get_root(path_base, &base_root_length);
  cwk_path_get_root(path_other, &other_root_length);
  if (base_root_length != other_root_length ||
      !cwk_path_is_string_equal(path_base, path_other, base_root_length)) {
    return 0;
  }

  // Configure our paths. We just have a single path in here for now.
  paths_base[0] = path_base;
  paths_base[1] = NULL;
  paths_other[0] = path_other;
  paths_other[1] = NULL;

  // So we get the first segment of both paths. If one of those paths doesn't
  // have any segment, only the (equal) roots are shared, so we return the
  // length of the root.
  if (!cwk_path_get_first_segment_joined(paths_base, &base) ||
      !cwk_path_get_first_segment_joined(paths_other, &other)) {
    return base_root_length;
  }

  // We now determine whether the path is absolute or not. This is required
  // because we will ignore removed segments, and this behaves differently if
  // the path is absolute. However, we only need to check the base path because
  // we are guaranteed that both paths are either relative or absolute.
  absolute = cwk_path_is_root_absolute(path_base, base_root_length);

  // We must keep track of the end of the previous segment. Initially, this is
  // set to the end of the root. This means that the root length is returned
  // if the first segment is not equal.
  end = path_base + base_root_length;

  // Now we loop over both segments until one of them reaches the end or their
  // contents are not equal.
  do {
    // We skip all segments which will be removed in each path, since we want to
    // know about the true path.
    if (!cwk_path_segment_joined_skip_invisible(&base, absolute) ||
        !cwk_path_segment_joined_skip_invisible(&other, absolute)) {
      break;
    }

    // Two segments are only equal if they have the same size and the same
    // content. The size check is required because the string comparison is
    // limited to the size of the base segment, which would otherwise treat a
    // base segment as equal to any longer segment it is a prefix of.
    if (base.segment.size != other.segment.size ||
        !cwk_path_is_string_equal(base.segment.begin, other.segment.begin,
          base.segment.size)) {
      // So the content of those two segments are not equal. We will return the
      // size up to the end of the last equal segment.
      return (size_t)(end - path_base);
    }

    // Remember the end of the previous segment before we go to the next one.
    end = base.segment.end;
  } while (cwk_path_get_next_segment_joined(&base) &&
           cwk_path_get_next_segment_joined(&other));

  // Now we calculate the length up to the last point where our paths pointed to
  // the same place.
  return (size_t)(end - path_base);
}
bool cwk_path_get_first_segment(const char* path, struct cwk_segment* segment)
{
  // Finds the first segment of 'path'. The root is a separate entity and not
  // part of any segment, so the search starts right behind it. The result of
  // the root-less lookup is passed straight back to the caller.
  size_t root_length;

  cwk_path_get_root(path, &root_length);

  return cwk_path_get_first_segment_without_root(path, path + root_length,
    segment);
}
bool cwk_path_get_last_segment(const char* path, struct cwk_segment* segment)
{
  // Finds the last segment of 'path' by starting at the first one and
  // advancing for as long as there is a next one.

  // Without a first segment there cannot be a last one either.
  if (!cwk_path_get_first_segment(path, segment)) {
    return false;
  }

  // Keep stepping forward. Once the stepping function reports that no further
  // segment exists, the caller's struct holds the final segment.
  for (;;) {
    if (!cwk_path_get_next_segment(segment)) {
      break;
    }
  }

  return true;
}
bool cwk_path_get_next_segment(struct cwk_segment* segment)
{
  // Advances 'segment' to the segment following it. Returns false, leaving
  // the struct untouched, when no further segment exists.

  // Start right behind the current segment. That character is either the
  // string terminator or a separator.
  const char* c = segment->begin + segment->size;
  if (*c == '\0') {
    return false;
  }

  // Step over the separator we are on and over any further separators that
  // follow it directly.
  assert(cwk_path_is_separator(c));
  for (++c; cwk_path_is_separator(c); ++c) {
    // Nothing to do, we are just skipping.
  }

  // Only separators were left, so the string ends without another segment.
  if (*c == '\0') {
    return false;
  }

  // A segment starts here. Record its beginning, locate its end and derive
  // the size from both.
  segment->begin = c;
  segment->end = cwk_path_find_next_stop(c);
  segment->size = (size_t)(segment->end - segment->begin);

  return true;
}
bool cwk_path_get_previous_segment(struct cwk_segment* segment)
{
  // Moves 'segment' back to the segment in front of it. Returns false,
  // leaving the struct untouched, when there is none.
  const char* cursor = segment->begin;

  // If the current segment already starts at (or before) the beginning of the
  // segment area, nothing can precede it.
  if (cursor <= segment->segments) {
    return false;
  }

  // Walk backwards across the separators in front of the current segment.
  for (;;) {
    --cursor;

    // We ran past the start of the segment area while still skipping
    // separators, so there is no previous segment.
    if (cursor < segment->segments) {
      return false;
    }

    // The first non-separator character is the last character of the
    // previous segment.
    if (!cwk_path_is_separator(cursor)) {
      break;
    }
  }

  // The cursor rests on the last character of the previous segment; its end
  // is one past that, and its beginning is found by searching backwards.
  segment->end = cursor + 1;
  segment->begin = cwk_path_find_previous_stop(segment->segments, cursor);
  segment->size = (size_t)(segment->end - segment->begin);

  return true;
}
enum cwk_segment_type cwk_path_get_segment_type(
  const struct cwk_segment* segment)
{
  // Classifies a segment by comparing at most 'size' characters of its
  // content against "." first and ".." second. Anything that matches neither
  // is a normal segment.
  const char* content = segment->begin;
  const size_t limit = segment->size;

  if (strncmp(content, ".", limit) == 0) {
    return CWK_CURRENT;
  }

  if (strncmp(content, "..", limit) == 0) {
    return CWK_BACK;
  }

  return CWK_NORMAL;
}
bool cwk_path_is_separator(const char* str)
{
  // Tells whether the character 'str' points at is one of the separators of
  // the currently configured path style.
  const char* candidate;

  // Compare against every separator character of the active style. The
  // terminator of the separator list itself is never compared.
  for (candidate = separators[path_style]; *candidate != '\0'; ++candidate) {
    if (*str == *candidate) {
      return true;
    }
  }

  return false;
}
size_t cwk_path_change_segment(struct cwk_segment* segment, const char* value,
  char* buffer, size_t buffer_size)
{
  // Writes the path that 'segment' belongs to into 'buffer', with the content
  // of that segment replaced by 'value' (trimmed of separators on both
  // sides), and returns the accumulated output size.
  size_t written, trimmed_length, remainder_length;

  // The head - everything from the start of the path up to the segment - is
  // carried over unchanged.
  written = cwk_path_output_sized(buffer, buffer_size, 0, segment->path,
    (size_t)(segment->begin - segment->path));

  // Trim the value on the left by stepping over leading separators.
  while (cwk_path_is_separator(value)) {
    ++value;
  }

  // Trim the value on the right: start with the full remaining length and
  // shrink it while the last counted character is a separator.
  trimmed_length = strlen(value);
  while (trimmed_length > 0 &&
         cwk_path_is_separator(&value[trimmed_length - 1])) {
    --trimmed_length;
  }

  // The tail is whatever follows the segment in the original path. It stays
  // the same, but it must be written before the value: buffer and source may
  // overlap, and a value longer than the old segment would otherwise
  // overwrite the tail before it has been copied.
  remainder_length = strlen(segment->end);
  cwk_path_output_sized(buffer, buffer_size, written + trimmed_length,
    segment->end, remainder_length);

  // Now the trimmed value is placed into the gap between head and tail.
  written += cwk_path_output_sized(buffer, buffer_size, written, value,
    trimmed_length);

  // Account for the tail emitted earlier and make sure the output is
  // terminated.
  written += remainder_length;
  cwk_path_terminate_output(buffer, buffer_size, written);

  return written;
}
enum cwk_path_style cwk_path_guess_style(const char* path)
{
const char* c;
size_t root_length;
struct cwk_segment segment;
// First we determine the root. Only windows roots can be longer than a single
// slash, so if we can determine that it starts with something like "C:", we
// know that this is a windows path.
cwk_path_get_root_windows(path, &root_length);
if (root_length > 1) {
return CWK_STYLE_WINDOWS;
}
// Next we check for slashes. Windows uses backslashes, while unix uses
// forward slashes. Windows actually supports both, but our best guess is to
// assume windows with backslashes and unix with forward slashes.
for (c = path; *c; ++c) {
if (*c == *separators[CWK_STYLE_UNIX]) {
return CWK_STYLE_UNIX;
} else if (*c == *separators[CWK_STYLE_WINDOWS]) {
return CWK_STYLE_WINDOWS;
}
}
// This path does not have any slashes. We grab the last segment (which
// actually must be the first one), and determine whether the segment starts
// with a dot. A dot is a hidden folder or file in the UNIX world, in that
// case we assume the path to have UNIX style.
if (!cwk_path_get_last_segment(path, &segment)) {
// We couldn't find any segments, so we default to a UNIX path style since
// there is no way to make any assumptions.
return CWK_STYLE_UNIX;
}
if (*segment.begin == '.') {
return CWK_STYLE_UNIX;
}
// And finally we check whether the last segment contains a dot. If it
// contains a dot, that might be an extension. Windows is more likely to have
// file names with extensions, so our guess would be windows.
for (c = segment.begin; *c; ++c) {
if (*c == '.') {
return CWK_STYLE_WINDOWS;
}
}
// All our checks failed, so we will return a default value which is currently
// UNIX.
return CWK_STYLE_UNIX;
}
void cwk_path_set_style(enum cwk_path_style style)
{
  // Selects the path style used from now on. The style lives in a single
  // file-level variable, so every function reading it picks up the new value.
  // Only the two known styles are accepted (checked in debug builds).
  assert(style == CWK_STYLE_UNIX || style == CWK_STYLE_WINDOWS);

  path_style = style;
}
enum cwk_path_style cwk_path_get_style(void)
{
// Simply return the path style which we store in a global variable.
return path_style;
}