// Character comparison modes: (choose exactly one!)
#define NATSORT_CMP_STRING_NOCASE
//#define NATSORT_CMP_STRING
//#define NATSORT_CMP_CHAR_NOCASE
//#define NATSORT_CMP_CHAR
// Additional features:
#define NATSORT_WITH_SPECIAL
#define NATSORT_WITH_NEGATIVE
///
/// Compares 2 strings in natural order.
///
///
///
/// Comparison result. 0: both strings are equal, negative: s1 < s2, positive: s1 > s2
int NatCompare(string s1, string s2)
{
// Implementation notes:
// * Sorting support:
// This function can sort strings with numbers in the order of their numeric value,
// not just each character's value in their strict order. Negative numbers are
// supported only at the beginning of each string. Leading zeros and special
// characters are ignored for sorting but are given a preference if both strings
// would be equal otherwise (to produce deterministic results).
// * Safe use of indices:
// Only access a character at a string position, when it is clear that the index is
// within the allowed range.
// * Produce deterministic results:
// Whenever the order of processing s1 and s2 is not irrelevant, i.e. exchanging them,
// the resulting sort order may not be deterministic and depend on the order of the
// unsorted data.
// Special cases
if (s1 == null && s2 == null) return 0;
if (s1 == null) return -1;
if (s2 == null) return 1;
int pos1 = 0, pos2 = 0; // Current string positions
char c1, c2; // Characters at current string positions
int len1 = s1.Length;
int len2 = s2.Length;
// More special cases
if (len1 == 0 && len2 == 0) return 0;
if (len1 == 0) return -1;
if (len2 == 0) return 1;
// Negative number flags
#if NATSORT_WITH_NEGATIVE
bool neg1 = len1 > 1 && s1[0] == '-' && Char.IsDigit(s1[1]);
bool neg2 = len2 > 1 && s2[0] == '-' && Char.IsDigit(s2[1]);
#else
bool neg1 = false, neg2 = false;
#endif
int preference = 0; // String preference value: -1 prefers s1, 1 prefers s2 (see function return value)
bool digit1, digit2;
int endnum1, endnum2; // Where the numbers end in the string
int numpos1, numpos2; // Current comparison position in a number
int lz1, lz2; // Number of leading zeros we've seen until then
#if NATSORT_WITH_SPECIAL
// These characters will be skipped in comparison
const string special = " '\"";
#endif
// Main loop:
do
{
#if NATSORT_WITH_SPECIAL
// Check for non-letter characters and skip them
int skipped1 = 0;
int skipped2 = 0;
while (special.IndexOf(s1[pos1 + skipped1]) != -1 && pos1 + skipped1 < len1 - 1)
{
skipped1++;
}
while (special.IndexOf(s2[pos2 + skipped2]) != -1 && pos2 + skipped2 < len2 - 1)
{
skipped2++;
}
if (preference == 0) preference = skipped1 - skipped2;
if (preference == 0 && skipped1 == skipped2)
{
// Skipped none or equal number of special characters: compare them for a preference
for (int i = 0; i < skipped1; i++)
{
int c = s1[pos1 + i].CompareTo(s2[pos2 + i]);
if (c != 0)
{
preference = c;
break;
}
}
}
pos1 += skipped1;
pos2 += skipped2;
#endif
c1 = s1[pos1];
c2 = s2[pos2];
// Check if we have digits in both strings; also accept '-' at the beginning
digit1 = Char.IsDigit(c1) || (pos1 == 0 && neg1);
digit2 = Char.IsDigit(c2) || (pos2 == 0 && neg2);
if (!digit1 || !digit2)
{
// At least one of them is no digit: compare by character
// If one of them is a negative number, replace the '-' by a digit to keep negative
// and positive numbers together, not separated by other symbols
if (pos1 == 0 && neg1) c1 = '0'; // c1 was '-'
if (pos2 == 0 && neg2) c2 = '0'; // c2 was '-'
#if NATSORT_CMP_STRING_NOCASE
int cmp = string.Compare(c1.ToString(), c2.ToString(), StringComparison.CurrentCultureIgnoreCase);
#elif NATSORT_CMP_STRING
int cmp = string.Compare(c1.ToString(), c2.ToString(), StringComparison.CurrentCulture);
#elif NATSORT_CMP_CHAR_NOCASE
int cmp = char.ToLower(c1).CompareTo(char.ToLower(c2));
#elif NATSORT_CMP_CHAR
int cmp = c1.CompareTo(c2);
#endif
if (cmp != 0) return cmp; // We have a string difference at this point
#if NATSORT_CMP_STRING_NOCASE || NATSORT_CMP_STRING
// Both characters are the same, compare them by value again, to make a preference
if (preference == 0 && c1 < c2) preference = -1;
if (preference == 0 && c1 > c2) preference = 1;
#endif
}
else
{
// Both characters are digits: find the whole number value
endnum1 = pos1 + 1;
while (endnum1 < len1 && Char.IsDigit(s1[endnum1])) endnum1++;
endnum1--;
endnum2 = pos2 + 1;
while (endnum2 < len2 && Char.IsDigit(s2[endnum2])) endnum2++;
endnum2--;
// Compare both strings from the end of their number leftwards
numpos1 = endnum1;
numpos2 = endnum2;
lz1 = lz2 = 0;
#if NATSORT_WITH_NEGATIVE
int negfactor = (pos1 == 0 && (neg1 || neg2)) ? -1 : 1;
// This implies (pos2 == 0)
// Negative number factor, only valid at the beginning of the strings
#endif
int numcmp = 0;
do
{
c1 = numpos1 >= 0 ? s1[numpos1] : '0';
c2 = numpos2 >= 0 ? s2[numpos2] : '0';
// See if we're still in the number
if (numpos1 < pos1 || c1 == '-')
{
// We left the number in s1: assume '0'
c1 = '0';
}
if (numpos2 < pos2 || c2 == '-')
{
// We left the number in s2: assume '0'
c2 = '0';
}
int cmp = c1.CompareTo(c2);
if (cmp != 0) numcmp = cmp; // We have a numeric difference at this point, keep it for later
#if NATSORT_WITH_NEGATIVE
numcmp *= negfactor;
#endif
// Add '0' to the number of leading zeros, or reset the counter
if (c1 == '0') lz1++; else lz1 = 0;
if (c2 == '0') lz2++; else lz2 = 0;
// Now both digits are the same, try with the next to the left
numpos1--;
numpos2--;
}
while (numpos1 >= pos1 || numpos2 >= pos2);
// Loop as long as one of the strings is still in the number
#if NATSORT_WITH_NEGATIVE
if (negfactor == -1 && neg1 ^ neg2)
{
// Only one of both numbers is negative: make a decision
if (neg1) return -1;
return 1;
}
#endif
// Traversed both numbers, did we encounter a numeric difference?
if (numcmp != 0) return numcmp;
// We're still here, so both numbers have the same value
// Is one of them longer? Then prefer that one (if not already prefering something)
if (preference == 0 && lz1 > lz2) preference = -1;
if (preference == 0 && lz1 < lz2) preference = 1;
// Continue character-wise right after the last digit of the number:
// Set the current pointer to the last digit position, it's increased later
pos1 = endnum1;
pos2 = endnum2;
}
// No decision yet, go to the next character
pos1++;
pos2++;
}
while (pos1 < len1 && pos2 < len2);
// Out of the main loop:
// Which of both strings (if any) would continue?
if (pos1 < len1) return 1; // s1 is longer than this
if (pos2 < len2) return -1; // s2 is longer than this
// Both strings end here: Do we have a preference? If not, they're equal.
return preference;
}