bug fix: new ares_strsplit (#492)

* add ares_strsplit unit test

The test reveals a bug in the implementation of ares_strsplit when the
make_set parameter is set to 1, as distinct domains are confused for
equal:

  out = ares_strsplit("example.com, example.co", ", ", 1, &n);

evaluates to n = 1 with out = { "example.com" }.

* bugfix and cleanup of ares_strsplit

The purpose of ares_strsplit in c-ares is to split a comma-delimited
string of unique (up to letter case) domains. However, because the
terminating NUL byte was not checked in the substrings when comparing
for uniqueness, the function would sometimes drop domains it should
not. For example,

    ares_strsplit("example.com, example.co", ",")

would only result in a single domain "example.com".

Aside from this bugfix, the following cleanup is performed:

1. The tokenization now happens with the help of strcspn instead of the
   custom function is_delim.
2. The function list_contains has been inlined.
3. The interface of ares_strsplit has been simplified by removing the
   parameter make_set since in practice it was always 1.
4. There are fewer passes over the input string.
5. We resize the table using realloc() down to its minimum size.
6. The docstring of ares_strsplit is updated and also a couple typos
   are fixed.

There occurs a single use of ares_strsplit and since the make_set
parameter has been removed, the call in ares_init.c is modified
accordingly. The unit test for ares_strsplit is also updated.

Fix By: Nikolaos Chatzikonstantinou (@createyourpersonalaccount)
pull/495/head^2
Nikolaos Chatzikonstantinou 2 years ago committed by GitHub
parent 4b7301a0d2
commit 313bd2aa7e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      src/lib/ares_init.c
  2. 174
      src/lib/ares_strsplit.c
  3. 15
      src/lib/ares_strsplit.h
  4. 42
      test/ares-test-internal.cc

@ -2000,7 +2000,7 @@ static int set_search(ares_channel channel, const char *str)
channel->ndomains = -1;
} /* LCOV_EXCL_STOP */
channel->domains = ares_strsplit(str, ", ", 1, &cnt);
channel->domains = ares_strsplit(str, ", ", &cnt);
channel->ndomains = (int)cnt;
if (channel->domains == NULL || channel->ndomains == 0) {
channel->domains = NULL;

@ -22,46 +22,6 @@
#include "ares.h"
#include "ares_private.h"
static int list_contains(char * const *list, size_t num_elem, const char *str, int insensitive)
{
size_t len;
size_t i;
len = strlen(str);
for (i=0; i<num_elem; i++)
{
if (insensitive)
{
#ifdef WIN32
if (strnicmp(list[i], str, len) == 0)
#else
if (strncasecmp(list[i], str, len) == 0)
#endif
return 1;
}
else
{
if (strncmp(list[i], str, len) == 0)
return 1;
}
}
return 0;
}
static int is_delim(char c, const char *delims, size_t num_delims)
{
size_t i;
for (i=0; i<num_delims; i++)
{
if (c == delims[i])
return 1;
}
return 0;
}
void ares_strsplit_free(char **elms, size_t num_elm)
{
size_t i;
@ -74,105 +34,67 @@ void ares_strsplit_free(char **elms, size_t num_elm)
ares_free(elms);
}
char **ares_strsplit(const char *in, const char *delms, int make_set, size_t *num_elm)
{
char *parsestr;
char **temp;
char **out;
size_t cnt;
size_t nelms;
size_t in_len;
size_t num_delims;
size_t i;
char **ares_strsplit(const char *in, const char *delms, size_t *num_elm) {
const char *p;
char **table;
void *tmp;
size_t i, j, k, count;
if (in == NULL || delms == NULL || num_elm == NULL)
return NULL;
*num_elm = 0;
in_len = strlen(in);
num_delims = strlen(delms);
/* Figure out how many elements. */
nelms = 1;
for (i=0; i<in_len; i++)
{
if (is_delim(in[i], delms, num_delims))
{
nelms++;
/* count non-empty delimited substrings */
count = 0;
p = in;
do {
i = strcspn(p, delms);
if (i != 0) {
/* string is non-empty */
count++;
p += i;
}
}
} while(*p++ != 0);
/* Copy of input so we can cut it up. */
parsestr = ares_strdup(in);
if (parsestr == NULL)
if (count == 0)
return NULL;
/* Temporary array to store locations of start of each element
* within parsestr. */
temp = ares_malloc(nelms * sizeof(*temp));
if (temp == NULL)
{
ares_free(parsestr);
table = ares_malloc(count * sizeof(*table));
if (table == NULL)
return NULL;
}
temp[0] = parsestr;
cnt = 1;
for (i=0; i<in_len && cnt<nelms; i++)
{
if (!is_delim(parsestr[i], delms, num_delims))
continue;
/* Replace sep with NULL. */
parsestr[i] = '\0';
/* Add the pointer to the array of elements */
temp[cnt] = parsestr+i+1;
cnt++;
}
/* Copy each element to our output array. */
out = ares_malloc(nelms * sizeof(*out));
if (out == NULL)
{
ares_free(parsestr);
ares_free(temp);
return NULL;
}
nelms = 0;
for (i=0; i<cnt; i++)
{
if (temp[i][0] == '\0')
continue;
if (make_set && list_contains(out, nelms, temp[i], 1))
continue;
out[nelms] = ares_strdup(temp[i]);
if (out[nelms] == NULL)
{
ares_strsplit_free(out, nelms);
ares_free(parsestr);
ares_free(temp);
return NULL;
j = 0; /* current table entry */
/* re-calculate indices and allocate new strings for table */
for (p = in; j < count; p += i + 1) {
i = strcspn(p, delms);
if (i != 0) {
for (k = 0; k < j; k++) {
#ifdef WIN32
if (strnicmp(table[k], p, i) == 0 && table[k][i] == 0)
break;
#else
if (strncasecmp(table[k], p, i) == 0 && table[k][i] == 0)
break;
#endif
}
if (k == j) {
/* copy unique strings only */
table[j] = ares_malloc(i + 1);
if (table[j] == NULL) {
ares_strsplit_free(table, j);
return NULL;
}
strncpy(table[j], p, i);
table[j++][i] = 0;
} else
count--;
}
nelms++;
}
/* If there are no elements don't return an empty allocated
* array. */
if (nelms == 0)
{
ares_strsplit_free(out, nelms);
out = NULL;
}
/* Get the true number of elements (recalculated because of make_set) */
*num_elm = nelms;
tmp = ares_realloc(table, count * sizeof (*table));
if (tmp != NULL)
table = tmp;
ares_free(parsestr);
ares_free(temp);
return out;
*num_elm = count;
return table;
}

@ -18,22 +18,21 @@
#include "ares_setup.h"
/* Split a string on delem skipping empty elements.
/* Split a string on delms skipping empty or duplicate elements.
*
* param in String to split.
* param delims String of characters to treat as a delimitor.
* Each character in the string is a delimitor so
* there can be multiple delimitors to split on.
* E.g. ", " will split on all comma's and spaces.
* param make_set Have the list be a Set where there are no
* duplicate entries. 1 for true, 0 or false.
* param delms String of characters to treat as a delimitor.
* Each character in the string is a delimitor so
* there can be multiple delimitors to split on.
* E.g. ", " will split on all comma's and spaces.
* Duplicate entries are removed.
* param num_elm Return parameter of the number of elements
* in the result array.
*
* returns an allocated array of allocated string elements.
*
*/
char **ares_strsplit(const char *in, const char *delms, int make_set, size_t *num_elm);
char **ares_strsplit(const char *in, const char *delms, size_t *num_elm);
/* Frees the result returned from ares_strsplit(). */
void ares_strsplit_free(char **elms, size_t num_elm);

@ -19,6 +19,7 @@ extern "C" {
#include "ares_nowarn.h"
#include "ares_inet_net_pton.h"
#include "ares_data.h"
#include "ares_strsplit.h"
#include "ares_private.h"
#include "bitncmp.h"
@ -47,6 +48,47 @@ void CheckPtoN4(int size, unsigned int value, const char *input) {
}
#endif
#ifndef CARES_SYMBOL_HIDING
TEST_F(LibraryTest, Strsplit) {
using std::vector;
using std::string;
size_t n;
struct {
vector<string> inputs;
vector<string> delimiters;
vector<vector<string>> expected;
} data = {
{
"",
" ",
" ",
"example.com, example.co",
" a, b, A,c, d, e,,,D,e,e,E",
},
{ ", ", ", ", ", ", ", ", ", " },
{
{}, {}, {},
{ "example.com", "example.co" },
{ "a", "b", "c", "d", "e" },
},
};
for(size_t i = 0; i < data.inputs.size(); i++) {
char **out = ares_strsplit(data.inputs.at(i).c_str(),
data.delimiters.at(i).c_str(), &n);
if(data.expected.at(i).size() == 0) {
EXPECT_EQ(out, nullptr);
}
else {
EXPECT_EQ(n, data.expected.at(i).size());
for(size_t j = 0; j < n && j < data.expected.at(i).size(); j++) {
EXPECT_STREQ(out[j], data.expected.at(i).at(j).c_str());
}
}
ares_strsplit_free(out, n);
}
}
#endif
TEST_F(LibraryTest, InetPtoN) {
struct in_addr a4;
struct in6_addr a6;

Loading…
Cancel
Save