Developers Heaven Forum
Desktop Programming => C/C++ & Visual C++ => Topic started by: David on July 19, 2011, 01:17:05 AM
-
This C function lets you readily split a C string into a Clist of words (tokens - dynamic C strings), using your supplied DELIMITS string ...
Links to some example programs that demo the use of split.h ... follow ...
/* split.h */ /* this version: 2016-10-09 */
/* http://developers-heaven.net/forum/index.php/topic,46.0.html */
#ifndef dwSPLIT_H
#define dwSPLIT_H
/* Default delimiter set (space and tab); define DELIMITS before including
   this header to split on a different set of characters. */
#ifndef DELIMITS
#define DELIMITS " \t"
#endif
/* Number of delimiter characters, i.e. the size of the DELIMITS literal minus
   its terminating '\0'.  The expansion is fully parenthesized so the macro
   stays correct inside larger expressions such as 2 * NUM_DLMTS. */
#define NUM_DLMTS (sizeof(DELIMITS) - 1)
/* adds readLine.h which adds stdio.h, stdlib.h, string.h, myAssert, newCopy */
#include "ClistOfString.h" /* also adds "Clist.h" */
#ifndef PUSH_CLIST
#define PUSH_CLIST push_backClist
#endif
/*
   Breaks the C string p1 into tokens separated by any character found in
   DELIMITS and passes each token, in order of appearance, to PUSH_CLIST
   (push_backClist unless PUSH_CLIST was defined before this header).
   Runs of delimiters are skipped, so no empty tokens are produced; an empty
   or all-delimiter string adds nothing to lst.
   NOTE(review): Clist, Node and newsubstr are declared in ClistOfString.h /
   its includes; each token is presumably a newly allocated copy that the
   list takes over -- confirm against those headers.
*/
void split( Clist* lst, const char* p1 )
{
    Node rec; /* only rec.str is assigned here before each push */
    for( ; ; )
    {
        const char* tokEnd;

        p1 += strspn( p1, DELIMITS );          /* step over leading delimiters */
        if( *p1 == 0 ) return;                 /* nothing (more) to split */

        tokEnd = p1 + strcspn( p1, DELIMITS ); /* first delimiter (or '\0') after the token */
        rec.str = newsubstr( p1, 0, tokEnd-p1 );
        PUSH_CLIST( lst, &rec );               /* default is push_backClist */

        p1 = tokEnd;                           /* resume scanning after this token */
    }
}
#endif
-
Here is a link to a demo program that uses split.h ... (Note: split.h includes ClistOfString.h ... and ClistOfString.h includes readLine.h and Clist.h ... and readLine.h includes stdio.h, stdlib.h, string.h and defines myAssert and newCopy)
http://developers-heaven.net/forum/index.php/topic,466.msg676.html#msg676
ClistOfString.h
http://developers-heaven.net/forum/index.php/topic,2582.msg2882.html#msg2882
readLine.h
http://developers-heaven.net/forum/index.php/topic,2580.msg2864.html#msg2864
Clist.h
http://developers-heaven.net/forum/index.php/topic,2582.msg2877.html#msg2877
Another example program that uses split.h to count the words in some text ...
http://www.dreamincode.net/forums/topic/247058-writing-a-c-program-to-check-if-a-given-word-is-repeated/page__pid__1436894__st__15&#entry1436894
-
Here is a C++ version ... also includes trim functions for right trim, left trim ... or trim both sides ...
// void_string_strip_split.cpp
// this version 2010-05-10
// http://developers-heaven.net/forum/index.php/topic,46.0.html
#include <iostream>
#include <string>
#include <list>
using namespace std;
#define trim strip
#define ltrim lstrip
#define rtrim rstrip
// Removes, in place, every leading and trailing character of 's' that occurs
// in 't' (default: space and tab).  A string that is empty or consists only
// of characters from 't' ends up empty.
void strip( string& s, const string t = " \t" ) // default whitespace: "\t "
{
    const size_t first = s.find_first_not_of( t );
    if( first == string::npos ) // nothing but 't' characters (or nothing at all)
    {
        s.clear();
        return;
    }

    // A kept character exists, so 'last' is a valid index and last >= first.
    const size_t last = s.find_last_not_of( t );
    s.erase( last + 1 );  // drop the tail first, so 'first' still indexes correctly ...
    s.erase( 0, first );  // ... then drop the head
}
// Removes, in place, the trailing characters of 's' that occur in 't'
// (default: space and tab).  Leading characters are left untouched; a string
// made up only of 't' characters ends up empty.
void rstrip( string& s, const string t = " \t" )
{
    const size_t last = s.find_last_not_of( t );
    if( last == string::npos ) // no character worth keeping
    {
        s.clear();
        return;
    }
    s.erase( last + 1 ); // cut everything after the last kept character
}
// Removes, in place, the leading characters of 's' that occur in 't'
// (default: space and tab).  Trailing characters are left untouched; a string
// made up only of 't' characters ends up empty.
void lstrip( string& s, const string t = " \t" )
{
    // When no character survives, find_first_not_of yields npos and
    // erase( 0, npos ) removes the whole string, which is the wanted result.
    const size_t first = s.find_first_not_of( t );
    s.erase( 0, first );
}
// Appends to 'lst' every token of 's', where tokens are the maximal runs of
// characters that do not occur in 'delimits' (default: space and tab).
// Existing elements of 'lst' are kept; an empty or all-delimiter 's' appends
// nothing.
void split( list<string>& lst, const string& s, const string delimits = " \t" )
{
    size_t tokBegin = s.find_first_not_of( delimits );
    while( tokBegin != string::npos )
    {
        const size_t tokEnd = s.find_first_of( delimits, tokBegin + 1 );
        if( tokEnd == string::npos )
        {
            // the last token runs to the end of the string
            lst.push_back( s.substr( tokBegin ) );
            return;
        }
        lst.push_back( s.substr( tokBegin, tokEnd - tokBegin ) );

        // look for the start of the next token, beginning at the delimiter
        tokBegin = s.find_first_not_of( delimits, tokEnd );
    }
}
// Demo driver: shows the in-place strip / rstrip / lstrip functions (and their
// trim / rtrim / ltrim aliases) on two sample strings, then splits the first
// sample with the default delimiters and lists the resulting tokens.
int main()
{
    const string padded = "\t leading and trailing whitespaces\t ";

    // each call works on a fresh copy, since the functions modify their argument
    string work = padded;
    cout << "NO strip: '" << work <<"'\n";
    strip( work );
    cout << " strip: '" << work <<"'\n";

    work = padded;
    rstrip( work );
    cout << " rstrip: '" << work <<"'\n";

    work = padded;
    lstrip( work );
    cout << " lstrip: '" << work <<"'\n";

    // same sequence on an all-whitespace string, via the trim aliases
    const string blanks = " \t \t \t ";

    work = blanks;
    cout << "NO trim: '" << work <<"'\n";
    trim( work );
    cout << " trim: '" << work <<"'\n";

    work = blanks;
    rtrim( work );
    cout << " rtrim: '" << work <<"'\n";

    work = blanks;
    ltrim( work );
    cout << " ltrim: '" << work <<"'\n";

    list < string > myList;
    split( myList, padded );
    cout << "\n\nmyList.size() = "<< myList.size() << endl;

    // print the tokens, numbered from 0
    int idx = 0;
    list < string > :: const_iterator pos = myList.begin();
    while( pos != myList.end() )
    {
        cout << idx << ": " << *pos << endl;
        ++pos;
        ++idx;
    }

    cout << "\nPress 'Enter' to continue ... " << flush;
    cin.get();
}
A version of the above that returns the new list of strings ...
// string_strip_split.cpp
// this version 2010-05-10
// http://developers-heaven.net/forum/index.php/topic,46.0.html
#include <iostream>
#include <string>
#include <list>
using namespace std;
#define trim strip
#define ltrim lstrip
#define rtrim rstrip
// Returns a copy of 's' without the leading and trailing characters that
// occur in 't' (default: space and tab).  The result is empty when 's' is
// empty or consists only of characters from 't'.
string strip( const string& s, const string t = " \t" )
{
    const size_t first = s.find_first_not_of( t );
    if( first == string::npos ) // nothing worth keeping
        return "";

    // a kept character exists, so 'last' is valid and last >= first
    const size_t last = s.find_last_not_of( t );
    return s.substr( first, last - first + 1 );
}
// Returns a copy of 's' without the leading characters that occur in 't'
// (default: space and tab); trailing characters are kept.  The result is
// empty when no character of 's' lies outside 't'.
string lstrip( const string& s, const string t = " \t" )
{
    const size_t first = s.find_first_not_of( t );
    return ( first == string::npos ) ? string() : s.substr( first );
}
// Returns a copy of 's' without the trailing characters that occur in 't'
// (default: space and tab); leading characters are kept.  The result is
// empty when no character of 's' lies outside 't'.
string rstrip( const string& s, const string t = " \t" )
{
    const size_t last = s.find_last_not_of( t );
    if( last == string::npos )
        return "";
    const size_t keep = last + 1; // number of characters to retain
    return s.substr( 0, keep );
}
// Returns the tokens of 's' in order of appearance, where tokens are the
// maximal runs of characters that do not occur in 'delimits' (default: space
// and tab).  An empty or all-delimiter 's' yields an empty list.
list < string > split( const string& s, const string delimits = " \t" )
{
    list < string > tokens;

    size_t tokBegin = s.find_first_not_of( delimits );
    while( tokBegin != string::npos )
    {
        const size_t tokEnd = s.find_first_of( delimits, tokBegin + 1 );
        if( tokEnd == string::npos )
        {
            // no further delimiter: the final token reaches the end of 's'
            tokens.push_back( s.substr( tokBegin ) );
            break;
        }
        tokens.push_back( s.substr( tokBegin, tokEnd - tokBegin ) );

        // continue from the delimiter that ended this token
        tokBegin = s.find_first_not_of( delimits, tokEnd );
    }

    return tokens;
}
// Demo driver: prints the results of the value-returning strip / rstrip /
// lstrip functions (and their trim / rtrim / ltrim aliases) for two sample
// strings, then splits the first sample with the default delimiters and
// lists the resulting tokens.
int main()
{
    const string padded = "\t leading and trailing whitespaces\t ";
    cout << "NO strip: '" << padded <<"'\n";
    cout << " strip: '" << strip( padded ) <<"'\n";
    cout << " rstrip: '" << rstrip( padded ) <<"'\n";
    cout << " lstrip: '" << lstrip( padded ) <<"'\n";

    // same sequence on an all-whitespace string, via the trim aliases
    const string blanks = " \t \t \t ";
    cout << "NO strip: '" << blanks <<"'\n";
    cout << " strip: '" << trim( blanks ) <<"'\n";
    cout << " rstrip: '" << rtrim( blanks ) <<"'\n";
    cout << " lstrip: '" << ltrim( blanks ) <<"'\n";

    list < string > mylist = split( padded ); // testing split with default delimiters
    cout << "\n\nmylist.size() = "<< mylist.size() << endl;

    // print the tokens, numbered from 0
    int idx = 0;
    list <string > :: const_iterator pos = mylist.begin();
    while( pos != mylist.end() )
    {
        cout << idx << ": " << *pos << endl;
        ++pos;
        ++idx;
    }

    cout << "\nPress 'Enter' to continue ... " << flush;
    cin.get();
}