Author Topic: split.h ... a C emulation of the Python split function to parse a string ...  (Read 20893 times)

Offline David

  • Hero Member
  • *****
  • Posts: 647
    • View Profile
This C function lets you readily split a C string into a Clist of words (tokens - dynamic C strings), using your supplied DELIMITS string ...

Links to some example programs that demo the use of split.h ... follow ...

Code: [Select]
/* split.h */  /* this version: 2016-10-09 */


/* http://developers-heaven.net/forum/index.php/topic,46.0.html */


#ifndef dwSPLIT_H
#define dwSPLIT_H

/* Characters that separate tokens. Define DELIMITS before including this
   header to override the default of blank and tab. */
#ifndef DELIMITS
#define DELIMITS  " \t"
#endif

/* Number of delimiter characters, not counting the terminating '\0'.
   The expansion is parenthesized so it stays correct inside a larger
   expression, e.g. 2 * NUM_DLMTS.
   NOTE: sizeof only gives the character count while DELIMITS is a string
   literal (or array), not a pointer. */
#define NUM_DLMTS  (sizeof(DELIMITS) - 1)


/* adds readLine.h which adds stdio.h, stdlib.h, string.h, myAssert, newCopy */
#include "ClistOfString.h" /* also adds "Clist.h" */

/* Function that split() calls to add each token to the list. Define
   PUSH_CLIST before including this header to substitute another one;
   otherwise push_backClist is used. */
#ifndef PUSH_CLIST
#define PUSH_CLIST  push_backClist
#endif


/* Cut the C string p1 into tokens separated by any character found in
   DELIMITS and pass each token to PUSH_CLIST for insertion into lst.
   Runs of delimiters count as a single separator, so no empty token is ever
   added; an empty or all-delimiter string adds nothing.
   Each token string comes from newsubstr( start, 0, length ) - presumably a
   newly allocated copy that the list then owns (confirm in newsubstr). */
void split( Clist* lst, const char* p1 )
{
    const char* tokEnd;
    Node rec; /* only its str member is set before it is handed to the list */

    p1 += strspn( p1, DELIMITS ); /* step over any leading delimiters */
    while( *p1 != 0 )
    {
        /* p1 is on a non-delimiter, so the token is at least 1 char long */
        tokEnd = p1 + strcspn( p1, DELIMITS );
        rec.str = newsubstr( p1, 0, tokEnd - p1 );
        PUSH_CLIST( lst, &rec ); /* default is push_backClist */

        /* move on to the start of the next token, or to the final '\0' */
        p1 = tokEnd + strspn( tokEnd, DELIMITS );
    }
}


#endif

« Last Edit: October 09, 2016, 09:15:22 AM by David »

Offline David

  • Hero Member
  • *****
  • Posts: 647
    • View Profile
Here is a link to a demo program that uses split.h ... (Note: split.h includes ClistOfString.h ... and ClistOfString.h includes readLine.h and Clist.h ... and readLine.h includes stdio.h, stdlib.h, string.h and defines myAssert and newCopy)

http://developers-heaven.net/forum/index.php/topic,466.msg676.html#msg676

ClistOfString.h
http://developers-heaven.net/forum/index.php/topic,2582.msg2882.html#msg2882

readLine.h
http://developers-heaven.net/forum/index.php/topic,2580.msg2864.html#msg2864

Clist.h
http://developers-heaven.net/forum/index.php/topic,2582.msg2877.html#msg2877

Another example program that uses split.h to count the words in some text ...
http://www.dreamincode.net/forums/topic/247058-writing-a-c-program-to-check-if-a-given-word-is-repeated/page__pid__1436894__st__15&#entry1436894
« Last Edit: September 16, 2011, 05:18:57 PM by David »

Offline David

  • Hero Member
  • *****
  • Posts: 647
    • View Profile
Here is a C++ version ... also includes trim functions for right trim, left trim ... or trim both sides ...

Code: [Select]
// void_string_strip_split.cpp
// this version 2010-05-10

// http://developers-heaven.net/forum/index.php/topic,46.0.html

#include <iostream>
#include <string>
#include <list>

using namespace std;

// Alternate spellings: trim/ltrim/rtrim expand to strip/lstrip/rstrip.
#define trim strip
#define ltrim lstrip
#define rtrim rstrip

// Remove, in place, every leading and every trailing character of 's' that
// occurs in 't' (blank and tab unless told otherwise). A string that is
// empty, or made up solely of characters from 't', ends up empty.
void strip( string& s, const string t = " \t" )
{
    const size_t first = s.find_first_not_of( t );
    if( first == string::npos )
    {
        // nothing worth keeping
        s.clear();
        return;
    }

    // At least one character survives, so find_last_not_of cannot fail here.
    // Drop the tail first; that leaves the index of 'first' unchanged.
    s.erase( s.find_last_not_of( t ) + 1 );
    s.erase( 0, first );
}

// Remove, in place, the trailing characters of 's' that occur in 't'
// (blank and tab by default); leading ones are left alone. A string with
// nothing but such characters becomes empty.
void rstrip( string& s, const string t = " \t" )
{
    const size_t last = s.find_last_not_of( t );
    // number of characters to keep: none when no 'real' character was found
    const size_t keep = ( last == string::npos ) ? 0 : last + 1;
    s.erase( keep );
}

// Remove, in place, the leading characters of 's' that occur in 't'
// (blank and tab by default); trailing ones are left alone. A string with
// nothing but such characters becomes empty.
void lstrip( string& s, const string t = " \t" )
{
    // When no 'real' character exists the search yields npos, and erasing
    // npos characters from index 0 empties the string - the same result as
    // clearing it.
    s.erase( 0, s.find_first_not_of( t ) );
}

// Append to 'lst' every token of 's', where tokens are the maximal runs of
// characters not found in 'delimits' (blank and tab by default).
// Consecutive delimiters act as one separator, so no empty token is added.
// 'lst' is not cleared first: existing elements stay in front of the new ones.
void split( list<string>& lst, const string& s, const string delimits = " \t" )
{
    size_t start = s.find_first_not_of( delimits );
    while( start != string::npos ) // npos: empty, all delimits, or no more tokens
    {
        const size_t stop = s.find_first_of( delimits, start+1 );
        if( stop == string::npos )
        {
            // last token runs to the end of the string
            lst.push_back( s.substr( start ) );
            return;
        }

        lst.push_back( s.substr( start, stop-start ) );
        start = s.find_first_not_of( delimits, stop ); // skip the separator run
    }
}


int main()
{
    string s, t = "\t   leading and trailing whitespaces\t   ";
    s = t;
    cout << "NO strip: '" << s <<"'\n";
    strip( s );
    cout << "   strip: '" << s <<"'\n";
    s = t;
    rstrip( s );
    cout << "  rstrip: '" << s <<"'\n";
    s = t;
    lstrip( s );
    cout << "  lstrip: '" << s <<"'\n";
   
    string t2 = "       \t    \t    \t      ";
    s = t2;
    cout << "NO trim: '" << s <<"'\n";
    trim( s );
    cout << "   trim: '" << s <<"'\n";
    s = t2;
    rtrim( s );
    cout << "  rtrim: '" << s <<"'\n";
    s = t2;
    ltrim( s );
    cout << "  ltrim: '" << s <<"'\n";
   
   
    list < string > myList;
    split( myList, t );
    cout << "\n\nmyList.size() = "<< myList.size() << endl;
   
    list < string > :: const_iterator it;
    int i = 0;
    for( it = myList.begin(); it != myList.end(); ++ it, ++ i)
        cout << i << ": " << *it << endl;

    cout << "\nPress 'Enter' to continue ... " << flush;
    cin.get();
}


A version of the above that returns the new list of strings ...
Code: [Select]
// string_strip_split.cpp
// this version 2010-05-10

// http://developers-heaven.net/forum/index.php/topic,46.0.html

#include <iostream>
#include <string>
#include <list>

using namespace std;

// Alternate spellings: trim/ltrim/rtrim expand to strip/lstrip/rstrip.
#define trim strip
#define ltrim lstrip
#define rtrim rstrip

// Return a copy of 's' without the leading and trailing characters that
// occur in 't' (blank and tab by default). The result is empty when 's' is
// empty or holds nothing but such characters. 's' itself is not modified.
string strip( const string& s, const string t = " \t" )
{
    const size_t first = s.find_first_not_of( t );
    if( first == string::npos )
        return string(); // nothing to keep

    // a kept character exists, so this search cannot fail and last >= first
    const size_t last = s.find_last_not_of( t );
    return s.substr( first, last - first + 1 );
}

// Return a copy of 's' without the leading characters that occur in 't'
// (blank and tab by default); the tail is kept as is. The result is empty
// when 's' is empty or holds nothing but such characters.
string lstrip( const string& s, const string t = " \t" )
{
    const size_t first = s.find_first_not_of( t );
    return ( first == string::npos ) ? string() : s.substr( first );
}

// Return a copy of 's' without the trailing characters that occur in 't'
// (blank and tab by default); the head is kept as is. The result is empty
// when 's' is empty or holds nothing but such characters.
string rstrip( const string& s, const string t = " \t" )
{
    const size_t last = s.find_last_not_of( t );
    return ( last == string::npos ) ? string() : s.substr( 0, last + 1 );
}

// Return the tokens of 's' as a list, where tokens are the maximal runs of
// characters not found in 'delimits' (blank and tab by default).
// Consecutive delimiters act as one separator, so the list never holds an
// empty string; an empty or all-delimiter 's' gives an empty list.
list < string > split( const string& s, const string delimits = " \t" )
{
    list < string > words;
    size_t start = s.find_first_not_of( delimits );
    while( start != string::npos ) // npos: empty, all delimits, or no more tokens
    {
        const size_t stop = s.find_first_of( delimits, start+1 );
        if( stop == string::npos )
        {
            // last token runs to the end of the string
            words.push_back( s.substr( start ) );
            break;
        }

        words.push_back( s.substr( start, stop-start ) );
        start = s.find_first_not_of( delimits, stop ); // skip the separator run
    }
    return words;
}


int main()
{
    string s = "\t   leading and trailing whitespaces\t   ";
    string t = s;
   
    cout << "NO strip: '" << s <<"'\n"
         << "   strip: '" << strip( s ) <<"'\n"
         << "  rstrip: '" << rstrip( s ) <<"'\n"
         << "  lstrip: '" << lstrip( s ) <<"'\n";
         
    s = " \t  \t  \t  ";
   
    cout << "NO strip: '" << s <<"'\n"
         << "   strip: '" << trim( s ) <<"'\n"
         << "  rstrip: '" << rtrim( s ) <<"'\n"
         << "  lstrip: '" << ltrim( s ) <<"'\n";


    list < string > mylist = split( t ); // testing split with default delimiters
    cout << "\n\nmylist.size() = "<< mylist.size() << endl;
   
    list <string > :: const_iterator it;
    int i = 0;
    for( it = mylist.begin(); it != mylist.end(); ++ it, ++ i)
        cout << i << ": " << *it << endl;
       
    cout << "\nPress 'Enter' to continue ... " << flush;
    cin.get();
}
« Last Edit: November 30, 2011, 07:00:04 AM by David »