Merging ... using a linked-list, insertsort and fgets to read each line (no struct) ...
(and to sort lines using an offset into that line)
(placed here, since wouldn't fit above, with other files merging/sorting examples.)
/* mergeTwoFiles_fgets.c */ /* this version 2010-05-25 */
/*
C demo of merging two files into a new sorted file ... using a linked-
list ... by inserting lines, in sorted order, into the (growing) list.
Note: using while( fgets(buffer, MAXLEN+1, fp) ) to read each file line,
so ... max line length is limited to the define value of MAXLEN
Pick MAXLEN so '\n' char is always included at the end of each line read in
http://developers-heaven.net/forum/index.php/topic,46.0.html
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h> /* strlen, strcpy */
#define fName1 "info1.txt"
#define fName2 "info2.txt"
#define fName3 "merge.txt"
#define MAXLEN 132
#define sort_offset 12 /* so files will be sorted by names ... */
/*
info1.txt
09876 year1 Smith Catee Rose
08765 year2 Clinton Hilary Claire
07654 year3 Barrack Richard Kenneth
info2.txt
08967 year1 Edison Bill
06789 year2 Bell Robert Ed
merge.txt
07654 year3 Barrack Richard Kenneth
06789 year2 Bell Robert Ed
08765 year2 Clinton Hilary Claire
08967 year1 Edison Bell
09876 year1 Smith Catee Rose
*/
typedef struct myStudent
{
char* dataLine;
struct myStudent* next;
} Student ;
typedef Student *pStudent;
/* using these two global variable to ease function calls ... */
pStudent pHead = NULL; /* Note: needs a NULL value to start inserts ... */
int numStudents = 0;
void insert( pStudent pS );
int readFile( char* name );
int writeFile( char* name );
int studentCmp( pStudent pS1, pStudent pS2 );
char* newCopy( char* str );
void view( pStudent pS );
void showAll();
void delAll();
void flushStdin();
void myAssert( int condition, char nessage[] );
int main() /* ********************* start of main *************************** */
{
int num = readFile( fName1 );
if( num )
numStudents += num;
num = readFile( fName2 );
if( num )
numStudents += num;
//showAll();
num = writeFile( fName3 );
if( num == numStudents )
printf
(
"All %d students were successfully sorted and merged in file %s.",
num, fName3
);
else
printf
(
"Error: writing to file %s Note: wrote %d vs. numStudents of %d",
fName3, num, numStudents
);
delAll();
printf("\n\nPress 'Enter' to continue ... ");
flushStdin();
system( "notepad merge.txt" );
return 0;
} /* ******************************* end of main **************************** */
void flushStdin()
{
while( getchar()!='\n' ) ;
}
char* newCopy( char* str )
{
char* nCopy = (char*) malloc( strlen(str) + 1 );
myAssert( (nCopy != NULL), "Error: malloc failed (1)" );
strcpy( nCopy, str );
return nCopy;
}
/* insert in list with last & first names in proper order */
void insert( pStudent pS )
{
pStudent q = pHead; /* Get a working copy of pHead in q */
/* Firstly, we handle the case where 'this' should be the first element. */
if(pHead == NULL || studentCmp(pS, pHead) < 0)
{
/* So ... it now becomes the first element ... */
pS->next = pHead; /* old pHead becomes 2nd in list ... */
pHead = pS; /* and ... this pS ... becomes the head of the list */
}
else /* If here ... search the linked list for the right location ... */
{
/* Start comparing with element AFTER 'q' ... i.e. the 'next in' ... */
while(q->next != NULL && studentCmp(q->next, pS) <= 0)
{
q = q->next; /* Traverse the list ... */
}
/*
Ok, insert after 'q' by relinking the pointers (similar to above)
( Includes inserting at end position ... where q->next == NULL )
*/
pS->next = q->next; /* Inserted 'pS' is linked to 'upstream element' */
q->next = pS; /* Now 'q' is updated to link to the new 'pS' element */
}
}
void view( pStudent pS )
{
printf( pS->dataLine ); /* string already has '\n' at end ... */
fflush( stdout );
}
void showAll()
{
pStudent p = pHead; /* int c; */
if( pHead == NULL )
{
puts("\nNo records in memory at present.") ;
return;
}
/* If reach here ... */
while( p != NULL )
{
view( p );
p = p->next;
}
}
int writeFile(char *name)
{
FILE* fp;
int count = 0; /* to track the records actually written to the file */
pStudent p = pHead;
if( p == NULL )
{
puts("\nNo records available ... so NO records written to file.") ;
return 0;
}
fp = fopen( name, "w" );
if( fp == NULL )
{
printf("\nError opening file %s. Press 'Enter' to continue ... ", name);
flushStdin();
return 0;
}
for( ; p != NULL; p = p->next )
{
fprintf( fp, "%s", p->dataLine ); /* Note: 'line' already has '\n' at end */
++count;
}
fclose( fp );
return count; /* Number of records written. */
}
int readFile( char* name )
{
int count = 0;
pStudent pS;
char buffer[MAXLEN+1];
FILE* fp = fopen( name, "r" );
if( fp == NULL )
{
printf
(
"\nError opening file %s.\n"
"Perhaps it hasn't been created yet?\n"
"Press 'Enter' to continue ... ",
name
);
flushStdin();
return 0;
}
/* If the program reaches here ... */
while( fgets( buffer, MAXLEN+1, fp ) )
{
pS = (pStudent) malloc( sizeof(Student) );
myAssert( (pS != NULL), "Error: malloc failed (2)" );
pS->dataLine = newCopy( buffer );
insert( pS );
++count;
}
fclose( fp );
printf("%d records were read into memory from the file %s.\n", count, name);
return count; /* Total number of student records found in the file. */
}
/* A function to compare two student records to permit sorting ... */
int studentCmp( pStudent pS1, pStudent pS2 )
{
return strcmp( pS1->dataLine+sort_offset, pS2->dataLine+sort_offset );
}
void delAll()
{
pStudent p = pHead; /* set p to this initial value to start loop */
pStudent pNext; /* to hold the pointer to the next record */
for( ; p != NULL; p = pNext )
{
pNext = p->next; /* get pointer to the next record */
free( p->dataLine );
free( p );
}
/* update globals ... */
pHead = NULL;
numStudents = 0;
}
void myAssert( int condition, char message[] )
{
if( !condition )
{
fputs( message, stderr );
getchar();
exit(1);
}
}
As above, but using readLine instead of fgets ... so ... NO max line length (except as set by available memory)
/* mergeTwoFiles_readLine.c */ /* this version 2010-05-25 */
/*
C demo of merging two files into a new sorted file ... using a linked-
list ... by inserting lines, in sorted order, into the (growing) list.
Note: using while( (line = readLine(fp)) ) to read each file line,
so ... NO max line length ... (except as limited by available memory)
http://developers-heaven.net/forum/index.php/topic,46.0.html
*/
#include "readLine.h" /* includes stdio.h, stdlib.h and myAssert */
#include <string.h> /* re. strcmp */
#define fName1 "info1.txt"
#define fName2 "info2.txt"
#define fName3 "merge.txt"
#define sort_offset 12 /* so files will be sorted by names ... */
/*
info1.txt
09876 year1 Smith Catee Rose
08765 year2 Clinton Hilary Claire
07654 year3 Barrack Richard Kenneth
info2.txt
08967 year1 Edison Bill
06789 year2 Bell Robert Ed
merge.txt
07654 year3 Barrack Richard Kenneth
06789 year2 Bell Robert Ed
08765 year2 Clinton Hilary Claire
08967 year1 Edison Bell
09876 year1 Smith Catee Rose
*/
typedef struct myStudent
{
char* dataLine;
struct myStudent* next;
} Student ;
typedef Student *pStudent;
/* using these two global variable to ease function calls ... */
pStudent pHead = NULL; /* Note: needs a NULL value to start inserts ... */
int numStudents = 0;
void insert( pStudent pS );
int readFile( char* fName );
int writeFile( char* fName );
int studentCmp( pStudent pS1, pStudent pS2 );
void view( pStudent pS );
void showAll();
void delAll();
void flushStdin();
int main() /* ********************* start of main *************************** */
{
int num = readFile( fName1 );
if( num )
numStudents += num;
num = readFile( fName2 );
if( num )
numStudents += num;
//showAll();
num = writeFile( fName3 );
if( num == numStudents )
printf
(
"All %d students were successfully sorted and merged in file %s.",
num, fName3
);
else
printf
(
"Error: writing to file %s Note: wrote %d vs. numStudents of %d",
fName3, num, numStudents
);
delAll();
printf("\n\nPress 'Enter' to continue ... ");
flushStdin();
system( "notepad merge.txt" );
return 0;
} /* ******************************* end of main **************************** */
void flushStdin()
{
while( getchar()!='\n' ) ;
}
/* insert in list with last & first names in proper order */
void insert( pStudent pS )
{
pStudent q = pHead; /* Get a working copy of pHead in q */
/* Firstly, we handle the case where 'this' should be the first element. */
if(pHead == NULL || studentCmp(pS, pHead) < 0)
{
/* So ... it now becomes the first element ... */
pS->next = pHead; /* old pHead becomes 2nd in list ... */
pHead = pS; /* and ... this pS ... becomes the head of the list */
}
else /* If here ... search the linked list for the right location ... */
{
/* Start comparing with element AFTER 'q' ... i.e. the 'next in' ... */
while(q->next != NULL && studentCmp(q->next, pS) <= 0)
{
q = q->next; /* Traverse the list ... */
}
/*
Ok, insert after 'q' by relinking the pointers (similar to above)
( Includes inserting at end position ... where q->next == NULL )
*/
pS->next = q->next; /* Inserted 'pS' is linked to 'upstream element' */
q->next = pS; /* Now 'q' is updated to link to the new 'pS' element */
}
}
void view( pStudent pS )
{
puts( pS->dataLine );
fflush( stdout );
}
void showAll()
{
pStudent p = pHead; /* int c; */
if( pHead == NULL )
{
puts("\nNo records in memory at present.") ;
return;
}
/* If reach here ... */
while( p != NULL )
{
view( p );
p = p->next;
}
}
int writeFile(char *fName)
{
FILE* fp;
int count = 0; /* to track the records actually written to the file */
pStudent p = pHead;
if( p == NULL )
{
puts("\nNo records available ... so NO records written to file.") ;
return 0;
}
fp = fopen( fName, "w" );
if( fp == NULL )
{
printf("\nError opening file %s. Press 'Enter' to continue ... ", fName);
flushStdin();
return 0;
}
for( ; p != NULL; p = p->next )
{
fprintf( fp, "%s\n", p->dataLine );
++count;
}
fclose( fp );
return count; /* Number of records written. */
}
int readFile( char* fName )
{
int count = 0;
pStudent pS;
char* line;
FILE* fp = fopen( fName, "r" );
if( fp == NULL )
{
printf
(
"\nError opening file %s.\n"
"Perhaps it hasn't been created yet?\n"
"Press 'Enter' to continue ... ",
fName
);
flushStdin();
return 0;
}
/* If the program reaches here ... */
while( (line = readLine(fp)) )
{
pS = (pStudent) malloc( sizeof(Student) );
myAssert( (pS != NULL), "Error: malloc failed (2)" );
pS->dataLine = line;
insert( pS );
++count;
}
fclose( fp );
printf("%d records were read into memory from the file %s.\n", count, fName);
return count; /* Total number of student records found in the file. */
}
/* A function to compare two student records to permit sorting ... */
int studentCmp( pStudent pS1, pStudent pS2 )
{
return strcmp( pS1->dataLine+sort_offset, pS2->dataLine+sort_offset );
}
void delAll()
{
pStudent p = pHead; /* set p to this initial value to start loop */
pStudent pNext; /* to hold the pointer to the next record */
for( ; p != NULL; p = pNext )
{
pNext = p->next; /* get pointer to the next record */
free( p->dataLine );
free( p );
}
/* update globals ... */
pHead = NULL;
numStudents = 0;
}
And here is a way to sort a file too large for memory ....
break it into 2 parts and sort each file using a merge sort and then merge the 2 sorted files
/* mergeTwoFiles_msortAndMerge.c */ /* this version 2010-05-25 */
/*
C demo of merging two files into a new sorted file ...
1st, read each file into a linked-list,
2nd, then msort and write file back sorted to a new sorted file
3rd, then merge the 2 sorted files, line by line, into a merge.txt file
Note 1: using my ClistOfString.h and my readLine.h ... included below
Note 2: using while( (line = readLine(fp)) ) to read each file line,
so ... NO max line length ... (except as limited by available memory)
http://developers-heaven.net/forum/index.php/topic,46.0.html
*/
/* set sort_offset to 12 here, so file lines will be sorted by name ... */
#define sort_offset 12 /* if sort_offset not defined here, default is 0 */
#include "ClistOfString.h" /* includes <string.h>, "readLine.h" that includes */
/* stdio.h, stdlib.h and myAssert */
#define fName1 "info1.txt"
#define fName2 "info2.txt"
#define fName3 "merge.txt"
/*
info1.txt
09876 year1 Smith Catee Rose
08765 year2 Clinton Hilary Claire
07654 year3 Barrack Richard Kenneth
info2.txt
08967 year1 Edison Bill
06789 year2 Bell Robert Ed
merge.txt
07654 year3 Barrack Richard Kenneth
06789 year2 Bell Robert Ed
08765 year2 Clinton Hilary Claire
08967 year1 Edison Bell
09876 year1 Smith Catee Rose
*/
void readFile( char*, Clist* );
int writeFile( char*, Clist* );
int mergeFiles( char*, char*, char* );
int main() /* ********************* start of main *************************** */
{
int count1, count2, count3;
Clist cl;
init( &cl );
readFile( fName1, &cl );
//showAll( &cl );
msort( &cl );
count1 = writeFile( "sorted" fName1, &cl );
if( count1 != cl.size )
printf( "\nError: only %d of %d lines filed\n", count1, cl.size );
freeAll( &cl );
//system( "notepad sorted" fName1 );
readFile( fName2, &cl );
//showAll( &cl );
msort( &cl );
count2 = writeFile( "sorted" fName2, &cl );
if( count2 != cl.size )
printf( "\nError: only %d of %d lines filed\n", count2, cl.size );
freeAll( &cl );
//system( "notepad sorted" fName2 );
count3 = mergeFiles( "sorted" fName1, "sorted" fName2, fName3 );
if( count3 != count1+count2 )
printf( "\nError: only %d of %d lines filed\n", count3, count1+count2);
system( "notepad " fName3 );
return 0;
} /* ******************************* end of main **************************** */
void readFile( char* fName, Clist* cl )
{
List lst;
FILE* fp = fopen( fName, "r" );
if( fp == NULL )
{
printf
(
"\nError opening file %s.\n"
"Perhaps it hasn't been created yet?\n"
"Press 'Enter' to continue ... ",
fName
);
getchar();
return;
}
/* If the program reaches here ... */
while( (lst.line = readLine(fp)) )
{
push_back( cl, &lst );
}
fclose( fp );
printf("%d records were read into memory from the file %s.\n", cl->size, fName);
}
int writeFile( char *fName, Clist* cl )
{
FILE* fp;
int count = 0; /* to track the records actually written to the file */
pList pL;
if( !cl->size )
{
puts("\nNo records available ... so NO records written to file.") ;
return 0;
}
fp = fopen( fName, "w" );
if( fp == NULL )
{
printf("\nError opening file %s. Press 'Enter' to continue ... ", fName);
getchar();
return 0;
}
for( pL = cl->start; pL != NULL; pL = pL->next )
{
fprintf( fp, "%s\n", pL->line );
++count;
}
fclose( fp );
return count; /* Number of records written. */
}
int mergeFiles( char* a, char* b, char* c )
{
FILE* f1, * f2, * f3;
char* line1, * line2;
int count = 0;
f1 = fopen( a, "r" );
if( f1 == NULL )
{
printf("\nError opening file %s. Press 'Enter' to continue ... ", a);
getchar();
return 0;
}
f2 = fopen( b, "r" );
if( f2 == NULL )
{
printf("\nError opening file %s. Press 'Enter' to continue ... ", b);
getchar();
return 0;
}
f3 = fopen( c, "w" );
if( f3 == NULL )
{
printf("\nError opening file %s. Press 'Enter' to continue ... ", c);
getchar();
return 0;
}
line1 = readLine(f1);
line2 = readLine(f2);
while( line1 && line2 )
{
if( strcmp(line1+sort_offset, line2+sort_offset) <= 0 )
{
fprintf( f3, "%s\n", line1 );
++count;
free( line1 );
line1 = readLine(f1);
}
else
{
fprintf( f3, "%s\n", line2 );
++count;
free( line2 );
line2 = readLine(f2);
}
}
/* test end conditions ... */
if( line1 )
{
fprintf( f3, "%s\n", line1 );
++count;
free( line1 );
while( (line1 = readLine(f1)) )
{
fprintf( f3, "%s\n", line1 );
++count;
free( line1 );
}
}
else if( line2 )
{
fprintf( f3, "%s\n", line2 );
++count;
free( line2 );
while( (line2 = readLine(f2)) )
{
fprintf( f3, "%s\n", line2 );
++count;
free( line2 );
}
}
fclose(f3); fclose(f2); fclose(f1);
return count;
}
The above program uses these next 2 files also:
ClistOfString.h and readLine.h ... (that were suppled above, just before this program.)