/*
   procpg.c (PROgress Code Page Generator)

   Reads table file with format

      # comment
      src_code1 dst_code1 anything
      ...
      src_codeN dst_codeN

   and creates a Progress codepage translation table
   from src codes to dst codes.
   -------------------------------------------------------------------------
   Copyright (C) 2001 Gediminas Markevicius, Lithuania, Klaipeda
   http://proc.w3.lt/pro

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details at
   http://www.gnu.org/copyleft/gpl.html
   -------------------------------------------------------------------------
   Gediminas 2001.01.18 - Created
   Gediminas 2003.03.26 - options -s,-g
*/

/* NOTE(review): the header names were missing from every #include line of
   this file.  The list below is reconstructed from the identifiers the code
   uses (FILE/fprintf, strlen/memmove, isspace, errno, LONG_MIN).  The
   platform-specific pair is a guess -- nothing in this file depends on it --
   so confirm it against the original source. */
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#if defined( __MSDOS__)
#include <io.h>
#else
#include <unistd.h>
#endif

#define VERSION "1.03"

/* number of codes in a single-byte code page */
#define NCODES  256

/* input file data types */
#define TCHAR   1
#define TDEC    2
#define THEX    3
#define THTML   4

/* printable names of the data types, indexed by T* value */
char asType[][8]= { "?", "Char", "Deci", "Hex", "HTML" };

long table[NCODES];     /* conversion table: table[src] = dst */
int usage[NCODES];      /* 0-ok, >0-lost, <0-empty */
char buff[4096];        /* input line buffer */
FILE *pf;               /* current input file, later the output stream */
const char* pcb;        /* program base name used in messages */

int nSrcType, nDstType;
int fInverse, fCSource, fGenTable;
const char* szInpFile;

int ProcessParams( int argc, char** argv);
const char* GetBaseName( const char* szPath);
FILE* OpenFile( const char *rfile, const char *rmode);
int PutTypeValue( FILE *rf, int rtype, long rval);
void strtrim( char* pstr);
void HelpMain( int fAll);
int GetValue( int pType, const char* pc, long int *pval);
void SkipLeadingSpaces( const char **pc);
long charVal( const char *pc);
long hexVal( const char *pc);
long decVal( const char *pc);
long htmlVal( const char *pc);

/* ----------------------------------------------------------------------
 :; 2. Main
   ---------------------------------------------------------------------- */

/*
   Program entry point.

   Exit codes: 0 - success, 1 - bad command line (or help shown),
   2 - input file cannot be opened/read or a line is too long,
   3 - bad code value in the input, or -i requested with codes > 255.

   Define PROCPG_NO_MAIN to compile the helper functions without the entry
   point (e.g. to link them into a unit test).
*/
#ifndef PROCPG_NO_MAIN
int main( int argc, char* argv[])
{
    int i, j, cols;
    int nUnicode, nHexLen;
    long l, lnSrc, lnDst, lnMaxDst;
    size_t len;
    char *pc;

    /* argv[0] may legally be NULL when argc is 0 */
    pcb= (argc > 0 && argv[0] != NULL)? GetBaseName( argv[0]) : "procpg";

    if (!ProcessParams( argc, argv))
        return 1;

    /* initialise conversion table to identity */
    for (i= 0; i<NCODES; i++)
        table[ i] = i;

    /* ---- Generate source table ---- */
    if (fGenTable) {
        pf= stdout;
        fprintf( pf, "# %s %s\n", asType[nSrcType], asType[nDstType]);
        for (i= 32; i<NCODES; i++) {
            PutTypeValue( pf, nSrcType, i);
            fputs( " ", pf);
            PutTypeValue( pf, nDstType, i);
            fputs( "; \n", pf);
        }
        return 0;
    }

    /* ---- Read input file ---- */
    if ((pf= OpenFile( szInpFile, "r"))==NULL)
        return 2;

    while (fgets( buff, (int)sizeof(buff)-1, pf) != NULL) {
        /* did string fit in buffer?  fgets stores at most sizeof(buff)-2
           characters, so reaching that length means the line was cut */
        len = strlen( buff);
        if (len > sizeof(buff)-3) {
            fprintf( stderr, "%s: string length exceeds %d characters\n",
                     pcb, (int)sizeof(buff)-3);
            fclose( pf);
            return 2;
        }
        strtrim( buff);
        pc= buff;

        /* skip empty lines and comments */
        if (*pc=='\0' || *pc=='#')
            continue;

        /* get source code */
        if (!GetValue( nSrcType, pc, &lnSrc) || lnSrc >= NCODES) {
            fprintf( stderr, "%s: bad source code value `%s'\n", pcb, pc);
            fclose( pf);
            return 3;
        }

        /* get destination code: it follows the first white space */
        while (*pc!='\0' && !isspace( (unsigned char)*pc))
            pc++;
        if (!GetValue( nDstType, pc, &lnDst)) {
            fprintf( stderr, "%s: bad destination code value `%s'\n", pcb, pc);
            fclose( pf);
            return 3;
        }

        /* write codes to conversion table (0 <= lnSrc < NCODES here) */
        table[ lnSrc] = lnDst;
    }
    if (ferror( pf)) {
        fprintf( stderr, "%s: input file reading failed: %s\n",
                 pcb, strerror( errno));
        fclose( pf);
        return 2;
    }
    fclose( pf);

    /* open output file */
    pf= stdout;

    /* ---- Analyse table ---- */

    /* find the code output format we shall need */
    lnMaxDst= NCODES - 1;
    for (i= 0; i<NCODES; i++) {
        if (table[i] > lnMaxDst)
            lnMaxDst= table[i];
    }

    /* number of hex digits needed for the largest destination code;
       dividing (instead of growing a power of 16) cannot overflow and
       handles exact powers of 16 such as 0x100 correctly */
    nHexLen= 1;
    for (l= lnMaxDst; l >= 16; l/= 16)
        nHexLen++;

    /* NOTE(review): the source text between the hex-format setup and the
       nUnicode decision was lost.  Reconstructed as: find the first source
       code whose destination does not fit into one byte; entries from
       nUnicode upwards are printed in the wide format. */
    for (i= 0; i<NCODES && table[i] < NCODES; i++)
        ;
    if (i>=256)      nUnicode= 256;
    else if (i>=128) nUnicode= 128;
    else             nUnicode= 0;

    /* ---- change conversion table to become inverse ---- */
    if (fInverse) {
        if (lnMaxDst >= 256L) {
            fprintf( stderr,
                "%s: can't create inverse table, destination code values > 255\n",
                pcb);
            return 3;
        }

        /* fill array of code usage 'usage' */
        memset( usage, 0, sizeof(usage));
        for (i= 0; i<NCODES; i++) {
            /* source code 'i' became free (empty) now */
            usage[i]--;
            /* and destination code became used (lost) */
            usage[table[i]]++;
        }

        /* print to file lost and empty codes */
        fprintf( pf, "Lost codes :");
        for (i= 0; i<NCODES; i++) {
            if (usage[i]>1)
                fprintf( pf, " %03d*%d", i, usage[i]);
            else if (usage[i]>0)
                fprintf( pf, " %03d", i);
        }
        fprintf( pf, "\nEmpty codes :");
        for (i= 0; i<NCODES; i++) {
            if (usage[i]<-1)
                fprintf( pf, " %03d*%d", i, -usage[i]);
            else if (usage[i]<0)
                fprintf( pf, " %03d", i);
        }
        fputs( "\n", pf);

        /* move lost codes to empty ones: every code nothing maps to is
           paired with the first code that is still marked lost, and that
           lost code is redirected to it */
        for (i= 0; i<NCODES; i++) {
            if (usage[i]<0) {
                /* search for the first lost */
                for (j= 0; j<NCODES && usage[j]<=0; j++)
                    ;
                if (j>=NCODES) {
                    fprintf( pf,
                        "%s: unable to make inverse: double conversions\n",
                        pcb);
                    break;
                }
                table[j]= i;
                usage[i]= usage[j]= 0;
            }
        }
    }

    /* ---- Print conversion table ---- */
    cols= fCSource? 8: 16;
    for (i= 0; i<NCODES; i++) {
        /* put comment */
        if (i%cols==0)
            fprintf( pf, " /*%03d-%03d*/ ", i, i+cols-1);

        /* put character */
        if (fCSource) {
            fprintf( pf, " 0x%04lX,", (unsigned long)table[i]);
        }
        else if (i>=nUnicode) {
            /* wide region: both forms occupy nHexLen+2 columns.
               NOTE(review): the original decimal format string was lost;
               zero padding is assumed, as everywhere else in this file. */
            if (table[i] < NCODES)
                fprintf( pf, " %0*ld", nHexLen+2, table[i]);
            else
                fprintf( pf, " 0x%0*lX", nHexLen, (unsigned long)table[i]);
        }
        else {
            if (table[i] < NCODES)
                fprintf( pf, " %03d", (int)table[i]);
            else
                fprintf( pf, " 0x%lX", (unsigned long)table[i]);
        }
        if (i%cols==(cols-1) && i>0 && i<256)
            fprintf( pf, "\n");
    }
    fprintf( pf, "\n");

    return 0;
}
#endif /* PROCPG_NO_MAIN */

/* ----------------------------------------------------------------------
 :; 5. Processing parameters
   ---------------------------------------------------------------------- */

/*
   Parses the command line into the global option variables (szInpFile,
   nSrcType, nDstType, fInverse, fCSource, fGenTable).

   Options may be bundled ("-xdi").  The first data type letter selects the
   source type, every following one the destination type.

   Returns 1 when the program should continue, 0 when it should stop
   (help was requested, or the command line is invalid).
*/
int ProcessParams( int argc, char** argv)
{
    int i, j, type;
    const char *pa;

    /* Set up default values */
    szInpFile= NULL;
    nSrcType= nDstType= 0;
    fInverse= fCSource= fGenTable= 0;

    /* Process parameters */
    for (i = 1; i < argc; i++) {
        pa = argv[ i];

        if (pa[0] != '-') {
            /* not an option: the input file name */
            if (szInpFile) {
                fprintf( stderr, "%s: only single input file may be defined\n",
                         pcb);
                return 0;
            }
            szInpFile= pa;
            continue;
        }

        /* a bundle of one-letter options; a lone "-" is an error */
        j = 1;
        do {
            /* -[c|d|x|e] : source/destination data type */
            switch( pa[j]) {
            case 'c': type= TCHAR; break;
            case 'd': type= TDEC;  break;
            case 'x': type= THEX;  break;
            case 'e': type= THTML; break;
            default:  type= 0;     break;
            }

            if (type) {
                if (nSrcType) nDstType= type;
                else          nSrcType= type;
            }
            else {
                switch( pa[j]) {
                case 'h':               /* -?, -h : help */
                case '?':
                    HelpMain( 1);
                    return 0;
                case 'i':               /* -i : inverse */
                    fInverse= 1;
                    break;
                case 's':               /* -s : C source output */
                    fCSource= 1;
                    break;
                case 'g':               /* -g : generate source table */
                    fGenTable= 1;
                    break;
                default:
                    HelpMain( 0);
                    return 0;
                }
            }
            j++;
        } while (pa[j] != '\0');
    }

    if (!nSrcType) nSrcType= TCHAR;
    if (!nDstType) nDstType= TCHAR;

    if (!szInpFile && !fGenTable) {
        HelpMain( 0);
        return 0;
    }

    return 1;
}

/*
   Prints the usage line to stderr; with fAll != 0 also prints the full
   description of parameters and data types.
*/
void HelpMain( int fAll)
{
    fprintf(stderr, "Usage: %s [-h|?] [ -[X[Y]] ] [-i] [-s] [-g|input_file]\n", pcb);
    if (fAll) {
        fprintf(stderr, "Parameters are:\n");
        fprintf(stderr, " -X - source code data type, default is character\n");
        fprintf(stderr, " -Y - destination code data type, default is character\n");
        fprintf(stderr, " -i - make codepage table to be inverse\n");
        fprintf(stderr, " -s - put all codes as C source\n");
        fprintf(stderr, " -g - create empty source table\n");
        fprintf(stderr, " -h,? - show this help\n");
        fprintf(stderr, "Data types are:\n");
        fprintf(stderr, " c - character (no way to present space)\n");
        fprintf(stderr, " d - decimal value\n");
        fprintf(stderr, " x - hexadecimal value\n");
        fprintf(stderr, " e - html numeric character reference (&#255; &#xFF;)\n");
        fprintf(stderr, "Author : G. Markevicius, Klaipeda, gedimin@proc.w3.lt\n");
        fprintf(stderr, "Version: %s, %s\n", VERSION, __DATE__);
    }
}

/* ----------------------------------------------------------------------
 :; 6. Private procedures
   ---------------------------------------------------------------------- */

/*
   Returns a pointer to the part of szPath that follows the last path
   separator ('\\', '/' or ':').  When there is no separator, or the path
   ends with one (or is empty), szPath itself is returned.
   A NULL szPath yields an empty string.
*/
const char* GetBaseName( const char* szPath)
{
    const char *base, *p;

    if (szPath == NULL)
        return "";

    base= szPath;
    for (p= szPath; *p != '\0'; p++) {
        if (*p=='\\' || *p=='/' || *p==':')
            base= p + 1;
    }
    /* nothing after the last separator: give back the whole path */
    return (*base == '\0')? szPath : base;
}

/*
   fopen() wrapper: on failure prints a message with the reason to stderr.
   Returns the stream, or NULL on failure.
*/
FILE* OpenFile( const char *rfile, const char *rmode)
{
    FILE *fp;

    if ((fp= fopen( rfile, rmode))==NULL) {
        fprintf( stderr, "%s: unable to open `%s' : %s\n",
                 pcb, rfile, strerror( errno));
    }
    return fp;
}

/*
   Writes rval to stream rf in the notation of data type rtype
   (TCHAR, TDEC, THEX or THTML; anything else prints "?").
   Returns 0 on success, 1 when the write failed.
*/
int PutTypeValue( FILE *rf, int rtype, long rval)
{
    int r;

    switch( rtype) {
        case TCHAR: r= fprintf( rf, "%c", (char)rval);              break;
        case TDEC:  r= fprintf( rf, "%3ld", rval);                  break;
        case THEX:  r= fprintf( rf, "%04lX", (unsigned long)rval);  break;
        case THTML: r= fprintf( rf, "&#%03ld", rval);               break;
        default:    r= fprintf( rf, "?");                           break;
    }
    /* fprintf returns a negative value on output error */
    return (r < 0)? 1 : 0;
}

/*
   Removes white space at the beginning and the end of pstr, in place.
   A string of white space only becomes the empty string.
*/
void strtrim( char* pstr)
{
    char *first, *last;
    size_t n;

    /* find the first non-space character ('\0' is not a space) */
    first= pstr;
    while (isspace( (unsigned char)*first))
        first++;
    if (*first=='\0') {
        *pstr= '\0';
        return;
    }

    /* find the last non-space character; *first is known to be one */
    last= first + strlen( first) - 1;
    while (last > first && isspace( (unsigned char)*last))
        last--;

    /* shift the kept part to the start; the regions may overlap */
    n= (size_t)(last - first) + 1;
    if (first != pstr)
        memmove( pstr, first, n);
    pstr[n]= '\0';
}

/* Advances *pc past any leading white space. */
void SkipLeadingSpaces( const char **pc)
{
    while (**pc!='\0' && isspace( (unsigned char)**pc))
        (*pc)++;
}

/*
   Return value (0..255) of first non-space character.
   On error (no such character) returns LONG_MIN.
*/
long charVal( const char *pc)
{
    SkipLeadingSpaces( &pc);
    if (*pc=='\0')
        return LONG_MIN;
    return (unsigned char)*pc;
}

/*
   Converts the leading alphanumeric token of the string to long,
   interpreting it as a hexadecimal number with an optional "0x"/"0X"
   prefix.  Leading white space is skipped.
   On error (empty token, prefix without digits, non-hex character,
   value above LONG_MAX) returns LONG_MIN.
*/
long hexVal( const char *pc)
{
    size_t len, i;
    long val;
    int digit;
    int c;

    SkipLeadingSpaces( &pc);

    /* count token length */
    for( len= 0; isalnum( (unsigned char)pc[len]); len++)
        ;
    if (len==0)
        return LONG_MIN;

    /* 0xNNN format */
    i= 0;
    if (len >= 2 && pc[0]=='0' && (pc[1]=='x' || pc[1]=='X'))
        i= 2;
    if (i==len)
        return LONG_MIN;

    /* convert, most significant digit first */
    val= 0L;
    for ( ; i<len; i++) {
        c= toupper( (unsigned char)pc[i]);
        if (c>='0' && c<='9')
            digit= c - '0';
        else if (c>='A' && c<='F')
            digit= c - 'A' + 10;
        else
            return LONG_MIN;
        /* would val*16 + digit exceed LONG_MAX? */
        if (val > (LONG_MAX - digit) / 16)
            return LONG_MIN;
        val= val*16 + digit;
    }
    return val;
}

/*
   Converts the leading alphanumeric token of the string to long,
   interpreting it as a decimal number.  Leading white space is skipped.
   On error (empty token, non-digit character, value above LONG_MAX)
   returns LONG_MIN.
*/
long decVal( const char *pc)
{
    size_t len, i;
    long val;
    int digit;

    SkipLeadingSpaces( &pc);

    /* count token length */
    for( len= 0; isalnum( (unsigned char)pc[len]); len++)
        ;
    if (len==0)
        return LONG_MIN;

    /* convert, most significant digit first */
    val= 0L;
    for (i= 0; i<len; i++) {
        if (pc[i]<'0' || pc[i]>'9')
            return LONG_MIN;
        digit= pc[i] - '0';
        /* would val*10 + digit exceed LONG_MAX? */
        if (val > (LONG_MAX - digit) / 10)
            return LONG_MIN;
        val= val*10 + digit;
    }
    return val;
}

/*
   Converts string to long interpreting string as html numeric character
   reference, decimal or hexadecimal (e.g. &#254; or &#x5C;).
   On error returns LONG_MIN.
*/
long htmlVal( const char *pc)
{
    SkipLeadingSpaces( &pc);
    if (*(pc++)!='&')
        return LONG_MIN;
    if (*(pc++)!='#')
        return LONG_MIN;
    if (*pc=='x' || *pc=='X')
        return( hexVal( pc+1));
    return( decVal( pc));
}

/*
   Parses the first code found in pc according to data type pType
   (TCHAR, TDEC, THEX or THTML) and stores it in *pval.
   Returns non-zero on success; on failure returns 0 and *pval is LONG_MIN.
*/
int GetValue( int pType, const char* pc, long int *pval)
{
    switch( pType) {
        case TCHAR: *pval= charVal( pc); break;
        case TDEC:  *pval= decVal( pc);  break;
        case THEX:  *pval= hexVal( pc);  break;
        case THTML: *pval= htmlVal( pc); break;
        default:    *pval= LONG_MIN;     break;
    }
    return (*pval>=0L);
}