busybox/coreutils/sort.c
Rob Landley 39218932b1 Fix bug found by Joey Hess and forwarded by Bastian Bunk. Add Joey Hess's
test case to the testsuite, plus another one that would have been broken by
the first attempt at a fix.
2006-06-15 20:50:38 +00:00

331 lines
8.9 KiB
C

/* vi: set sw=4 ts=4: */
/*
* SuS3 compliant sort implementation for busybox
*
* Copyright (C) 2004 by Rob Landley <rob@landley.net>
*
* MAINTAINER: Rob Landley <rob@landley.net>
*
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
*
* See SuS3 sort standard at:
* http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html
*/
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "busybox.h"
static int global_flags;
/*
sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...]
sort -c [-bdfinru][-t char][-k keydef][file]
*/
/* These are sort types */
#define FLAG_n 1 /* Numeric sort */
#define FLAG_g 2 /* Sort using strtod() */
#define FLAG_M 4 /* Sort date */
/* ucsz apply to root level only, not keys. b at root level implies bb */
#define FLAG_u 8 /* Unique */
#define FLAG_c 16 /* Check: no output, exit(!ordered) */
#define FLAG_s 32 /* Stable sort, no ascii fallback at end */
#define FLAG_z 64 /* Input is null terminated, not \n */
/* These can be applied to search keys, the previous four can't */
#define FLAG_b 128 /* Ignore leading blanks */
#define FLAG_r 256 /* Reverse */
#define FLAG_d 512 /* Ignore !(isalnum()|isspace()) */
#define FLAG_f 1024 /* Force uppercase */
#define FLAG_i 2048 /* Ignore !isprint() */
#define FLAG_bb 32768 /* Ignore trailing blanks */
#ifdef CONFIG_FEATURE_SORT_BIG
static char key_separator;
static struct sort_key
{
struct sort_key *next_key; /* linked list */
unsigned short range[4]; /* start word, start char, end word, end char */
int flags;
} *key_list;
static char *get_key(char *str, struct sort_key *key, int flags)
{
int start=0,end,len,i,j;
/* Special case whole string, so we don't have to make a copy */
if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
&& !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str;
/* Find start of key on first pass, end on second pass*/
len=strlen(str);
for(j=0;j<2;j++) {
if(!key->range[2*j]) end=len;
/* Loop through fields */
else {
end=0;
for(i=1;i<key->range[2*j]+j;i++) {
/* Skip leading blanks or first separator */
if(str[end]) {
if(!key_separator && isspace(str[end]))
while(isspace(str[end])) end++;
}
/* Skip body of key */
for(;str[end];end++) {
if(key_separator) {
if(str[end]==key_separator) break;
} else if(isspace(str[end])) break;
}
}
}
if(!j) start=end;
}
/* Key with explicit separator starts after separator */
if(key_separator && str[start]==key_separator) start++;
/* Strip leading whitespace if necessary */
if(flags&FLAG_b) while(isspace(str[start])) start++;
/* Strip trailing whitespace if necessary */
if(flags&FLAG_bb) while(end>start && isspace(str[end-1])) end--;
/* Handle offsets on start and end */
if(key->range[3]) {
end+=key->range[3]-1;
if(end>len) end=len;
}
if(key->range[1]) {
start+=key->range[1]-1;
if(start>len) start=len;
}
/* Make the copy */
if(end<start) end=start;
str=bb_xstrndup(str+start,end-start);
/* Handle -d */
if(flags&FLAG_d) {
for(start=end=0;str[end];end++)
if(isspace(str[end]) || isalnum(str[end])) str[start++]=str[end];
str[start]=0;
}
/* Handle -i */
if(flags&FLAG_i) {
for(start=end=0;str[end];end++)
if(isprint(str[end])) str[start++]=str[end];
str[start]=0;
}
/* Handle -f */
if(flags*FLAG_f) for(i=0;str[i];i++) str[i]=toupper(str[i]);
return str;
}
static struct sort_key *add_key(void)
{
struct sort_key **pkey=&key_list;
while(*pkey) pkey=&((*pkey)->next_key);
return *pkey=xcalloc(1,sizeof(struct sort_key));
}
#define GET_LINE(fp) (global_flags&FLAG_z) ? bb_get_chunk_from_file(fp,NULL) \
: bb_get_chomped_line_from_file(fp)
#else
#define GET_LINE(fp) bb_get_chomped_line_from_file(fp)
#endif
/* Iterate through keys list and perform comparisons */
static int compare_keys(const void *xarg, const void *yarg)
{
int flags=global_flags,retval=0;
char *x,*y;
#ifdef CONFIG_FEATURE_SORT_BIG
struct sort_key *key;
for(key=key_list;!retval && key;key=key->next_key) {
flags=(key->flags) ? key->flags : global_flags;
/* Chop out and modify key chunks, handling -dfib */
x=get_key(*(char **)xarg,key,flags);
y=get_key(*(char **)yarg,key,flags);
#else
/* This curly bracket serves no purpose but to match the nesting
level of the for() loop we're not using */
{
x=*(char **)xarg;
y=*(char **)yarg;
#endif
/* Perform actual comparison */
switch(flags&7) {
default:
bb_error_msg_and_die("Unknown sort type.");
break;
/* Ascii sort */
case 0:
retval=strcmp(x,y);
break;
#ifdef CONFIG_FEATURE_SORT_BIG
case FLAG_g:
{
char *xx,*yy;
double dx=strtod(x,&xx), dy=strtod(y,&yy);
/* not numbers < NaN < -infinity < numbers < +infinity) */
if(x==xx) retval=(y==yy ? 0 : -1);
else if(y==yy) retval=1;
/* Check for isnan */
else if(dx != dx) retval = (dy != dy) ? 0 : -1;
else if(dy != dy) retval = 1;
/* Check for infinity. Could underflow, but it avoids libm. */
else if(1.0/dx == 0.0) {
if(dx<0) retval=((1.0/dy == 0.0 && dy<0) ? 0 : -1);
else retval=((1.0/dy == 0.0 && dy>0) ? 0 : 1);
} else if(1.0/dy == 0.0) retval=dy<0 ? 1 : -1;
else retval=dx>dy ? 1 : (dx<dy ? -1 : 0);
break;
}
case FLAG_M:
{
struct tm thyme;
int dx;
char *xx,*yy;
xx=strptime(x,"%b",&thyme);
dx=thyme.tm_mon;
yy=strptime(y,"%b",&thyme);
if(!xx) retval=(!yy ? 0 : -1);
else if(!yy) retval=1;
else retval=(dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon);
break;
}
/* Full floating point version of -n */
case FLAG_n:
{
double dx=atof(x),dy=atof(y);
retval=dx>dy ? 1 : (dx<dy ? -1 : 0);
break;
}
}
/* Free key copies. */
if(x!=*(char **)xarg) free(x);
if(y!=*(char **)yarg) free(y);
if(retval) break;
#else
/* Integer version of -n for tiny systems */
case FLAG_n:
retval=atoi(x)-atoi(y);
break;
}
#endif
}
/* Perform fallback sort if necessary */
if(!retval && !(global_flags&FLAG_s))
retval=strcmp(*(char **)xarg, *(char **)yarg);
//dprintf(2,"reverse=%d\n",flags&FLAG_r);
return ((flags&FLAG_r)?-1:1)*retval;
}
int sort_main(int argc, char **argv)
{
FILE *fp,*outfile=NULL;
int linecount=0,i,flag;
char *line,**lines=NULL,*optlist="ngMucszbrdfimS:T:o:k:t:";
int c;
bb_default_error_retval = 2;
/* Parse command line options */
while((c=getopt(argc,argv,optlist))>0) {
line=strchr(optlist,c);
if(!line) bb_show_usage();
switch(*line) {
#ifdef CONFIG_FEATURE_SORT_BIG
case 'o':
if(outfile) bb_error_msg_and_die("Too many -o.");
outfile=bb_xfopen(optarg,"w");
break;
case 't':
if(key_separator || optarg[1])
bb_error_msg_and_die("Too many -t.");
key_separator=*optarg;
break;
/* parse sort key */
case 'k':
{
struct sort_key *key=add_key();
char *temp, *temp2;
temp=optarg;
for(i=0;*temp;) {
/* Start of range */
key->range[2*i]=(unsigned short)strtol(temp,&temp,10);
if(*temp=='.')
key->range[(2*i)+1]=(unsigned short)strtol(temp+1,&temp,10);
for(;*temp;temp++) {
if(*temp==',' && !i++) {
temp++;
break;
} /* no else needed: fall through to syntax error
because comma isn't in optlist */
temp2=strchr(optlist,*temp);
flag=(1<<(temp2-optlist));
if(!temp2 || (flag>FLAG_M && flag<FLAG_b))
bb_error_msg_and_die("Unknown key option.");
/* b after , means strip _trailing_ space */
if(i && flag==FLAG_b) flag=FLAG_bb;
key->flags|=flag;
}
}
break;
}
#endif
default:
global_flags|=(1<<(line-optlist));
/* global b strips leading and trailing spaces */
if(global_flags&FLAG_b) global_flags|=FLAG_bb;
break;
}
}
/* Open input files and read data */
for(i=argv[optind] ? optind : optind-1;argv[i];i++) {
if(i<optind || (*argv[i]=='-' && !argv[i][1])) fp=stdin;
else fp=bb_xfopen(argv[i],"r");
for(;;) {
line=GET_LINE(fp);
if(!line) break;
if(!(linecount&63))
lines=xrealloc(lines, sizeof(char *)*(linecount+64));
lines[linecount++]=line;
}
fclose(fp);
}
#ifdef CONFIG_FEATURE_SORT_BIG
/* if no key, perform alphabetic sort */
if(!key_list) add_key()->range[0]=1;
/* handle -c */
if(global_flags&FLAG_c) {
int j=(global_flags&FLAG_u) ? -1 : 0;
for(i=1;i<linecount;i++)
if(compare_keys(&lines[i-1],&lines[i])>j) {
fprintf(stderr,"Check line %d\n",i);
return 1;
}
return 0;
}
#endif
/* Perform the actual sort */
qsort(lines,linecount,sizeof(char *),compare_keys);
/* handle -u */
if(global_flags&FLAG_u) {
for(flag=0,i=1;i<linecount;i++) {
if(!compare_keys(&lines[flag],&lines[i])) free(lines[i]);
else lines[++flag]=lines[i];
}
if(linecount) linecount=flag+1;
}
/* Print it */
if(!outfile) outfile=stdout;
for(i=0;i<linecount;i++) fprintf(outfile,"%s\n",lines[i]);
bb_fflush_stdout_and_exit(EXIT_SUCCESS);
}