a4.pdfEngineering a Sort Function.pdf
scandir




int musl_scandir(const char *path, struct dirent ***res,int (*sel)(const struct dirent *),int (*cmp)(const struct dirent **, const struct dirent **)){DIR *d = opendir(path);struct dirent *de, **names = NULL, **tmp;size_t cnt = 0, len = 0;if (!d) return -1;while ((de = readdir(d))) {if (sel && !sel(de)) continue;if (cnt >= len) {len = 2*len+1;if (len > SIZE_MAX/sizeof(*names)) break;tmp = realloc(names, len * sizeof(*names));if (!tmp) break;names = tmp;}names[cnt] = malloc(de->d_reclen);if (!names[cnt]) break;memcpy(names[cnt++], de, de->d_reclen);}closedir(d);if (errno) {if (names) while (cnt-- > 0) free(names[cnt]);free(names);return -1;}if (cmp) qsort(names, cnt, sizeof *names, (int (*)(const void *, const void *))cmp);*res = names;return cnt;}
Implement myls

void ls(const char *dir, bool show_all, bool sort_by_type)
{
/*
show_all sort_by_type filter
t t folder, not folder
t f *
f f not .*
f t folder not .* not folder not .*
*/
struct dirent **files[2] = {NULL, NULL};
size_t count[2] = {-2, -2};
if (show_all && sort_by_type) {
count[0] = scandir(dir, &files[0], is_fold, dir_comp);
count[1] = scandir(dir, &files[1], is_file, dir_comp);
} else if (show_all && !sort_by_type) {
count[0] = scandir(dir, &files[0], NULL, dir_comp);
} else if (!show_all && !sort_by_type) {
count[0] = scandir(dir, &files[0], wo_dot, dir_comp);
} else {
count[0] = scandir(dir, &files[0], fold_wo_dot, dir_comp);
count[1] = scandir(dir, &files[1], file_wo_dot, dir_comp);
}
for (size_t i = 0; i < 2; i++) {
if (count[i] == -1) {
printf("./myls: cannot access %s: No such directory\n", dir);
continue;
}
if (files[i] == NULL || count[i] == -2) {
continue;
}
for (size_t j = 0; j < count[i]; j++) {
if (is_fold(files[i][j])) {
printf("%s/\n", files[i][j]->d_name);
} else {
printf("%s\n", files[i][j]->d_name);
}
free(files[i][j]);
}
free(files[i]);
}
}
bsearch


Remember that a typecast is a completely compile-time operation, it incurs no runtime performance cost.
Nearly All Binary Searches and Mergesorts are Broken
void* blues
man 3 getopt
mysort.c
注意typedef的使用,简化了一些代码。
//#include "samples/prototypes.h"
#include <error.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_LINE_LEN 4096
#define MIN_NLINES 100
typedef int (*cmp_fn_t)(const void *p, const void *q);
int cmp_pstr(const void *p, const void *q)
{
return strcmp(*(char**)p, *(char**)q);
}
int cmp_pstr_len(const void *p, const void *q)
{
size_t ip = strlen(*(char**)p);// p is a pointer to an array element
size_t iq = strlen(*(char**)q);// hence char**
if (ip > iq)
return 1;
if (ip < iq)
return -1;
//return 0;
return cmp_pstr(p, q);
}
int cmp_pstr_numeric(const void *p, const void *q)
{
int ip = atoi(*(char**)p);
int iq = atoi(*(char**)q);
//This avoids overflow that could happen with subtraction.
if (ip > iq)
return 1;
if (ip < iq)
return -1;
return 0;
}
void reverse_lines(char **lines, size_t nlines){
int i = nlines - 1;
int j = 0;
while(i > j){
char *temp = lines[i];
lines[i] = lines[j];
lines[j] = temp;
i--;
j++;
}
}
void print_lines(char **lines, size_t nlines){
for (int i = 0; i < nlines; i++)
printf("%s", lines[i]);
}
void free_lines(char **lines, size_t nlines){
for (int i = 0; i < nlines; i++)
free(lines[i]);
free(lines);
}
void sort_lines(FILE *fp, cmp_fn_t cmp, bool uniq, bool reverse)
{
char line[MAX_LINE_LEN];
size_t maxlines = MIN_NLINES;
char **lines = calloc(maxlines, sizeof(char*));
size_t nlines = 0;
//read fp
while(ungetc(getc(fp), fp) != EOF){
assert(fgets(line, maxlines, fp) != NULL);
lines[nlines] = strdup(line);
nlines++;
if (nlines == maxlines){
maxlines *= 2;
lines = realloc(lines, maxlines * sizeof(char*));
assert(lines != NULL);
}
}
//sort
if (uniq == true){
char **lines_uniq = calloc(nlines, sizeof(char*));
size_t nlines_uniq = 0;
for (int i = 0; i < nlines; i++){
char **added = binsert(&lines[i], lines_uniq, &nlines_uniq,
sizeof(char*), cmp);
assert(strcmp(*added, lines[i]) == 0);
//^ This crashes if -n is specified and there is junk in input.
}
if (reverse == true)
reverse_lines(lines_uniq, nlines_uniq);
print_lines(lines_uniq, nlines_uniq);
free(lines_uniq);
}else{
qsort(lines, nlines, sizeof(char*), cmp);
if (reverse == true)
reverse_lines(lines, nlines);
print_lines(lines, nlines);
}
free_lines(lines, nlines);
}
int main(int argc, char *argv[])
{
cmp_fn_t cmp = cmp_pstr;
bool uniq = false, reverse = false;
int opt;
while ((opt = getopt(argc, argv, "lnru")) != -1) {
switch (opt) {
case 'l': cmp = cmp_pstr_len; break;
case 'n': cmp = cmp_pstr_numeric; break;
case 'r': reverse = true; break;
case 'u': uniq = true; break;
default: exit(1);
}
}
FILE *fp = stdin;
if (optind < argc) {
fp = fopen(argv[optind], "r");
if (fp == NULL) error(1, 0, "%s: no such file", argv[optind]);
}
sort_lines(fp, cmp, uniq, reverse);
fclose(fp);
return 0;
}
binsert

#include <string.h>
void my_insert(const void *base, size_t *p_nelem, size_t width, void *p, const void *key){
//shift elements from p onwards, including p
//insert key to where p was
for (void *curr = (char *)base + (*p_nelem-1)*width;
(size_t)curr >= (size_t)p;
curr = (char *)curr - width){
memcpy((char *)curr + width, curr, width);
}
memcpy(p, key, width);
(*p_nelem)++;
}
void *my_binsert(const void *key, void *base, size_t *p_nelem, size_t width, int (*compar)(const void *, const void *))
{
if (*p_nelem == 0) {
memcpy(base, key, width);
(*p_nelem)++;
return base;
}
char *base_init = base;
for (size_t nremain = *p_nelem; nremain != 0; nremain >>= 1) {
void *p = (char *)base + (nremain >> 1) * width;
int sign = compar(key, p);
if (sign == 0)
return p;
if (sign > 0) { /* key > p: move right */
if (nremain <= 2) {
char *p_insert = (char *)p + width;
my_insert(base_init, p_nelem, width, p_insert, key);
return p_insert;
}
base = (char *)p + width;
nremain--;
} else {/* else move left */
if (nremain == 1) {
my_insert(base_init, p_nelem, width, p, key);
return p;
}
}
}
return NULL; // FIXME
}
mysort
//#include "samples/prototypes.h"
#include <error.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_LINE_LEN 4096
#define MIN_NLINES 100
typedef int (*cmp_fn_t)(const void *p, const void *q);
int cmp_pstr(const void *p, const void *q)
{
return strcmp(*(char**)p, *(char**)q);
}
int cmp_pstr_len(const void *p, const void *q)
{
size_t ip = strlen(*(char**)p);// p is a pointer to an array element
size_t iq = strlen(*(char**)q);// hence char**
if (ip > iq)
return 1;
if (ip < iq)
return -1;
//return 0;
return cmp_pstr(p, q);
}
int cmp_pstr_numeric(const void *p, const void *q)
{
int ip = atoi(*(char**)p);
int iq = atoi(*(char**)q);
//This avoids overflow that could happen with subtraction.
if (ip > iq)
return 1;
if (ip < iq)
return -1;
return 0;
}
void reverse_lines(char **lines, size_t nlines){
int i = nlines - 1;
int j = 0;
while(i > j){
char *temp = lines[i];
lines[i] = lines[j];
lines[j] = temp;
i--;
j++;
}
}
void print_lines(char **lines, size_t nlines){
for (int i = 0; i < nlines; i++)
printf("%s", lines[i]);
}
void free_lines(char **lines, size_t nlines){
for (int i = 0; i < nlines; i++)
free(lines[i]);
free(lines);
}
void my_insert(const void *base, size_t *p_nelem, size_t width, void *p, const void *key){
//shift elements from p onwards, including p
//insert key to where p was
for (void *curr = (char *)base + (*p_nelem-1)*width;
(size_t)curr >= (size_t)p;
curr = (char *)curr - width){
memcpy((char *)curr + width, curr, width);
}
memcpy(p, key, width);
(*p_nelem)++;
}
void *my_binsert(const void *key, void *base, size_t *p_nelem, size_t width, int (*compar)(const void *, const void *))
{
if (*p_nelem == 0) {
memcpy(base, key, width);
(*p_nelem)++;
return base;
}
char *base_init = base;
for (size_t nremain = *p_nelem; nremain != 0; nremain >>= 1) {
void *p = (char *)base + (nremain >> 1) * width;
int sign = compar(key, p);
if (sign == 0)
return p;
if (sign > 0) { /* key > p: move right */
if (nremain <= 2) {
char *p_insert = (char *)p + width;
my_insert(base_init, p_nelem, width, p_insert, key);
return p_insert;
}
base = (char *)p + width;
nremain--;
} else {/* else move left */
if (nremain == 1) {
my_insert(base_init, p_nelem, width, p, key);
return p;
}
}
}
return NULL; // FIXME
}
void sort_lines(FILE *fp, cmp_fn_t cmp, bool uniq, bool reverse) {
char line[MAX_LINE_LEN];
size_t max_lines_num = MIN_NLINES;
char **lines = malloc(max_lines_num * sizeof(char*));
size_t lines_index = 0;
while (ungetc(getc(fp), fp) != EOF) {
assert(fgets(line, max_lines_num, fp) != NULL);
*(lines + lines_index) = strdup(line);
lines_index++;
if (lines_index == max_lines_num) {
max_lines_num *= 2;
lines = realloc(lines, max_lines_num * sizeof(char*));
assert(lines != NULL);
}
}
if (uniq) {
char **lines_uniq = malloc(lines_index * sizeof(char*));
size_t lines_uniq_index = 0;
for (size_t i = 0; i < lines_index; i++) {
char **insert_out = my_binsert(&*(lines + i), lines_uniq, &lines_uniq_index,
sizeof(char*), cmp);
assert(strcmp(*insert_out, *(lines + i)) == 0);
}
if (reverse) {
reverse_lines(lines_uniq, lines_uniq_index);
}
print_lines(lines_uniq, lines_uniq_index);
free(lines_uniq);
} else {
qsort(lines, lines_index, sizeof(char*), cmp);
if (reverse) {
reverse_lines(lines, lines_index);
}
print_lines(lines, lines_index);
}
free_lines(lines, lines_index);
}
int main(int argc, char *argv[])
{
cmp_fn_t cmp = cmp_pstr;
bool uniq = false, reverse = false;
int opt;
while ((opt = getopt(argc, argv, "lnru")) != -1) {
switch (opt) {
case 'l': cmp = cmp_pstr_len; break;
case 'n': cmp = cmp_pstr_numeric; break;
case 'r': reverse = true; break;
case 'u': uniq = true; break;
default: exit(1);
}
}
FILE *fp = stdin;
if (optind < argc) {
fp = fopen(argv[optind], "r");
if (fp == NULL) error(1, 0, "%s: no such file", argv[optind]);
}
sort_lines(fp, cmp, uniq, reverse);
fclose(fp);
return 0;
}
