a4.pdfEngineering a Sort Function.pdf

scandir

image.png
image.png
image.png
image.png

  1. int musl_scandir(const char *path, struct dirent ***res,
  2. int (*sel)(const struct dirent *),
  3. int (*cmp)(const struct dirent **, const struct dirent **))
  4. {
  5. DIR *d = opendir(path);
  6. struct dirent *de, **names = NULL, **tmp;
  7. size_t cnt = 0, len = 0;
  8. if (!d) return -1;
  9. while ((de = readdir(d))) {
  10. if (sel && !sel(de)) continue;
  11. if (cnt >= len) {
  12. len = 2*len+1;
  13. if (len > SIZE_MAX/sizeof(*names)) break;
  14. tmp = realloc(names, len * sizeof(*names));
  15. if (!tmp) break;
  16. names = tmp;
  17. }
  18. names[cnt] = malloc(de->d_reclen);
  19. if (!names[cnt]) break;
  20. memcpy(names[cnt++], de, de->d_reclen);
  21. }
  22. closedir(d);
  23. if (errno) {
  24. if (names) while (cnt-- > 0) free(names[cnt]);
  25. free(names);
  26. return -1;
  27. }
  28. if (cmp) qsort(names, cnt, sizeof *names, (int (*)(const void *, const void *))cmp);
  29. *res = names;
  30. return cnt;
  31. }

Implement myls

image.png

void ls(const char *dir, bool show_all, bool sort_by_type)
{
/*
show_all sort_by_type   filter
t       t               folder, not folder
t       f               *
f       f               not .*
f       t               folder not .*   not folder not .*
*/

  struct dirent **files[2] = {NULL, NULL};
  size_t count[2] = {-2, -2};

  if (show_all && sort_by_type) {
    count[0] = scandir(dir, &files[0], is_fold, dir_comp);
    count[1] = scandir(dir, &files[1], is_file, dir_comp);
  } else if (show_all && !sort_by_type) {
    count[0] = scandir(dir, &files[0], NULL, dir_comp);
  } else if (!show_all && !sort_by_type) {
    count[0] = scandir(dir, &files[0], wo_dot, dir_comp);
  } else {
    count[0] = scandir(dir, &files[0], fold_wo_dot, dir_comp);
    count[1] = scandir(dir, &files[1], file_wo_dot, dir_comp);
  }

  for (size_t i = 0; i < 2; i++) {
    if (count[i] == -1) {
      printf("./myls: cannot access %s: No such directory\n", dir);
      continue;
    }
    if (files[i] == NULL || count[i] == -2) {
      continue;
    }
    for (size_t j = 0; j < count[i]; j++) {
      if (is_fold(files[i][j])) {
        printf("%s/\n", files[i][j]->d_name);
      } else {
        printf("%s\n", files[i][j]->d_name);
      }
      free(files[i][j]);
    }
    free(files[i]);
  }
}

bsearch

image.png
image.png
Remember that a typecast is a completely compile-time operation, it incurs no runtime performance cost.

Nearly All Binary Searches and Mergesorts are Broken

这篇文章很重要。

void* blues

image.png

man 3 getopt

image.png

mysort.c

注意typedef的使用,简化了一些代码。

//#include "samples/prototypes.h"
#include <error.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_LINE_LEN 4096
#define MIN_NLINES 100

typedef int (*cmp_fn_t)(const void *p, const void *q);

int cmp_pstr(const void *p, const void *q)
{
  return strcmp(*(char**)p, *(char**)q);
}

int cmp_pstr_len(const void *p, const void *q)
{
  size_t ip = strlen(*(char**)p);// p is a pointer to an array element
  size_t iq = strlen(*(char**)q);// hence char**
  if (ip > iq)
    return 1;
  if (ip < iq)
    return -1;
  //return 0;
  return cmp_pstr(p, q);
}

int cmp_pstr_numeric(const void *p, const void *q)
{
  int ip = atoi(*(char**)p);
  int iq = atoi(*(char**)q);
  //This avoids overflow that could happen with subtraction.
  if (ip > iq)
    return 1;
  if (ip < iq)
    return -1;
  return 0;
}

void reverse_lines(char **lines, size_t nlines){
  int i = nlines - 1;
  int j = 0;
  while(i > j){
    char *temp = lines[i];
    lines[i] = lines[j];
    lines[j] = temp;
    i--;
    j++;
  }
}

void print_lines(char **lines, size_t nlines){
  for (int i = 0; i < nlines; i++)
    printf("%s", lines[i]);
}

void free_lines(char **lines, size_t nlines){
  for (int i = 0; i < nlines; i++)
    free(lines[i]);
  free(lines);
}

void sort_lines(FILE *fp, cmp_fn_t cmp, bool uniq, bool reverse)
{
  char line[MAX_LINE_LEN];
  size_t maxlines = MIN_NLINES;
  char **lines = calloc(maxlines, sizeof(char*));
  size_t nlines = 0;
  //read fp
  while(ungetc(getc(fp), fp) != EOF){
    assert(fgets(line, maxlines, fp) != NULL);
    lines[nlines] = strdup(line);
    nlines++;
    if (nlines == maxlines){
      maxlines *= 2;
      lines = realloc(lines, maxlines * sizeof(char*));
      assert(lines != NULL);
    }
  }
  //sort
  if (uniq == true){
    char **lines_uniq = calloc(nlines, sizeof(char*));
    size_t nlines_uniq = 0;
    for (int i = 0; i < nlines; i++){
      char **added = binsert(&lines[i], lines_uniq, &nlines_uniq,
                             sizeof(char*), cmp);
      assert(strcmp(*added, lines[i]) == 0);
      //^ This crashes if -n is specified and there is junk in input.
    }
    if (reverse == true)
      reverse_lines(lines_uniq, nlines_uniq);
    print_lines(lines_uniq, nlines_uniq);
    free(lines_uniq);
  }else{
    qsort(lines, nlines, sizeof(char*), cmp);
    if (reverse == true)
      reverse_lines(lines, nlines);
    print_lines(lines, nlines);
  }
  free_lines(lines, nlines);
}

int main(int argc, char *argv[])
{
    cmp_fn_t cmp = cmp_pstr;
    bool uniq = false, reverse = false;

    int opt;
    while ((opt = getopt(argc, argv, "lnru")) != -1) {
        switch (opt) {
            case 'l': cmp = cmp_pstr_len; break;
            case 'n': cmp = cmp_pstr_numeric; break;
            case 'r': reverse = true; break;
            case 'u': uniq = true; break;
            default: exit(1);
        }
    }

    FILE *fp = stdin;
    if (optind < argc) {
        fp = fopen(argv[optind], "r");
        if (fp == NULL) error(1, 0, "%s: no such file", argv[optind]);
    }
    sort_lines(fp, cmp, uniq, reverse);
    fclose(fp);
    return 0;
}

binsert

image.png

#include <string.h>

void my_insert(const void *base, size_t *p_nelem, size_t width, void *p, const void *key){
//shift elements from p onwards, including p
//insert key to where p was
  for (void *curr = (char *)base + (*p_nelem-1)*width;
       (size_t)curr >= (size_t)p;
       curr = (char *)curr - width){
    memcpy((char *)curr + width, curr, width);
  }
  memcpy(p, key, width);
  (*p_nelem)++;
}


void *my_binsert(const void *key, void *base, size_t *p_nelem, size_t width, int (*compar)(const void *, const void *))
{
  if (*p_nelem == 0) {
    memcpy(base, key, width);
    (*p_nelem)++;
    return base;
  }

  char *base_init = base;
  for (size_t nremain = *p_nelem; nremain != 0; nremain >>= 1) {
    void *p = (char *)base + (nremain >> 1) * width;
    int sign = compar(key, p);
    if (sign == 0)
      return p;
    if (sign > 0) { /* key > p: move right */
      if (nremain <= 2) {
        char *p_insert = (char *)p + width;
        my_insert(base_init, p_nelem, width, p_insert, key);
        return p_insert;
      }
      base = (char *)p + width;
      nremain--;
    } else {/* else move left */
      if (nremain == 1) {
        my_insert(base_init, p_nelem, width, p, key);
        return p;
      }
    }
  }
  return NULL; // FIXME
}

mysort

//#include "samples/prototypes.h"
#include <error.h>
#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#define MAX_LINE_LEN 4096
#define MIN_NLINES 100

typedef int (*cmp_fn_t)(const void *p, const void *q);

int cmp_pstr(const void *p, const void *q)
{
  return strcmp(*(char**)p, *(char**)q);
}

int cmp_pstr_len(const void *p, const void *q)
{
  size_t ip = strlen(*(char**)p);// p is a pointer to an array element
  size_t iq = strlen(*(char**)q);// hence char**
  if (ip > iq)
    return 1;
  if (ip < iq)
    return -1;
  //return 0;
  return cmp_pstr(p, q);
}

int cmp_pstr_numeric(const void *p, const void *q)
{
  int ip = atoi(*(char**)p);
  int iq = atoi(*(char**)q);
  //This avoids overflow that could happen with subtraction.
  if (ip > iq)
    return 1;
  if (ip < iq)
    return -1;
  return 0;
}

void reverse_lines(char **lines, size_t nlines){
  int i = nlines - 1;
  int j = 0;
  while(i > j){
    char *temp = lines[i];
    lines[i] = lines[j];
    lines[j] = temp;
    i--;
    j++;
  }
}

void print_lines(char **lines, size_t nlines){
  for (int i = 0; i < nlines; i++)
    printf("%s", lines[i]);
}

void free_lines(char **lines, size_t nlines){
  for (int i = 0; i < nlines; i++)
    free(lines[i]);
  free(lines);
}


void my_insert(const void *base, size_t *p_nelem, size_t width, void *p, const void *key){
//shift elements from p onwards, including p
//insert key to where p was
  for (void *curr = (char *)base + (*p_nelem-1)*width;
       (size_t)curr >= (size_t)p;
       curr = (char *)curr - width){
    memcpy((char *)curr + width, curr, width);
  }
  memcpy(p, key, width);
  (*p_nelem)++;
}


void *my_binsert(const void *key, void *base, size_t *p_nelem, size_t width, int (*compar)(const void *, const void *))
{
  if (*p_nelem == 0) {
    memcpy(base, key, width);
    (*p_nelem)++;
    return base;
  }

  char *base_init = base;
  for (size_t nremain = *p_nelem; nremain != 0; nremain >>= 1) {
    void *p = (char *)base + (nremain >> 1) * width;
    int sign = compar(key, p);
    if (sign == 0)
      return p;
    if (sign > 0) { /* key > p: move right */
      if (nremain <= 2) {
        char *p_insert = (char *)p + width;
        my_insert(base_init, p_nelem, width, p_insert, key);
        return p_insert;
      }
      base = (char *)p + width;
      nremain--;
    } else {/* else move left */
      if (nremain == 1) {
        my_insert(base_init, p_nelem, width, p, key);
        return p;
      }
    }
  }
  return NULL; // FIXME
}

void sort_lines(FILE *fp, cmp_fn_t cmp, bool uniq, bool reverse) {
  char line[MAX_LINE_LEN];
  size_t max_lines_num = MIN_NLINES;
  char **lines = malloc(max_lines_num * sizeof(char*));
  size_t lines_index = 0;

  while (ungetc(getc(fp), fp) != EOF) {
    assert(fgets(line, max_lines_num, fp) != NULL);
    *(lines + lines_index) = strdup(line);
    lines_index++;

    if (lines_index == max_lines_num) {
      max_lines_num *= 2;
      lines = realloc(lines, max_lines_num * sizeof(char*));
      assert(lines != NULL);
    }
  }

  if (uniq) {
    char **lines_uniq = malloc(lines_index * sizeof(char*));
    size_t lines_uniq_index = 0;

    for (size_t i = 0; i < lines_index; i++) {
      char **insert_out = my_binsert(&*(lines + i), lines_uniq, &lines_uniq_index,
                                    sizeof(char*), cmp);
      assert(strcmp(*insert_out, *(lines + i)) == 0);
    }
    if (reverse) {
      reverse_lines(lines_uniq, lines_uniq_index);
    }
    print_lines(lines_uniq, lines_uniq_index);
    free(lines_uniq);
  } else {
    qsort(lines, lines_index, sizeof(char*), cmp);
    if (reverse) {
      reverse_lines(lines, lines_index);
    }
    print_lines(lines, lines_index);
  }
  free_lines(lines, lines_index);
}

int main(int argc, char *argv[])
{
    cmp_fn_t cmp = cmp_pstr;
    bool uniq = false, reverse = false;

    int opt;
    while ((opt = getopt(argc, argv, "lnru")) != -1) {
        switch (opt) {
            case 'l': cmp = cmp_pstr_len; break;
            case 'n': cmp = cmp_pstr_numeric; break;
            case 'r': reverse = true; break;
            case 'u': uniq = true; break;
            default: exit(1);
        }
    }

    FILE *fp = stdin;
    if (optind < argc) {
        fp = fopen(argv[optind], "r");
        if (fp == NULL) error(1, 0, "%s: no such file", argv[optind]);
    }
    sort_lines(fp, cmp, uniq, reverse);
    fclose(fp);
    return 0;
}