WEEK5 Data Structures

1. 数组扩容

正常的Array创建会把数据放在栈内存。

也可以使malloc或者calloc分配内存，把数据放在堆空间内。

#include <stdio.h>
#include <stdlib.h>

int main(void){
    int l1[5];
    int *l2 = calloc(sizeof(int), 5);
    l2[0] = 100;
    printf("l1: %p\n", l1);
    printf("l2: %p\n", l2);
    printf("l2[0]: %d\n", l2[0]);
    printf("sizeof(l1): %lu\n", sizeof(l1));
    printf("sizeof(l2): %lu\n", sizeof(l2));
    free(l2);
    printf("l2[0]: %d\n", l2[0]);
    printf("l2: %p\n", l2);
    printf("sizeof(l2): %lu\n", sizeof(l2));
}

执行结果：

l1: 0x7ffdf801dad0
l2: 0x5560b186b3a0
l2[0]: 100
sizeof(l1): 20
sizeof(l2): 8
l2[0]: -410146117
l2: 0x5560b186b3a0
sizeof(l2): 8

区别在，使用sizeof()前者会返回数组大小，后者固定返回8个字节。

注意：在使用free之后，被分配的指针不变，且仍然可以使用，但是里面的数据会清空。

如果要扩容Array有两种方式：

使用数组创建或者malloc或者calloc重新分配内存，然后复制之前的数据，最后添加新数据。
realloc：重新分配内存并复制之前的数据。realloc如果发现在原内存块后有可用内存，将会返回相同的指针地址（这样数组扩展的效率很高）。而且，realloc会自动释放（free）之前的内存块。

#include <stdio.h>
#include <stdlib.h>

int main(void){
    int *l1 = calloc(sizeof(int), 5);
    l1[0] = 123;
    int *l2 = realloc(l1, sizeof(int) * 10);
    
    printf("l1: %p\n", l1);
    printf("l2: %p\n", l2);
    printf("l2[0]: %d\n", l2[0]);

    // free(l1); 不需要
    free(l2);
}

结果：

l1: 0x55cdd5a7f3b0
l2: 0x55cdd5a7f3b0
l2[0]: 123

2. 链表

->符号是语法糖，a -> b等价于*a.b。

常用于用struct作为数据结构，然后保存了struct的指针（注意，Struct变量不是指针，而是类似于数据结构）。

相比于Array，每个数据数需要额外8个字节存储指针。

定义Node：

typedef struct node{
   int val;
   struct node *next;
} node;

赋值：

#include <stdio.h>
#include <stdlib.h>

typedef struct node{
   int val;
   struct node *next;
} node;

int main(void){
    node *n1 = malloc(sizeof(node));
    if (n1){
        (*n1).val = 100;
        printf("%d\n", (*n1).val);
        n1 -> val = 200;
        printf("%d\n", n1 -> val);
      	n1 -> next = NULL;
    }
}

结果：

100
200

注意(*n1).val必须加括号。

如果变量m已经是Node变量，那么(&m) -> val也需要加括号。

3 链表扩容

如果直接创建Node变量，所有的变量数据会存储在栈空间。

如果使用malloc扩容，所有的变量数据会存储在堆空间，注意，在程序结束的时候需要释放（free）所有的Node的内存。

注意：内存的释放必须从最后一个Node开始，因为不能使用已经释放的Node的内存。

一般会使用直接分配内存的方式，因为只有进入堆内存，才能被其他线程共享。

下面的代码实现了：

singlylinkedlist* clist()：创建一个空的单链表。
int add(singlylinkedlist *list, int n)：往链表里加一个元素。
int addarray(singlylinkedlist *list, int n, int *nums)：往链表里加一个Array的元素。
void delete(singlylinkedlist *list)：删除链表的最后一个元素
void freelist(singlylinkedlist *list)：释放链表所占内存。
void printlist(singlylinkedlist *list)：打印链表。

#include <stdlib.h>
#include <stdio.h>

// define a struct for node
typedef struct node{
   int val;
   struct node *next;
} node;

// define a struct for singly linked list
typedef struct{
   int size;
   struct node *first;
} singlylinkedlist;

// prototypes
singlylinkedlist* clist();
int add(singlylinkedlist *list, int n);
int addarray(singlylinkedlist *list, int n, int *nums);
void delete(singlylinkedlist *list);
void freelist(singlylinkedlist *list);
void printlist(singlylinkedlist *list);
node* _cnode(int n, node* nextnode);
node* _last(singlylinkedlist *list);

// create a empty singly linked list
singlylinkedlist* clist(){
    singlylinkedlist *list = malloc(sizeof(singlylinkedlist));
    if (list == NULL){
        return NULL;
    }
    list -> size = 0;
    list -> first = NULL;
    return list;
}

// add one node to the end of the slingly linked list
int add(singlylinkedlist *list, int n){
    if (list == NULL){
        return -1;
    }
    node *last = _last(list);
    node *new = _cnode(n, NULL);
    if (new == NULL){
        return 0;
    }
    if (last == NULL){
        list -> first = new;
        (list -> size)++;
        return 1;
    }
    last -> next = new;
    (list -> size)++;
    return 1;
}

// add nodes to the end of the slingly linked list from an array
int addarray(singlylinkedlist *list, int n, int *nums){
    if (n == 0 || list == NULL){
        return -1;
    }
    node *last = _last(list);
    node *tmp = _cnode(nums[0], NULL);
    if (tmp == NULL){
        return 0;
    }
    if (last == NULL){
        list -> first = tmp;
    } else{
        last -> next = tmp;
    }
    (list -> size)++;
    for (int i = 1; i < n; i++){
        tmp -> next = _cnode(nums[i], NULL);
        tmp = tmp -> next;
        if (tmp == NULL){
            return i;
        }
        (list -> size)++;
    }
    return n;
}

// delete the last node of the singly linked list
void delete(singlylinkedlist *list){
    if (list == NULL || list -> size == 0){
        return;
    }
    if (list -> size == 1){
        free(list -> first);
        list -> first = NULL;
        (list -> size)--;
        return;
    }
    node *tmp = list -> first;
    for (int i = 1; i < (list -> size) - 1; i++){
        tmp = tmp -> next;
    }
    free(tmp -> next);
    tmp -> next = NULL;
    (list -> size)--;
    return;
}

// free the allocated memory of a singly linked list
void freelist(singlylinkedlist *list){
    if (list == NULL){
        return;
    }
    node *tmp = list -> first;
    free(list);
    while (tmp != NULL){
        node* cur = tmp;
        tmp = tmp -> next;
        free(cur);
    }
}

// print the value of all nodes
void printlist(singlylinkedlist *list){
    if (list == NULL){
        return;
    }
    printf("[");
    node *tmp = list -> first;
    while (tmp -> next != NULL){
        printf("%d, ", tmp -> val);
        tmp = tmp -> next;
    }
    printf("%d]\n", tmp -> val);
}

// create a new node
node* _cnode(int n, node *nextnode){
    node *tmp = malloc(sizeof(node));
    if (tmp == NULL){
        return NULL;
    }
    tmp -> next = nextnode;
    tmp -> val = n;
    return tmp;
}

// create a new node
node* _last(singlylinkedlist *list){
    if (list == NULL || list -> size == 0){
        return NULL;
    }
    node *last = list -> first;
    for (int i = 1; i < list -> size; i++){
        last = last -> next;
    }
    return last;
}

测试：

int main(void){
    singlylinkedlist *list = clist();
    add(list, 100);
    printf("size: %d\n", list -> size);
    add(list, 200);
    printf("size: %d\n", list -> size);
    add(list, 300);
    printf("size: %d\n", list -> size);
    int nums[] = {400, 500, 600};
    addarray(list, 3, nums);
    printf("size: %d\n", list -> size);
    delete(list);
    delete(list);
    printlist(list);
    freelist(list);
}

结果：

size: 1
size: 2
size: 3
size: 6
[100, 200, 300, 400]

valgrind测试：

ubuntu@c-test-node:~/C/w5$ valgrind ./singlylinkedlist 
...
==55074== 
==55074== HEAP SUMMARY:
==55074==     in use at exit: 0 bytes in 0 blocks
==55074==   total heap usage: 11 allocs, 11 frees, 341 bytes allocated
==55074== 
==55074== All heap blocks were freed -- no leaks are possible
==55074== 
==55074== For lists of detected and suppressed errors, rerun with: -s
==55074== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

4. Trees

Node和链表一样，只不过存了两个指针：

typedef struct node{
  int val;
  struct node *left;
  struct node *right;
} node;

可以使用递归来free，遍历和搜索。

void free_tree(node *root)
{
    if (root == NULL)
    {
        return;
    }
    free_tree(root->left);
    free_tree(root->right);
    free(root);
}

void print_tree(node *root)
{
    if (root == NULL)
    {
        return;
    }
    print_tree(root->left);
    printf("%i\n", root->number);
    print_tree(root->right);
}

5. 其他数据结构

5.1 Hash Table

其实就是一个Node数组，比如储存字符串的Node：

typedef struct node{
  char word[LONGEST_WORD + 1];
  struct node *next;
} node;

那么哈希表就是：

node *hashtbale[NUMBER_OF_BUCKETS];

5.2 Trie

是一种特殊的树（前缀树），专门用于存字符串，每一个Node都是一张长度为26的哈希表（也可以理解为每一个Node都指向26个子Node，这26个子Node的指针存放在一个长度为26的Array中，如果指针为NULL说明不含有下一个字母为对应字母的单词，反之同理。如果全部为NULL，那么就是某个单词的最终字母。），哈希表里每一个元素都指向下一层的Node。每一个哈希表只和单词中一个字符有关，之后的字符由子节点处理。

typedef struct node{
  bool is_word;
  struct node *children[SIZE_OF_ALPHABET];
} node;

...
  
node *trie[SIZE_OF_ALPHABET];

好处：

Depth = Max Word Length。由于通常最大长度是有限的（英语单词），查找/插入/删除时间复杂度是$O(1)$。

坏处：

单词少的时候，数据结构很Sparse。

5.3 ADT

之所以叫ADT是因为我们只关心功能，不关心实现原理：

Queue：

enqueue
dequeue

Stack：

pop
push

Dictionary：

get
put

6. 关于创建Struct的注意事项

一个Struct是一系列数据的集合，可能是基本数据，也可能是指针。
一个Struct创建之后，其内部的所有数据在内存上是相邻的。
如果使用xxloc方法，这些数据的集合会存放在堆空间，即这些变量的指针都属于堆空间内存。反之，如果直接创建对象，这些数据的集合会存放在栈空间，即这些变量的指针都属于栈空间内存。
对于Struct中的指针变量，变量值（也是指针）和这个Struct在堆中还是在栈中无关，取决于这些变量的定义。
- 如果使用xxloc方法，则这个值指向堆空间。
- 如果申明时候为基本类型数组，那么这指针变量指向的一个元素的内存地址，其实属于这个Struct所包含的内存地址之一。这个数组里的所有数据都包含在个Struct的内存地址中。

PreviousWEEK 4 Memory NextWEEK6 Python

Last updated 2 years ago