diff --git a/C/Sketches/Sketch_05.1_CameraWebServer/Sketch_05.1_CameraWebServer.ino b/C/Sketches/Sketch_05.1_CameraWebServer/Sketch_05.1_CameraWebServer.ino index 674b880..6ea3678 100644 --- a/C/Sketches/Sketch_05.1_CameraWebServer/Sketch_05.1_CameraWebServer.ino +++ b/C/Sketches/Sketch_05.1_CameraWebServer/Sketch_05.1_CameraWebServer.ino @@ -8,9 +8,12 @@ #include "esp_camera.h" #include +// =================== // Select camera model +// =================== #define CAMERA_MODEL_WROVER_KIT // Has PSRAM //#define CAMERA_MODEL_ESP_EYE // Has PSRAM +//#define CAMERA_MODEL_ESP32S3_EYE // Has PSRAM //#define CAMERA_MODEL_M5STACK_PSRAM // Has PSRAM //#define CAMERA_MODEL_M5STACK_V2_PSRAM // M5Camera version B Has PSRAM //#define CAMERA_MODEL_M5STACK_WIDE // Has PSRAM @@ -18,6 +21,10 @@ //#define CAMERA_MODEL_M5STACK_UNITCAM // No PSRAM //#define CAMERA_MODEL_AI_THINKER // Has PSRAM //#define CAMERA_MODEL_TTGO_T_JOURNAL // No PSRAM +// ** Espressif Internal Boards ** +//#define CAMERA_MODEL_ESP32_CAM_BOARD +//#define CAMERA_MODEL_ESP32S2_CAM_BOARD +//#define CAMERA_MODEL_ESP32S3_CAM_LCD #include "camera_pins.h" @@ -35,8 +42,6 @@ void setup() { Serial.println(); config_init(); - config.frame_size = FRAMESIZE_VGA; - config.jpeg_quality = 10; // camera init esp_err_t err = esp_camera_init(&config); @@ -46,7 +51,7 @@ void setup() { } sensor_t * s = esp_camera_sensor_get(); - s->set_vflip(s, 1); //1-Upside down, 0-No operation + s->set_vflip(s, 0); //1-Upside down, 0-No operation s->set_hmirror(s, 0); //1-Reverse left and right, 0-No operation s->set_brightness(s, 1); //up the blightness just a bit s->set_saturation(s, -1); //lower the saturation @@ -90,6 +95,11 @@ void config_init() { config.pin_pwdn = PWDN_GPIO_NUM; config.pin_reset = RESET_GPIO_NUM; config.xclk_freq_hz = 20000000; - config.pixel_format = PIXFORMAT_JPEG; + config.frame_size = FRAMESIZE_QVGA; + config.pixel_format = PIXFORMAT_JPEG; // for streaming + //config.pixel_format = PIXFORMAT_RGB565; // for face 
detection/recognition + config.grab_mode = CAMERA_GRAB_WHEN_EMPTY; + config.fb_location = CAMERA_FB_IN_PSRAM; + config.jpeg_quality = 12; config.fb_count = 1; } diff --git a/C/Sketches/Sketch_05.1_CameraWebServer/app_httpd.cpp b/C/Sketches/Sketch_05.1_CameraWebServer/app_httpd.cpp index 2789647..91a072d 100644 --- a/C/Sketches/Sketch_05.1_CameraWebServer/app_httpd.cpp +++ b/C/Sketches/Sketch_05.1_CameraWebServer/app_httpd.cpp @@ -17,7 +17,6 @@ #include "img_converters.h" #include "fb_gfx.h" #include "driver/ledc.h" -//#include "camera_index.h" #include "sdkconfig.h" #include "camera_index.h" @@ -29,14 +28,37 @@ static const char *TAG = "camera_httpd"; #endif +// Face Detection will not work on boards without (or with disabled) PSRAM +#ifdef BOARD_HAS_PSRAM +#define CONFIG_ESP_FACE_DETECT_ENABLED 1 +// Face Recognition takes upwards of 15 seconds per frame on chips other than ESP32S3 +// Makes no sense to have it enabled for them +#if CONFIG_IDF_TARGET_ESP32S3 +#define CONFIG_ESP_FACE_RECOGNITION_ENABLED 1 +#else +#define CONFIG_ESP_FACE_RECOGNITION_ENABLED 0 +#endif +#else +#define CONFIG_ESP_FACE_DETECT_ENABLED 0 +#define CONFIG_ESP_FACE_RECOGNITION_ENABLED 0 +#endif + #if CONFIG_ESP_FACE_DETECT_ENABLED -#include "fd_forward.h" +#include +#include "human_face_detect_msr01.hpp" +#include "human_face_detect_mnp01.hpp" + +#define TWO_STAGE 1 /* very large firmware, very slow, reboots when streaming... 
-#define ENROLL_CONFIRM_TIMES 5 #define FACE_ID_SAVE_NUMBER 7 #endif @@ -78,12 +100,24 @@ httpd_handle_t camera_httpd = NULL; static int8_t detection_enabled = 0; -static mtmn_config_t mtmn_config = {0}; +// #if TWO_STAGE +// static HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F); +// static HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5); +// #else +// static HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F); +// #endif #if CONFIG_ESP_FACE_RECOGNITION_ENABLED static int8_t recognition_enabled = 0; static int8_t is_enrolling = 0; -static face_id_list id_list = {0}; + +#if QUANT_TYPE + // S16 model + FaceRecognition112V1S16 recognizer; +#else + // S8 model + FaceRecognition112V1S8 recognizer; +#endif #endif #endif @@ -114,6 +148,7 @@ static ra_filter_t *ra_filter_init(ra_filter_t *filter, size_t sample_size) return filter; } +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO static int ra_filter_run(ra_filter_t *filter, int value) { if (!filter->values) @@ -131,21 +166,16 @@ static int ra_filter_run(ra_filter_t *filter, int value) } return filter->sum / filter->count; } +#endif #if CONFIG_ESP_FACE_DETECT_ENABLED #if CONFIG_ESP_FACE_RECOGNITION_ENABLED -static void rgb_print(dl_matrix3du_t *image_matrix, uint32_t color, const char *str) +static void rgb_print(fb_data_t *fb, uint32_t color, const char *str) { - fb_data_t fb; - fb.width = image_matrix->w; - fb.height = image_matrix->h; - fb.data = image_matrix->item; - fb.bytes_per_pixel = 3; - fb.format = FB_BGR888; - fb_gfx_print(&fb, (fb.width - (strlen(str) * 14)) / 2, 10, color, str); + fb_gfx_print(fb, (fb->width - (strlen(str) * 14)) / 2, 10, color, str); } -static int rgb_printf(dl_matrix3du_t *image_matrix, uint32_t color, const char *format, ...) +static int rgb_printf(fb_data_t *fb, uint32_t color, const char *format, ...) 
{ char loc_buf[64]; char *temp = loc_buf; @@ -166,7 +196,7 @@ static int rgb_printf(dl_matrix3du_t *image_matrix, uint32_t color, const char * } vsnprintf(temp, len + 1, format, arg); va_end(arg); - rgb_print(image_matrix, color, temp); + rgb_print(fb, color, temp); if (len > 64) { free(temp); @@ -174,9 +204,9 @@ static int rgb_printf(dl_matrix3du_t *image_matrix, uint32_t color, const char * return len; } #endif -static void draw_face_boxes(dl_matrix3du_t *image_matrix, box_array_t *boxes, int face_id) +static void draw_face_boxes(fb_data_t *fb, std::list *results, int face_id) { - int x, y, w, h, i; + int x, y, w, h; uint32_t color = FACE_COLOR_YELLOW; if (face_id < 0) { @@ -186,89 +216,64 @@ static void draw_face_boxes(dl_matrix3du_t *image_matrix, box_array_t *boxes, in { color = FACE_COLOR_GREEN; } - fb_data_t fb; - fb.width = image_matrix->w; - fb.height = image_matrix->h; - fb.data = image_matrix->item; - fb.bytes_per_pixel = 3; - fb.format = FB_BGR888; - for (i = 0; i < boxes->len; i++) + if(fb->bytes_per_pixel == 2){ + //color = ((color >> 8) & 0xF800) | ((color >> 3) & 0x07E0) | (color & 0x001F); + color = ((color >> 16) & 0x001F) | ((color >> 3) & 0x07E0) | ((color << 8) & 0xF800); + } + int i = 0; + for (std::list::iterator prediction = results->begin(); prediction != results->end(); prediction++, i++) { // rectangle box - x = (int)boxes->box[i].box_p[0]; - y = (int)boxes->box[i].box_p[1]; - w = (int)boxes->box[i].box_p[2] - x + 1; - h = (int)boxes->box[i].box_p[3] - y + 1; - fb_gfx_drawFastHLine(&fb, x, y, w, color); - fb_gfx_drawFastHLine(&fb, x, y + h - 1, w, color); - fb_gfx_drawFastVLine(&fb, x, y, h, color); - fb_gfx_drawFastVLine(&fb, x + w - 1, y, h, color); -#if 0 - // landmark + x = (int)prediction->box[0]; + y = (int)prediction->box[1]; + w = (int)prediction->box[2] - x + 1; + h = (int)prediction->box[3] - y + 1; + if((x + w) > fb->width){ + w = fb->width - x; + } + if((y + h) > fb->height){ + h = fb->height - y; + } + 
fb_gfx_drawFastHLine(fb, x, y, w, color); + fb_gfx_drawFastHLine(fb, x, y + h - 1, w, color); + fb_gfx_drawFastVLine(fb, x, y, h, color); + fb_gfx_drawFastVLine(fb, x + w - 1, y, h, color); +#if TWO_STAGE + // landmarks (left eye, mouth left, nose, right eye, mouth right) int x0, y0, j; for (j = 0; j < 10; j+=2) { - x0 = (int)boxes->landmark[i].landmark_p[j]; - y0 = (int)boxes->landmark[i].landmark_p[j+1]; - fb_gfx_fillRect(&fb, x0, y0, 3, 3, color); + x0 = (int)prediction->keypoint[j]; + y0 = (int)prediction->keypoint[j+1]; + fb_gfx_fillRect(fb, x0, y0, 3, 3, color); } #endif } } #if CONFIG_ESP_FACE_RECOGNITION_ENABLED -static int run_face_recognition(dl_matrix3du_t *image_matrix, box_array_t *net_boxes) +static int run_face_recognition(fb_data_t *fb, std::list *results) { - dl_matrix3du_t *aligned_face = NULL; - int matched_id = 0; + std::vector landmarks = results->front().keypoint; + int id = -1; - aligned_face = dl_matrix3du_alloc(1, FACE_WIDTH, FACE_HEIGHT, 3); - if (!aligned_face) - { - ESP_LOGE(TAG, "Could not allocate face recognition buffer"); - return matched_id; - } - if (align_face(net_boxes, image_matrix, aligned_face) == ESP_OK) - { - if (is_enrolling == 1) - { - int8_t left_sample_face = enroll_face(&id_list, aligned_face); + Tensor tensor; + tensor.set_element((uint8_t *)fb->data).set_shape({fb->height, fb->width, 3}).set_auto_free(false); - if (left_sample_face == (ENROLL_CONFIRM_TIMES - 1)) - { - ESP_LOGD(TAG, "Enrolling Face ID: %d", id_list.tail); - } - ESP_LOGD(TAG, "Enrolling Face ID: %d sample %d", id_list.tail, ENROLL_CONFIRM_TIMES - left_sample_face); - rgb_printf(image_matrix, FACE_COLOR_CYAN, "ID[%u] Sample[%u]", id_list.tail, ENROLL_CONFIRM_TIMES - left_sample_face); - if (left_sample_face == 0) - { - is_enrolling = 0; - ESP_LOGD(TAG, "Enrolled Face ID: %d", id_list.tail); - } - } - else - { - matched_id = recognize_face(&id_list, aligned_face); - if (matched_id >= 0) - { - ESP_LOGW(TAG, "Match Face ID: %u", matched_id); - 
rgb_printf(image_matrix, FACE_COLOR_GREEN, "Hello Subject %u", matched_id); - } - else - { - ESP_LOGW(TAG, "No Match Found"); - rgb_print(image_matrix, FACE_COLOR_RED, "Intruder Alert!"); - matched_id = -1; - } - } - } - else - { - ESP_LOGW(TAG, "Face Not Aligned"); - //rgb_print(image_matrix, FACE_COLOR_YELLOW, "Human Detected"); + int enrolled_count = recognizer.get_enrolled_id_num(); + + if (enrolled_count < FACE_ID_SAVE_NUMBER && is_enrolling){ + id = recognizer.enroll_id(tensor, landmarks, "", true); + ESP_LOGI(TAG, "Enrolled ID: %d", id); + rgb_printf(fb, FACE_COLOR_CYAN, "ID[%u]", id); } - dl_matrix3du_free(aligned_face); - return matched_id; + face_info_t recognize = recognizer.recognize(tensor, landmarks); + if(recognize.id >= 0){ + rgb_printf(fb, FACE_COLOR_GREEN, "ID[%u]: %.2f", recognize.id, recognize.similarity); + } else { + rgb_print(fb, FACE_COLOR_RED, "Intruder Alert!"); + } + return recognize.id; } #endif #endif @@ -291,7 +296,9 @@ static esp_err_t bmp_handler(httpd_req_t *req) { camera_fb_t *fb = NULL; esp_err_t res = ESP_OK; +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO uint64_t fr_start = esp_timer_get_time(); +#endif fb = esp_camera_fb_get(); if (!fb) { @@ -320,7 +327,9 @@ static esp_err_t bmp_handler(httpd_req_t *req) } res = httpd_resp_send(req, (const char *)buf, buf_len); free(buf); +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO uint64_t fr_end = esp_timer_get_time(); +#endif ESP_LOGI(TAG, "BMP: %llums, %uB", (uint64_t)((fr_end - fr_start) / 1000), buf_len); return res; } @@ -344,7 +353,9 @@ static esp_err_t capture_handler(httpd_req_t *req) { camera_fb_t *fb = NULL; esp_err_t res = ESP_OK; +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO int64_t fr_start = esp_timer_get_time(); +#endif #ifdef CONFIG_LED_ILLUMINATOR_ENABLED enable_led(true); @@ -374,15 +385,21 @@ static esp_err_t capture_handler(httpd_req_t *req) size_t out_len, out_width, out_height; uint8_t *out_buf; bool s; +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO bool 
detected = false; +#endif int face_id = 0; if (!detection_enabled || fb->width > 400) { #endif +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO size_t fb_len = 0; +#endif if (fb->format == PIXFORMAT_JPEG) { +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO fb_len = fb->len; +#endif res = httpd_resp_send(req, (const char *)fb->buf, fb->len); } else @@ -390,68 +407,110 @@ static esp_err_t capture_handler(httpd_req_t *req) jpg_chunking_t jchunk = {req, 0}; res = frame2jpg_cb(fb, 80, jpg_encode_stream, &jchunk) ? ESP_OK : ESP_FAIL; httpd_resp_send_chunk(req, NULL, 0); +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO fb_len = jchunk.len; +#endif } esp_camera_fb_return(fb); +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO int64_t fr_end = esp_timer_get_time(); +#endif ESP_LOGI(TAG, "JPG: %uB %ums", (uint32_t)(fb_len), (uint32_t)((fr_end - fr_start) / 1000)); return res; #if CONFIG_ESP_FACE_DETECT_ENABLED } - dl_matrix3du_t *image_matrix = dl_matrix3du_alloc(1, fb->width, fb->height, 3); - if (!image_matrix) - { - esp_camera_fb_return(fb); - ESP_LOGE(TAG, "dl_matrix3du_alloc failed"); - httpd_resp_send_500(req); - return ESP_FAIL; - } - - out_buf = image_matrix->item; - out_len = fb->width * fb->height * 3; - out_width = fb->width; - out_height = fb->height; - - s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf); - esp_camera_fb_return(fb); - if (!s) - { - dl_matrix3du_free(image_matrix); - ESP_LOGE(TAG, "to rgb888 failed"); - httpd_resp_send_500(req); - return ESP_FAIL; - } - - box_array_t *net_boxes = face_detect(image_matrix, &mtmn_config); - - if (net_boxes) - { - detected = true; -#if CONFIG_ESP_FACE_RECOGNITION_ENABLED - if (recognition_enabled) - { - face_id = run_face_recognition(image_matrix, net_boxes); - } -#endif - draw_face_boxes(image_matrix, net_boxes, face_id); - dl_lib_free(net_boxes->score); - dl_lib_free(net_boxes->box); - if (net_boxes->landmark != NULL) - dl_lib_free(net_boxes->landmark); - dl_lib_free(net_boxes); - } - jpg_chunking_t jchunk = {req, 
0}; - s = fmt2jpg_cb(out_buf, out_len, out_width, out_height, PIXFORMAT_RGB888, 90, jpg_encode_stream, &jchunk); - dl_matrix3du_free(image_matrix); - if (!s) + + if (fb->format == PIXFORMAT_RGB565 +#if CONFIG_ESP_FACE_RECOGNITION_ENABLED + && !recognition_enabled +#endif + ){ +#if TWO_STAGE + HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F); + HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5); + std::list &candidates = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}); + std::list &results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, candidates); +#else + HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F); + std::list &results = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}); +#endif + if (results.size() > 0) { + fb_data_t rfb; + rfb.width = fb->width; + rfb.height = fb->height; + rfb.data = fb->buf; + rfb.bytes_per_pixel = 2; + rfb.format = FB_RGB565; +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + detected = true; +#endif + draw_face_boxes(&rfb, &results, face_id); + } + s = fmt2jpg_cb(fb->buf, fb->len, fb->width, fb->height, PIXFORMAT_RGB565, 90, jpg_encode_stream, &jchunk); + esp_camera_fb_return(fb); + } else { - ESP_LOGE(TAG, "JPEG compression failed"); - return ESP_FAIL; + out_len = fb->width * fb->height * 3; + out_width = fb->width; + out_height = fb->height; + out_buf = (uint8_t*)malloc(out_len); + if (!out_buf) { + ESP_LOGE(TAG, "out_buf malloc failed"); + httpd_resp_send_500(req); + return ESP_FAIL; + } + s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf); + esp_camera_fb_return(fb); + if (!s) { + free(out_buf); + ESP_LOGE(TAG, "to rgb888 failed"); + httpd_resp_send_500(req); + return ESP_FAIL; + } + + fb_data_t rfb; + rfb.width = out_width; + rfb.height = out_height; + rfb.data = out_buf; + rfb.bytes_per_pixel = 3; + rfb.format = FB_BGR888; + +#if TWO_STAGE + HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F); + HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5); + std::list &candidates = s1.infer((uint8_t *)out_buf, 
{(int)out_height, (int)out_width, 3}); + std::list &results = s2.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}, candidates); +#else + HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F); + std::list &results = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}); +#endif + + if (results.size() > 0) { +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + detected = true; +#endif +#if CONFIG_ESP_FACE_RECOGNITION_ENABLED + if (recognition_enabled) { + face_id = run_face_recognition(&rfb, &results); + } +#endif + draw_face_boxes(&rfb, &results, face_id); + } + + s = fmt2jpg_cb(out_buf, out_len, out_width, out_height, PIXFORMAT_RGB888, 90, jpg_encode_stream, &jchunk); + free(out_buf); } + if (!s) { + ESP_LOGE(TAG, "JPEG compression failed"); + httpd_resp_send_500(req); + return ESP_FAIL; + } +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO int64_t fr_end = esp_timer_get_time(); +#endif ESP_LOGI(TAG, "FACE: %uB %ums %s%d", (uint32_t)(jchunk.len), (uint32_t)((fr_end - fr_start) / 1000), detected ? 
"DETECTED " : "", face_id); return res; #endif @@ -466,14 +525,24 @@ static esp_err_t stream_handler(httpd_req_t *req) uint8_t *_jpg_buf = NULL; char *part_buf[128]; #if CONFIG_ESP_FACE_DETECT_ENABLED - dl_matrix3du_t *image_matrix = NULL; - bool detected = false; + #if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + bool detected = false; + int64_t fr_ready = 0; + int64_t fr_recognize = 0; + int64_t fr_encode = 0; + int64_t fr_face = 0; + int64_t fr_start = 0; + #endif int face_id = 0; - int64_t fr_start = 0; - int64_t fr_ready = 0; - int64_t fr_face = 0; - int64_t fr_recognize = 0; - int64_t fr_encode = 0; + size_t out_len = 0, out_width = 0, out_height = 0; + uint8_t *out_buf = NULL; + bool s = false; +#if TWO_STAGE + HumanFaceDetectMSR01 s1(0.1F, 0.5F, 10, 0.2F); + HumanFaceDetectMNP01 s2(0.5F, 0.3F, 5); +#else + HumanFaceDetectMSR01 s1(0.3F, 0.5F, 10, 0.2F); +#endif #endif static int64_t last_frame = 0; @@ -499,7 +568,9 @@ static esp_err_t stream_handler(httpd_req_t *req) while (true) { #if CONFIG_ESP_FACE_DETECT_ENABLED + #if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO detected = false; + #endif face_id = 0; #endif @@ -514,11 +585,13 @@ static esp_err_t stream_handler(httpd_req_t *req) _timestamp.tv_sec = fb->timestamp.tv_sec; _timestamp.tv_usec = fb->timestamp.tv_usec; #if CONFIG_ESP_FACE_DETECT_ENABLED + #if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO fr_start = esp_timer_get_time(); fr_ready = fr_start; - fr_face = fr_start; fr_encode = fr_start; fr_recognize = fr_start; + fr_face = fr_start; + #endif if (!detection_enabled || fb->width > 400) { #endif @@ -542,65 +615,112 @@ static esp_err_t stream_handler(httpd_req_t *req) } else { - - image_matrix = dl_matrix3du_alloc(1, fb->width, fb->height, 3); - - if (!image_matrix) - { - ESP_LOGE(TAG, "dl_matrix3du_alloc failed"); - res = ESP_FAIL; - } - else - { - if (!fmt2rgb888(fb->buf, fb->len, fb->format, image_matrix->item)) - { - ESP_LOGE(TAG, "fmt2rgb888 failed"); + if (fb->format == PIXFORMAT_RGB565 +#if 
CONFIG_ESP_FACE_RECOGNITION_ENABLED + && !recognition_enabled +#endif + ){ +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + fr_ready = esp_timer_get_time(); +#endif +#if TWO_STAGE + std::list &candidates = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}); + std::list &results = s2.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}, candidates); +#else + std::list &results = s1.infer((uint16_t *)fb->buf, {(int)fb->height, (int)fb->width, 3}); +#endif +#if CONFIG_ESP_FACE_DETECT_ENABLED && ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + fr_face = esp_timer_get_time(); + fr_recognize = fr_face; +#endif + if (results.size() > 0) { + fb_data_t rfb; + rfb.width = fb->width; + rfb.height = fb->height; + rfb.data = fb->buf; + rfb.bytes_per_pixel = 2; + rfb.format = FB_RGB565; +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + detected = true; +#endif + draw_face_boxes(&rfb, &results, face_id); + } + s = fmt2jpg(fb->buf, fb->len, fb->width, fb->height, PIXFORMAT_RGB565, 80, &_jpg_buf, &_jpg_buf_len); + esp_camera_fb_return(fb); + fb = NULL; + if (!s) { + ESP_LOGE(TAG, "fmt2jpg failed"); res = ESP_FAIL; } - else - { - fr_ready = esp_timer_get_time(); - box_array_t *net_boxes = NULL; - if (detection_enabled) - { - net_boxes = face_detect(image_matrix, &mtmn_config); - } - fr_face = esp_timer_get_time(); - fr_recognize = fr_face; - if (net_boxes || fb->format != PIXFORMAT_JPEG) - { - if (net_boxes) - { - detected = true; -#if CONFIG_ESP_FACE_RECOGNITION_ENABLED - if (recognition_enabled) - { - face_id = run_face_recognition(image_matrix, net_boxes); - } - fr_recognize = esp_timer_get_time(); +#if CONFIG_ESP_FACE_DETECT_ENABLED && ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + fr_encode = esp_timer_get_time(); #endif - draw_face_boxes(image_matrix, net_boxes, face_id); - dl_lib_free(net_boxes->score); - dl_lib_free(net_boxes->box); - if (net_boxes->landmark != NULL) - dl_lib_free(net_boxes->landmark); - dl_lib_free(net_boxes); + } else + { + 
out_len = fb->width * fb->height * 3; + out_width = fb->width; + out_height = fb->height; + out_buf = (uint8_t*)malloc(out_len); + if (!out_buf) { + ESP_LOGE(TAG, "out_buf malloc failed"); + res = ESP_FAIL; + } else { + s = fmt2rgb888(fb->buf, fb->len, fb->format, out_buf); + esp_camera_fb_return(fb); + fb = NULL; + if (!s) { + free(out_buf); + ESP_LOGE(TAG, "to rgb888 failed"); + res = ESP_FAIL; + } else { +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + fr_ready = esp_timer_get_time(); +#endif + + fb_data_t rfb; + rfb.width = out_width; + rfb.height = out_height; + rfb.data = out_buf; + rfb.bytes_per_pixel = 3; + rfb.format = FB_BGR888; + +#if TWO_STAGE + std::list &candidates = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}); + std::list &results = s2.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}, candidates); +#else + std::list &results = s1.infer((uint8_t *)out_buf, {(int)out_height, (int)out_width, 3}); +#endif + +#if CONFIG_ESP_FACE_DETECT_ENABLED && ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + fr_face = esp_timer_get_time(); + fr_recognize = fr_face; +#endif + + if (results.size() > 0) { +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + detected = true; +#endif +#if CONFIG_ESP_FACE_RECOGNITION_ENABLED + if (recognition_enabled) { + face_id = run_face_recognition(&rfb, &results); + #if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + fr_recognize = esp_timer_get_time(); + #endif + } +#endif + draw_face_boxes(&rfb, &results, face_id); } - if (!fmt2jpg(image_matrix->item, fb->width * fb->height * 3, fb->width, fb->height, PIXFORMAT_RGB888, 90, &_jpg_buf, &_jpg_buf_len)) - { + s = fmt2jpg(out_buf, out_len, out_width, out_height, PIXFORMAT_RGB888, 90, &_jpg_buf, &_jpg_buf_len); + free(out_buf); + if (!s) { ESP_LOGE(TAG, "fmt2jpg failed"); + res = ESP_FAIL; } - esp_camera_fb_return(fb); - fb = NULL; +#if CONFIG_ESP_FACE_DETECT_ENABLED && ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO + fr_encode = esp_timer_get_time(); +#endif } 
- else - { - _jpg_buf = fb->buf; - _jpg_buf_len = fb->len; - } - fr_encode = esp_timer_get_time(); } - dl_matrix3du_free(image_matrix); } } #endif @@ -631,11 +751,12 @@ static esp_err_t stream_handler(httpd_req_t *req) } if (res != ESP_OK) { + ESP_LOGE(TAG, "send frame failed failed"); break; } int64_t fr_end = esp_timer_get_time(); -#if CONFIG_ESP_FACE_DETECT_ENABLED +#if CONFIG_ESP_FACE_DETECT_ENABLED && ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO int64_t ready_time = (fr_ready - fr_start) / 1000; int64_t face_time = (fr_face - fr_ready) / 1000; int64_t recognize_time = (fr_recognize - fr_face) / 1000; @@ -644,9 +765,10 @@ static esp_err_t stream_handler(httpd_req_t *req) #endif int64_t frame_time = fr_end - last_frame; - last_frame = fr_end; frame_time /= 1000; +#if ARDUHAL_LOG_LEVEL >= ARDUHAL_LOG_LEVEL_INFO uint32_t avg_frame_time = ra_filter_run(&ra_filter, frame_time); +#endif ESP_LOGI(TAG, "MJPG: %uB %ums (%.1ffps), AVG: %ums (%.1ffps)" #if CONFIG_ESP_FACE_DETECT_ENABLED ", %u+%u+%u+%u=%u %s%d" @@ -668,7 +790,6 @@ static esp_err_t stream_handler(httpd_req_t *req) enable_led(false); #endif - last_frame = 0; return res; } @@ -785,8 +906,10 @@ static esp_err_t cmd_handler(httpd_req_t *req) #endif } #if CONFIG_ESP_FACE_RECOGNITION_ENABLED - else if (!strcmp(variable, "face_enroll")) - is_enrolling = val; + else if (!strcmp(variable, "face_enroll")){ + is_enrolling = !is_enrolling; + ESP_LOGI(TAG, "Enrolling: %s", is_enrolling?"true":"false"); + } else if (!strcmp(variable, "face_recognize")) { recognition_enabled = val; if (recognition_enabled) { @@ -1087,90 +1210,152 @@ void startCameraServer() .uri = "/", .method = HTTP_GET, .handler = index_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t status_uri = { .uri = "/status", .method = HTTP_GET, .handler = status_handler, - .user_ctx = NULL}; + .user_ctx = NULL 
+#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t cmd_uri = { .uri = "/control", .method = HTTP_GET, .handler = cmd_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t capture_uri = { .uri = "/capture", .method = HTTP_GET, .handler = capture_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t stream_uri = { .uri = "/stream", .method = HTTP_GET, .handler = stream_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t bmp_uri = { .uri = "/bmp", .method = HTTP_GET, .handler = bmp_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t xclk_uri = { .uri = "/xclk", .method = HTTP_GET, .handler = xclk_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t reg_uri = { .uri = "/reg", .method = HTTP_GET, .handler = reg_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t greg_uri = { .uri = "/greg", .method = HTTP_GET, .handler = greg_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + 
.supported_subprotocol = NULL +#endif + }; httpd_uri_t pll_uri = { .uri = "/pll", .method = HTTP_GET, .handler = pll_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; httpd_uri_t win_uri = { .uri = "/resolution", .method = HTTP_GET, .handler = win_handler, - .user_ctx = NULL}; + .user_ctx = NULL +#ifdef CONFIG_HTTPD_WS_SUPPORT + , + .is_websocket = true, + .handle_ws_control_frames = false, + .supported_subprotocol = NULL +#endif + }; ra_filter_init(&ra_filter, 20); -#if CONFIG_ESP_FACE_DETECT_ENABLED - - mtmn_config.type = FAST; - mtmn_config.min_face = 80; - mtmn_config.pyramid = 0.707; - mtmn_config.pyramid_times = 4; - mtmn_config.p_threshold.score = 0.6; - mtmn_config.p_threshold.nms = 0.7; - mtmn_config.p_threshold.candidate_number = 20; - mtmn_config.r_threshold.score = 0.7; - mtmn_config.r_threshold.nms = 0.7; - mtmn_config.r_threshold.candidate_number = 10; - mtmn_config.o_threshold.score = 0.7; - mtmn_config.o_threshold.nms = 0.7; - mtmn_config.o_threshold.candidate_number = 1; - #if CONFIG_ESP_FACE_RECOGNITION_ENABLED - face_id_init(&id_list, FACE_ID_SAVE_NUMBER, ENROLL_CONFIRM_TIMES); -#endif + recognizer.set_partition(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, "fr"); + // load ids from flash partition + recognizer.set_ids_from_flash(); #endif ESP_LOGI(TAG, "Starting web server on port: '%d'", config.server_port); if (httpd_start(&camera_httpd, &config) == ESP_OK) diff --git a/C/Sketches/Sketch_05.1_CameraWebServer/camera_index.h b/C/Sketches/Sketch_05.1_CameraWebServer/camera_index.h index c1072a3..fa42e69 100644 --- a/C/Sketches/Sketch_05.1_CameraWebServer/camera_index.h +++ b/C/Sketches/Sketch_05.1_CameraWebServer/camera_index.h @@ -1568,3 +1568,4 @@ const uint8_t index_ov5640_html_gz[] = { 0x05, 0x83, 0x47, 0x67, 0x87, 0x33, 0x7F, 0x6E, 0x0D, 0x1E, 0xFD, 0x2F, 0x5E, 0x72, 0x0B, 0x89, 0x42, 0x10, 0x01, 
0x00 }; + diff --git a/C/Sketches/Sketch_05.1_CameraWebServer/camera_pins.h b/C/Sketches/Sketch_05.1_CameraWebServer/camera_pins.h index 8b7e1d8..e1be287 100644 --- a/C/Sketches/Sketch_05.1_CameraWebServer/camera_pins.h +++ b/C/Sketches/Sketch_05.1_CameraWebServer/camera_pins.h @@ -170,6 +170,104 @@ #define HREF_GPIO_NUM 26 #define PCLK_GPIO_NUM 21 + +#elif defined(CAMERA_MODEL_ESP32_CAM_BOARD) +// The 18 pin header on the board has Y5 and Y3 swapped +#define USE_BOARD_HEADER 0 +#define PWDN_GPIO_NUM 32 +#define RESET_GPIO_NUM 33 +#define XCLK_GPIO_NUM 4 +#define SIOD_GPIO_NUM 18 +#define SIOC_GPIO_NUM 23 + +#define Y9_GPIO_NUM 36 +#define Y8_GPIO_NUM 19 +#define Y7_GPIO_NUM 21 +#define Y6_GPIO_NUM 39 +#if USE_BOARD_HEADER +#define Y5_GPIO_NUM 13 +#else +#define Y5_GPIO_NUM 35 +#endif +#define Y4_GPIO_NUM 14 +#if USE_BOARD_HEADER +#define Y3_GPIO_NUM 35 +#else +#define Y3_GPIO_NUM 13 +#endif +#define Y2_GPIO_NUM 34 +#define VSYNC_GPIO_NUM 5 +#define HREF_GPIO_NUM 27 +#define PCLK_GPIO_NUM 25 + +#elif defined(CAMERA_MODEL_ESP32S3_CAM_LCD) +#define PWDN_GPIO_NUM -1 +#define RESET_GPIO_NUM -1 +#define XCLK_GPIO_NUM 40 +#define SIOD_GPIO_NUM 17 +#define SIOC_GPIO_NUM 18 + +#define Y9_GPIO_NUM 39 +#define Y8_GPIO_NUM 41 +#define Y7_GPIO_NUM 42 +#define Y6_GPIO_NUM 12 +#define Y5_GPIO_NUM 3 +#define Y4_GPIO_NUM 14 +#define Y3_GPIO_NUM 47 +#define Y2_GPIO_NUM 13 +#define VSYNC_GPIO_NUM 21 +#define HREF_GPIO_NUM 38 +#define PCLK_GPIO_NUM 11 + +#elif defined(CAMERA_MODEL_ESP32S2_CAM_BOARD) +// The 18 pin header on the board has Y5 and Y3 swapped +#define USE_BOARD_HEADER 0 +#define PWDN_GPIO_NUM 1 +#define RESET_GPIO_NUM 2 +#define XCLK_GPIO_NUM 42 +#define SIOD_GPIO_NUM 41 +#define SIOC_GPIO_NUM 18 + +#define Y9_GPIO_NUM 16 +#define Y8_GPIO_NUM 39 +#define Y7_GPIO_NUM 40 +#define Y6_GPIO_NUM 15 +#if USE_BOARD_HEADER +#define Y5_GPIO_NUM 12 +#else +#define Y5_GPIO_NUM 13 +#endif +#define Y4_GPIO_NUM 5 +#if USE_BOARD_HEADER +#define Y3_GPIO_NUM 13 +#else +#define Y3_GPIO_NUM 
12 +#endif +#define Y2_GPIO_NUM 14 +#define VSYNC_GPIO_NUM 38 +#define HREF_GPIO_NUM 4 +#define PCLK_GPIO_NUM 3 + +#elif defined(CAMERA_MODEL_ESP32S3_EYE) +#define PWDN_GPIO_NUM -1 +#define RESET_GPIO_NUM -1 +#define XCLK_GPIO_NUM 15 +#define SIOD_GPIO_NUM 4 +#define SIOC_GPIO_NUM 5 + +#define Y2_GPIO_NUM 11 +#define Y3_GPIO_NUM 9 +#define Y4_GPIO_NUM 8 +#define Y5_GPIO_NUM 10 +#define Y6_GPIO_NUM 12 +#define Y7_GPIO_NUM 18 +#define Y8_GPIO_NUM 17 +#define Y9_GPIO_NUM 16 + +#define VSYNC_GPIO_NUM 6 +#define HREF_GPIO_NUM 7 +#define PCLK_GPIO_NUM 13 + #else #error "Camera model not selected" #endif