Upload 6 files
Browse files — added scripts for creating the model and loading it onto the Raspberry Pi
- create-model/computer_requirements.txt +6 -0
- create-model/create_image_classification_model.ipynb +0 -0
- create-model/ +40 -0
- create-model/ +35 -0
- rpi-object-detection/ +217 -0
- rpi-object-detection/rpi_requirements.txt +5 -0
@@ -0,0 +1,6 @@
1 |
tensorflow == 2.9.1
2 |
Pillow == 9.2.0
3 |
numpy == 1.23.2
4 |
opencv-python ==
5 |
matplotlib == 3.5.3
6 |
scikit-learn == 1.1.2
The diff for this file is too large to render.
See raw diff
@@ -0,0 +1,40 @@
1 |
# script to create training data npy file from the database of images
2 |
# the npy file can then be uploaded to google drive and read in the jupyter notebook
3 |
# can then create training_data for model training
4 |
5 |
import os
6 |
import cv2
7 |
import numpy as np
8 |
9 |
# initialize target image size for the training and testing data
10 |
img_height = 128
11 |
img_width = 128
12 |
13 |
categories = ["straight-liftarm", 'pins', 'bent-liftarm', 'gears-and-disc', 'special-connector', 'axles', 'axle-connectors-stoppers']
14 |
15 |
training_data = []
16 |
# Read each file named in `list` from directory `path`, resize the image to
# 128x128 and append an [image, label] pair to the module-level training_data
# list. Prints how many entries this call added.
# NOTE(review): this view of the function is incomplete -- the remaining
# argument(s) of the cv2.imread call, the `try:` that pairs with
# `except Exception:` and the body of that except clause are not visible here.
# Confirm them against the raw file before relying on this block.
def get_category_images(list,path,label):
17 |
#print("old:", str(len(training_data)))
18 |
# remember the size of training_data before this category is processed
current = len(training_data)
19 |
for i in range(len(list)):
20 |
21 |
# NOTE(review): call is cut off after the trailing comma -- TODO confirm the read flag
image = cv2.imread(os.path.join(path,list[i]),
22 |
23 |
image = cv2.resize(image, (128,128))
24 |
training_data.append([image, label])
25 |
# NOTE(review): handler body not visible; presumably unreadable files are skipped -- verify
except Exception:
26 |
27 |
new = len(training_data)
28 |
# number of images added for this category
print(new - current)
29 |
30 |
31 |
for cat in categories:
32 |
cat_path = "RPI3_project/lego-test-data/database/" + cat
33 |
cat_list = os.listdir(cat_path)
34 |
cat_label = categories.index(cat)
35 |
get_category_images(cat_list, cat_path, cat_label)
36 |
37 |
38 |
td_array = np.array(training_data)
39 |
40 |
+'td_array_7cat', td_array)
@@ -0,0 +1,35 @@
1 |
# to test tflite model on individual images
2 |
# run on your own computer as raspberry pi can't install tensorflow, and we need the img_to_array function
3 |
4 |
import numpy as np
5 |
import tensorflow as tf
6 |
from tensorflow.keras.preprocessing.image import load_img
7 |
from tensorflow.keras.preprocessing.image import img_to_array
8 |
from PIL import Image, ImageOps
9 |
10 |
11 |
# Load TFLite model and allocate tensors.
12 |
interpreter = tf.lite.Interpreter(model_path="OGmodel.tflite")
13 |
14 |
15 |
# Get input and output tensors.
16 |
input_details = interpreter.get_input_details()
17 |
output_details = interpreter.get_output_details()
18 |
19 |
# Test model on a single test image (the random-input variant is left commented out below).
20 |
input_shape = input_details[0]['shape']
21 |
input_image ='lego-testing/testing/12image.jpg')
22 |
input_image = ImageOps.grayscale(input_image)
23 |
input_image = input_image.resize((28,28))
24 |
25 |
input_data = img_to_array(input_image)
26 |
27 |
#input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
28 |
interpreter.set_tensor(input_details[0]['index'], input_data)
29 |
30 |
31 |
32 |
# The function `get_tensor()` returns a copy of the tensor data.
33 |
# Use `tensor()` in order to get a pointer to the tensor.
34 |
output_data = interpreter.get_tensor(output_details[0]['index'])
35 |
@@ -0,0 +1,217 @@
1 |
# This program combines motion detection and object classification. It will output the most probable category of lego pieces
2 |
# after the picamera detects it in realtime.
3 |
# The motion detection portion of the script was adapted from pyimagesearch's project
4 |
# 'Building a Raspberry Pi security camera with OpenCV' and can be found at
5 |
6 |
7 |
# To run, open the terminal in RPI and navigate to folder containing the python script.
8 |
# Run python3 'path_to_script' --conf conf.json
9 |
10 |
# This script, when run, will activate the picamera to detect motion of objects (preferably against a white background)
11 |
# and enclose it in a green boundary box.
12 |
# If motion is detected by the picamera in successive frames, the bounding box will be extracted and the image saved to a
13 |
# pre-specified folder in the RPI. The image contrast will be increased, and resized before being converted into an input tensor.
14 |
# The input tensor will be passed into the interpreter (a TensorFlow Lite model), which will output a probability vector.
15 |
# The vector index of the highest probability will be extracted to output the most likely class of the lego piece.
16 |
17 |
# This script can be modified to take the images required for the database. The motionCounter can be decreased to take more images.
18 |
19 |
from picamera.array import PiRGBArray
20 |
from picamera import PiCamera
21 |
import argparse
22 |
import warnings
23 |
import datetime
24 |
import imutils
25 |
import json
26 |
import time
27 |
import cv2
28 |
import os
29 |
30 |
#imports and initialisations for image recognition
31 |
from tflite_runtime.interpreter import Interpreter
32 |
from PIL import Image, ImageOps
33 |
import numpy as np
34 |
35 |
# Load TFLite model and allocate tensors.
36 |
interpreter = Interpreter(model_path="lego_tflite_model/detect.tflite") # insert path to the tflite model
37 |
38 |
path = r'/home/nullspacepi/Desktop/opencv-test/lego-pieces' # create variable for path to where camera pictures will be saved to
39 |
40 |
# Get input and output tensors.
41 |
input_details = interpreter.get_input_details()
42 |
output_details = interpreter.get_output_details()
43 |
input_shape = input_details[0]['shape']
44 |
45 |
# define a function that will convert the image captured into an array
46 |
def img_to_array(img, data_format='channels_last', dtype='float32'):
    """Convert an image to a 3-D NumPy array.

    Intended as a TensorFlow-free equivalent of the Keras helper of the same
    name, so the result can be fed to the TFLite interpreter.

    Args:
        img: a PIL image or any array-like accepted by ``np.asarray``; it
            must be 2-D (height, width) or 3-D (height, width, channels).
        data_format: ``'channels_last'`` (default) or ``'channels_first'``.
        dtype: dtype of the returned array.

    Returns:
        A 3-D ``numpy.ndarray``. A 2-D input gains a single channel axis,
        placed last or first according to ``data_format``; a 3-D input is
        transposed to channels-first only when that format is requested.

    Raises:
        ValueError: if ``data_format`` is not recognised or the input is
            neither 2-D nor 3-D.
    """
    if data_format not in {'channels_first', 'channels_last'}:
        raise ValueError('Unknown data_format: %s' % data_format)

    x = np.asarray(img, dtype=dtype)
    if x.ndim == 3:
        # Input is already (height, width, channels); only move the channel
        # axis when channels-first output is requested.
        if data_format == 'channels_first':
            x = x.transpose(2, 0, 1)
    elif x.ndim == 2:
        # Grayscale image: add the missing single-channel axis.
        if data_format == 'channels_first':
            x = x.reshape((1, x.shape[0], x.shape[1]))
        else:
            x = x.reshape((x.shape[0], x.shape[1], 1))
    else:
        raise ValueError('Unsupported image shape: %s' % (x.shape,))
    return x
62 |
63 |
# define a function that will increase the contrast of the image by manipulating its array. This will increase the likelihood
64 |
# that its features are detected by the image classification TensorFlow model
65 |
def increase_contrast_more(s):
    """Stretch the contrast of a grayscale image array to the 0..255 range.

    Pixel values are clipped to their 2nd and 98th percentiles, truncated to
    integers, and the remaining range is mapped linearly onto 0..255 through
    a 256-entry look-up table.

    NOTE(review): the statements that build the look-up table were missing
    from the reviewed text (the table was used without being defined). The
    linear 0..255 ramp below is a reconstruction -- confirm against the
    original script.

    Args:
        s: array-like of pixel intensities, expected to lie in 0..255.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with the same shape as ``s``.
    """
    # Ignore the darkest and brightest 2 % of pixels so that a few outliers
    # do not dominate the stretch.
    low_cut = np.percentile(s, 2)
    high_cut = np.percentile(s, 98)
    pixels = np.clip(s, low_cut, high_cut).astype(int)
    # The values are used as indices into a 256-entry table, so keep them
    # inside it even if the input was outside the expected 0..255 range.
    pixels = np.clip(pixels, 0, 255)

    darkest = int(pixels.min())
    brightest = int(pixels.max())

    # Look-up table: darkest -> 0, brightest -> 255, linear in between.
    lut = np.zeros(256, dtype=np.uint8)
    ramp = np.linspace(0, 255, num=brightest - darkest + 1)
    lut[darkest:brightest + 1] = ramp.astype(np.uint8)

    return lut[pixels]
80 |
81 |
# Read the labels from the text file as a Python list.
82 |
def load_labels(path):
    """Read class labels from a text file, one label per line.

    Args:
        path: path of the label text file.

    Returns:
        list of str: every line of the file with surrounding whitespace
        stripped, in file order. Blank lines are kept, so a label's list
        index equals its zero-based line number.
    """
    with open(path, 'r') as f:
        return [line.strip() for line in f]
85 |
86 |
# Read class labels and create a vector.
87 |
labels = load_labels("lego_tflite_model/labelmap.txt")
88 |
89 |
# construct the argument parser and parse the arguments
90 |
ap = argparse.ArgumentParser()
91 |
ap.add_argument("-c", "--conf", required=True, help="path to the JSON configuration file")
92 |
args = vars(ap.parse_args())
93 |
94 |
# filter warnings, load the configuration
95 |
96 |
conf = json.load(open(args["conf"]))
97 |
client = None
98 |
99 |
# initialize the camera and grab a reference to the raw camera capture
100 |
camera = PiCamera()
101 |
camera.resolution = tuple(conf["resolution"])
102 |
camera.framerate = conf["fps"]
103 |
rawCapture = PiRGBArray(camera, size=tuple(conf["resolution"]))
104 |
105 |
# allow the camera to warmup, then initialize the average frame, last
106 |
# uploaded timestamp, and frame motion counter
107 |
print("[INFO] warming up...")
108 |
109 |
avg = None
110 |
motionCounter = 0
111 |
image_number = 0
112 |
113 |
# capture frames from the camera
114 |
for f in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
115 |
# grab the raw NumPy array representing the image and initialize
116 |
# the timestamp and occupied/unoccupied text
117 |
frame = f.array
118 |
text = "No piece"
119 |
120 |
# resize the frame, convert it to grayscale, and blur it
121 |
frame = imutils.resize(frame, width=500)
122 |
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
123 |
gray = cv2.GaussianBlur(gray, (21, 21), 0)
124 |
125 |
# if the average frame is None, initialize it
126 |
if avg is None:
127 |
print("[INFO] starting background model...")
128 |
avg = gray.copy().astype("float")
129 |
130 |
131 |
132 |
133 |
# accumulate the weighted average between the current frame and
134 |
# previous frames, then compute the difference between the current
135 |
# frame and running average
136 |
cv2.accumulateWeighted(gray, avg, 0.5)
137 |
frameDelta = cv2.absdiff(gray, cv2.convertScaleAbs(avg))
138 |
139 |
# threshold the delta image, dilate the thresholded image to fill
140 |
# in holes, then find contours on thresholded image
141 |
thresh = cv2.threshold(frameDelta, conf["delta_thresh"], 255,
142 |
143 |
thresh = cv2.dilate(thresh, None, iterations=2)
144 |
cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
145 |
146 |
cnts = imutils.grab_contours(cnts)
147 |
148 |
# loop over the contours
149 |
150 |
for c in cnts:
151 |
# if the contour is too small, ignore it
152 |
if cv2.contourArea(c) < conf["min_area"]:
153 |
154 |
155 |
# compute the bounding box for the contour, draw it on the frame,
156 |
# and update the text
157 |
(x, y, w, h) = cv2.boundingRect(c)
158 |
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
159 |
piece_image = frame[y:y+h,x:x+w]
160 |
text = "Piece found"
161 |
# cv2.imshow("Image", image)
162 |
163 |
164 |
if text == "Piece found":
165 |
# to save images of bounding boxes
166 |
167 |
168 |
motionCounter += 1
169 |
print("motionCounter= ", motionCounter)
170 |
print("image_number= ", image_number)
171 |
172 |
# # Save image if motion is detected for 8 or more successive frames
173 |
if motionCounter >= 8:
174 |
image_number +=1
175 |
image_name = str(image_number)+"image.jpg"
176 |
cv2.imwrite(os.path.join(path, image_name), piece_image)
177 |
motionCounter = 0 #reset the motion counter
178 |
179 |
# Open the image, resize it and increase its contrast
180 |
input_image ='lego-pieces/'+ image_name)
181 |
input_image = ImageOps.grayscale(input_image)
182 |
input_image = input_image.resize((128,128))
183 |
input_data = img_to_array(input_image)
184 |
input_data = increase_contrast_more(input_data)
185 |
186 |
187 |
# Pass the np.array of the image through the tflite model. This will output a probablity vector
188 |
interpreter.set_tensor(input_details[0]['index'], input_data)
189 |
190 |
output_data = interpreter.get_tensor(output_details[0]['index'])
191 |
192 |
# Get the index of the highest value in the probability vector.
193 |
# This index value will correspond to the labels vector created above (i.e. index value 1 will mean the object is most likely labels[1])
194 |
category_number = np.argmax(output_data[0])
195 |
196 |
197 |
# Return the classification label of the image
198 |
classification_label = labels[category_number]
199 |
print("Image Label for " + image_name + " is :", classification_label)
200 |
201 |
202 |
203 |
204 |
motionCounter = 0
205 |
206 |
207 |
208 |
# check to see if the frames should be displayed to screen
209 |
if conf["show_video"]:
210 |
# display the feed
211 |
cv2.imshow("Feed", frame)
212 |
key = cv2.waitKey(1) & 0xFF
213 |
# if the `q` key is pressed, break from the lop
214 |
if key == ord("q"):
215 |
216 |
# clear the stream in preparation for the next frame
217 |
@@ -0,0 +1,5 @@
1 |
2 |
picamera== 1.13
3 |
tflite-runtime == 2.9.1
4 |
Pillow >= 9.0.1
5 |
numpy == 1.23.2