Add YAMNet sound classification to headmic
New sound_id.py module with a SoundClassifier class that runs YAMNet (521 audio event categories) as a CPU TFLite model. It classifies audio every 0.5s from a ring buffer fed by the existing audio stream.
Categories: speech, alert, music, animal, household, environment, silence.
Smoothing: a 20-sample history window yields a stable dominant category.
New endpoints: GET /sounds, GET /sounds/history.
Updated endpoints: /health (sound_classification_enabled), /status (audio_scene).
Degrades gracefully if the model files are not present.
Model download (not tracked in git): curl -sL 'https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite' -o models/yamnet.tflite
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,6 +2,9 @@
|
||||
*.ppn
|
||||
Hey-Vivi_*/
|
||||
|
||||
# ML models (downloaded separately)
|
||||
models/*.tflite
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
81
headmic.py
81
headmic.py
@@ -34,6 +34,7 @@ import wave
|
||||
from pathlib import Path
|
||||
from typing import Optional, List
|
||||
|
||||
import numpy as np
|
||||
import httpx
|
||||
import pvporcupine
|
||||
import webrtcvad
|
||||
@@ -117,9 +118,15 @@ class ServiceState:
|
||||
self.last_wake_time: Optional[float] = None
|
||||
self.wake_count = 0
|
||||
self.error: Optional[str] = None
|
||||
self.audio_scene: Optional[dict] = None
|
||||
self.sound_classification_enabled: bool = False
|
||||
|
||||
state = ServiceState()
|
||||
|
||||
# Sound classifier globals
|
||||
sound_classifier = None
|
||||
sound_ring_buffer = None # collections.deque, filled by listener_loop
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Audio Stream using ALSA directly (arecord)
|
||||
@@ -255,6 +262,10 @@ def listener_loop():
|
||||
# Convert bytes to int16 array for Porcupine
|
||||
pcm = struct.unpack_from("h" * 512, frame_data)
|
||||
|
||||
# Feed sound classifier ring buffer
|
||||
if sound_ring_buffer is not None:
|
||||
sound_ring_buffer.append(frame_data)
|
||||
|
||||
# Check for wake word
|
||||
keyword_index = porcupine.process(pcm)
|
||||
|
||||
@@ -327,6 +338,31 @@ def listener_loop():
|
||||
logger.info("Listener stopped")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Sound Classification Thread
|
||||
# ============================================================================
|
||||
|
||||
def sound_classifier_loop():
    """Continuously classify buffered audio until the service stops.

    Runs while ``state.running`` is true. Once the shared ring buffer holds
    at least 30 frames, the buffered frames are joined into one int16 sample
    array, passed to ``sound_classifier.classify`` and the result is stored
    in ``state.audio_scene``. Classification errors are logged and the loop
    keeps going.
    """
    logger.info("Sound classifier thread started")

    while state.running:
        buffer_ready = (
            sound_ring_buffer is not None and len(sound_ring_buffer) >= 30
        )

        if buffer_ready:
            try:
                # Snapshot the deque before joining: another thread keeps
                # appending frames to it.
                pcm_bytes = b"".join(list(sound_ring_buffer))
                samples = np.frombuffer(pcm_bytes, dtype=np.int16)
                state.audio_scene = sound_classifier.classify(samples)
            except Exception as e:
                logger.warning("Sound classification error: %s", e)
            pause = 0.5  # pause between classifications
        else:
            pause = 0.1  # buffer missing or still filling: poll again soon

        time.sleep(pause)

    logger.info("Sound classifier thread stopped")
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# FastAPI
|
||||
# ============================================================================
|
||||
@@ -336,7 +372,30 @@ app = FastAPI(title="HeadMic", description="Vixy's Ears 🦊👂")
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup():
|
||||
global sound_classifier, sound_ring_buffer
|
||||
|
||||
state.running = True
|
||||
|
||||
# Init sound classifier (optional — graceful if model missing)
|
||||
model_dir = Path(__file__).parent / "models"
|
||||
model_path = model_dir / "yamnet.tflite"
|
||||
class_map_path = model_dir / "yamnet_class_map.csv"
|
||||
if model_path.exists() and class_map_path.exists():
|
||||
try:
|
||||
from sound_id import SoundClassifier
|
||||
sound_classifier = SoundClassifier(str(model_path), str(class_map_path))
|
||||
# 31 frames of 512 samples = ~0.99s at 16kHz
|
||||
sound_ring_buffer = collections.deque(maxlen=31)
|
||||
state.sound_classification_enabled = True
|
||||
logger.info("Sound classification enabled (YAMNet)")
|
||||
|
||||
sc_thread = threading.Thread(target=sound_classifier_loop, daemon=True)
|
||||
sc_thread.start()
|
||||
except Exception as e:
|
||||
logger.warning("Sound classification unavailable: %s", e)
|
||||
else:
|
||||
logger.info("Sound classification models not found, skipping")
|
||||
|
||||
thread = threading.Thread(target=listener_loop, daemon=True)
|
||||
thread.start()
|
||||
logger.info("HeadMic started")
|
||||
@@ -365,6 +424,7 @@ async def health():
|
||||
"recording": state.recording,
|
||||
"processing": state.processing,
|
||||
"wake_count": state.wake_count,
|
||||
"sound_classification_enabled": state.sound_classification_enabled,
|
||||
"error": state.error
|
||||
}
|
||||
|
||||
@@ -378,6 +438,7 @@ async def status():
|
||||
"last_transcription": state.last_transcription,
|
||||
"last_wake_time": state.last_wake_time,
|
||||
"wake_count": state.wake_count,
|
||||
"audio_scene": state.audio_scene["dominant_category"] if state.audio_scene else None,
|
||||
"error": state.error
|
||||
}
|
||||
|
||||
@@ -390,6 +451,26 @@ async def last():
|
||||
}
|
||||
|
||||
|
||||
@app.get("/sounds")
async def sounds():
    """Return the most recent audio scene classification.

    Responds with HTTP 503 when sound classification is disabled. Before the
    first classification result exists, a placeholder with empty/None fields
    is returned instead.
    """
    if state.sound_classification_enabled:
        scene = state.audio_scene
        if scene is not None:
            return scene
        # Enabled, but the classifier has not produced a result yet.
        placeholder = {
            "category": None,
            "top_classes": [],
            "dominant_category": None,
            "timestamp": None,
        }
        return placeholder

    raise HTTPException(status_code=503, detail="Sound classification not available")
|
||||
|
||||
|
||||
@app.get("/sounds/history")
async def sounds_history(seconds: int = 30):
    """Return classification history entries from the last ``seconds`` seconds.

    Responds with HTTP 503 when sound classification is disabled. If no
    classifier instance exists, the history is an empty list.
    """
    if not state.sound_classification_enabled:
        raise HTTPException(status_code=503, detail="Sound classification not available")

    entries = [] if sound_classifier is None else sound_classifier.get_history(seconds)
    return {"history": entries}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app with uvicorn on all
    # interfaces, port 8446.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8446}
    uvicorn.run(app, **server_options)
|
||||
|
||||
522
models/yamnet_class_map.csv
Normal file
522
models/yamnet_class_map.csv
Normal file
@@ -0,0 +1,522 @@
|
||||
index,mid,display_name
|
||||
0,/m/09x0r,Speech
|
||||
1,/m/0ytgt,"Child speech, kid speaking"
|
||||
2,/m/01h8n0,Conversation
|
||||
3,/m/02qldy,"Narration, monologue"
|
||||
4,/m/0261r1,Babbling
|
||||
5,/m/0brhx,Speech synthesizer
|
||||
6,/m/07p6fty,Shout
|
||||
7,/m/07q4ntr,Bellow
|
||||
8,/m/07rwj3x,Whoop
|
||||
9,/m/07sr1lc,Yell
|
||||
10,/t/dd00135,Children shouting
|
||||
11,/m/03qc9zr,Screaming
|
||||
12,/m/02rtxlg,Whispering
|
||||
13,/m/01j3sz,Laughter
|
||||
14,/t/dd00001,Baby laughter
|
||||
15,/m/07r660_,Giggle
|
||||
16,/m/07s04w4,Snicker
|
||||
17,/m/07sq110,Belly laugh
|
||||
18,/m/07rgt08,"Chuckle, chortle"
|
||||
19,/m/0463cq4,"Crying, sobbing"
|
||||
20,/t/dd00002,"Baby cry, infant cry"
|
||||
21,/m/07qz6j3,Whimper
|
||||
22,/m/07qw_06,"Wail, moan"
|
||||
23,/m/07plz5l,Sigh
|
||||
24,/m/015lz1,Singing
|
||||
25,/m/0l14jd,Choir
|
||||
26,/m/01swy6,Yodeling
|
||||
27,/m/02bk07,Chant
|
||||
28,/m/01c194,Mantra
|
||||
29,/t/dd00005,Child singing
|
||||
30,/t/dd00006,Synthetic singing
|
||||
31,/m/06bxc,Rapping
|
||||
32,/m/02fxyj,Humming
|
||||
33,/m/07s2xch,Groan
|
||||
34,/m/07r4k75,Grunt
|
||||
35,/m/01w250,Whistling
|
||||
36,/m/0lyf6,Breathing
|
||||
37,/m/07mzm6,Wheeze
|
||||
38,/m/01d3sd,Snoring
|
||||
39,/m/07s0dtb,Gasp
|
||||
40,/m/07pyy8b,Pant
|
||||
41,/m/07q0yl5,Snort
|
||||
42,/m/01b_21,Cough
|
||||
43,/m/0dl9sf8,Throat clearing
|
||||
44,/m/01hsr_,Sneeze
|
||||
45,/m/07ppn3j,Sniff
|
||||
46,/m/06h7j,Run
|
||||
47,/m/07qv_x_,Shuffle
|
||||
48,/m/07pbtc8,"Walk, footsteps"
|
||||
49,/m/03cczk,"Chewing, mastication"
|
||||
50,/m/07pdhp0,Biting
|
||||
51,/m/0939n_,Gargling
|
||||
52,/m/01g90h,Stomach rumble
|
||||
53,/m/03q5_w,"Burping, eructation"
|
||||
54,/m/02p3nc,Hiccup
|
||||
55,/m/02_nn,Fart
|
||||
56,/m/0k65p,Hands
|
||||
57,/m/025_jnm,Finger snapping
|
||||
58,/m/0l15bq,Clapping
|
||||
59,/m/01jg02,"Heart sounds, heartbeat"
|
||||
60,/m/01jg1z,Heart murmur
|
||||
61,/m/053hz1,Cheering
|
||||
62,/m/028ght,Applause
|
||||
63,/m/07rkbfh,Chatter
|
||||
64,/m/03qtwd,Crowd
|
||||
65,/m/07qfr4h,"Hubbub, speech noise, speech babble"
|
||||
66,/t/dd00013,Children playing
|
||||
67,/m/0jbk,Animal
|
||||
68,/m/068hy,"Domestic animals, pets"
|
||||
69,/m/0bt9lr,Dog
|
||||
70,/m/05tny_,Bark
|
||||
71,/m/07r_k2n,Yip
|
||||
72,/m/07qf0zm,Howl
|
||||
73,/m/07rc7d9,Bow-wow
|
||||
74,/m/0ghcn6,Growling
|
||||
75,/t/dd00136,Whimper (dog)
|
||||
76,/m/01yrx,Cat
|
||||
77,/m/02yds9,Purr
|
||||
78,/m/07qrkrw,Meow
|
||||
79,/m/07rjwbb,Hiss
|
||||
80,/m/07r81j2,Caterwaul
|
||||
81,/m/0ch8v,"Livestock, farm animals, working animals"
|
||||
82,/m/03k3r,Horse
|
||||
83,/m/07rv9rh,Clip-clop
|
||||
84,/m/07q5rw0,"Neigh, whinny"
|
||||
85,/m/01xq0k1,"Cattle, bovinae"
|
||||
86,/m/07rpkh9,Moo
|
||||
87,/m/0239kh,Cowbell
|
||||
88,/m/068zj,Pig
|
||||
89,/t/dd00018,Oink
|
||||
90,/m/03fwl,Goat
|
||||
91,/m/07q0h5t,Bleat
|
||||
92,/m/07bgp,Sheep
|
||||
93,/m/025rv6n,Fowl
|
||||
94,/m/09b5t,"Chicken, rooster"
|
||||
95,/m/07st89h,Cluck
|
||||
96,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
|
||||
97,/m/01rd7k,Turkey
|
||||
98,/m/07svc2k,Gobble
|
||||
99,/m/09ddx,Duck
|
||||
100,/m/07qdb04,Quack
|
||||
101,/m/0dbvp,Goose
|
||||
102,/m/07qwf61,Honk
|
||||
103,/m/01280g,Wild animals
|
||||
104,/m/0cdnk,"Roaring cats (lions, tigers)"
|
||||
105,/m/04cvmfc,Roar
|
||||
106,/m/015p6,Bird
|
||||
107,/m/020bb7,"Bird vocalization, bird call, bird song"
|
||||
108,/m/07pggtn,"Chirp, tweet"
|
||||
109,/m/07sx8x_,Squawk
|
||||
110,/m/0h0rv,"Pigeon, dove"
|
||||
111,/m/07r_25d,Coo
|
||||
112,/m/04s8yn,Crow
|
||||
113,/m/07r5c2p,Caw
|
||||
114,/m/09d5_,Owl
|
||||
115,/m/07r_80w,Hoot
|
||||
116,/m/05_wcq,"Bird flight, flapping wings"
|
||||
117,/m/01z5f,"Canidae, dogs, wolves"
|
||||
118,/m/06hps,"Rodents, rats, mice"
|
||||
119,/m/04rmv,Mouse
|
||||
120,/m/07r4gkf,Patter
|
||||
121,/m/03vt0,Insect
|
||||
122,/m/09xqv,Cricket
|
||||
123,/m/09f96,Mosquito
|
||||
124,/m/0h2mp,"Fly, housefly"
|
||||
125,/m/07pjwq1,Buzz
|
||||
126,/m/01h3n,"Bee, wasp, etc."
|
||||
127,/m/09ld4,Frog
|
||||
128,/m/07st88b,Croak
|
||||
129,/m/078jl,Snake
|
||||
130,/m/07qn4z3,Rattle
|
||||
131,/m/032n05,Whale vocalization
|
||||
132,/m/04rlf,Music
|
||||
133,/m/04szw,Musical instrument
|
||||
134,/m/0fx80y,Plucked string instrument
|
||||
135,/m/0342h,Guitar
|
||||
136,/m/02sgy,Electric guitar
|
||||
137,/m/018vs,Bass guitar
|
||||
138,/m/042v_gx,Acoustic guitar
|
||||
139,/m/06w87,"Steel guitar, slide guitar"
|
||||
140,/m/01glhc,Tapping (guitar technique)
|
||||
141,/m/07s0s5r,Strum
|
||||
142,/m/018j2,Banjo
|
||||
143,/m/0jtg0,Sitar
|
||||
144,/m/04rzd,Mandolin
|
||||
145,/m/01bns_,Zither
|
||||
146,/m/07xzm,Ukulele
|
||||
147,/m/05148p4,Keyboard (musical)
|
||||
148,/m/05r5c,Piano
|
||||
149,/m/01s0ps,Electric piano
|
||||
150,/m/013y1f,Organ
|
||||
151,/m/03xq_f,Electronic organ
|
||||
152,/m/03gvt,Hammond organ
|
||||
153,/m/0l14qv,Synthesizer
|
||||
154,/m/01v1d8,Sampler
|
||||
155,/m/03q5t,Harpsichord
|
||||
156,/m/0l14md,Percussion
|
||||
157,/m/02hnl,Drum kit
|
||||
158,/m/0cfdd,Drum machine
|
||||
159,/m/026t6,Drum
|
||||
160,/m/06rvn,Snare drum
|
||||
161,/m/03t3fj,Rimshot
|
||||
162,/m/02k_mr,Drum roll
|
||||
163,/m/0bm02,Bass drum
|
||||
164,/m/011k_j,Timpani
|
||||
165,/m/01p970,Tabla
|
||||
166,/m/01qbl,Cymbal
|
||||
167,/m/03qtq,Hi-hat
|
||||
168,/m/01sm1g,Wood block
|
||||
169,/m/07brj,Tambourine
|
||||
170,/m/05r5wn,Rattle (instrument)
|
||||
171,/m/0xzly,Maraca
|
||||
172,/m/0mbct,Gong
|
||||
173,/m/016622,Tubular bells
|
||||
174,/m/0j45pbj,Mallet percussion
|
||||
175,/m/0dwsp,"Marimba, xylophone"
|
||||
176,/m/0dwtp,Glockenspiel
|
||||
177,/m/0dwt5,Vibraphone
|
||||
178,/m/0l156b,Steelpan
|
||||
179,/m/05pd6,Orchestra
|
||||
180,/m/01kcd,Brass instrument
|
||||
181,/m/0319l,French horn
|
||||
182,/m/07gql,Trumpet
|
||||
183,/m/07c6l,Trombone
|
||||
184,/m/0l14_3,Bowed string instrument
|
||||
185,/m/02qmj0d,String section
|
||||
186,/m/07y_7,"Violin, fiddle"
|
||||
187,/m/0d8_n,Pizzicato
|
||||
188,/m/01xqw,Cello
|
||||
189,/m/02fsn,Double bass
|
||||
190,/m/085jw,"Wind instrument, woodwind instrument"
|
||||
191,/m/0l14j_,Flute
|
||||
192,/m/06ncr,Saxophone
|
||||
193,/m/01wy6,Clarinet
|
||||
194,/m/03m5k,Harp
|
||||
195,/m/0395lw,Bell
|
||||
196,/m/03w41f,Church bell
|
||||
197,/m/027m70_,Jingle bell
|
||||
198,/m/0gy1t2s,Bicycle bell
|
||||
199,/m/07n_g,Tuning fork
|
||||
200,/m/0f8s22,Chime
|
||||
201,/m/026fgl,Wind chime
|
||||
202,/m/0150b9,Change ringing (campanology)
|
||||
203,/m/03qjg,Harmonica
|
||||
204,/m/0mkg,Accordion
|
||||
205,/m/0192l,Bagpipes
|
||||
206,/m/02bxd,Didgeridoo
|
||||
207,/m/0l14l2,Shofar
|
||||
208,/m/07kc_,Theremin
|
||||
209,/m/0l14t7,Singing bowl
|
||||
210,/m/01hgjl,Scratching (performance technique)
|
||||
211,/m/064t9,Pop music
|
||||
212,/m/0glt670,Hip hop music
|
||||
213,/m/02cz_7,Beatboxing
|
||||
214,/m/06by7,Rock music
|
||||
215,/m/03lty,Heavy metal
|
||||
216,/m/05r6t,Punk rock
|
||||
217,/m/0dls3,Grunge
|
||||
218,/m/0dl5d,Progressive rock
|
||||
219,/m/07sbbz2,Rock and roll
|
||||
220,/m/05w3f,Psychedelic rock
|
||||
221,/m/06j6l,Rhythm and blues
|
||||
222,/m/0gywn,Soul music
|
||||
223,/m/06cqb,Reggae
|
||||
224,/m/01lyv,Country
|
||||
225,/m/015y_n,Swing music
|
||||
226,/m/0gg8l,Bluegrass
|
||||
227,/m/02x8m,Funk
|
||||
228,/m/02w4v,Folk music
|
||||
229,/m/06j64v,Middle Eastern music
|
||||
230,/m/03_d0,Jazz
|
||||
231,/m/026z9,Disco
|
||||
232,/m/0ggq0m,Classical music
|
||||
233,/m/05lls,Opera
|
||||
234,/m/02lkt,Electronic music
|
||||
235,/m/03mb9,House music
|
||||
236,/m/07gxw,Techno
|
||||
237,/m/07s72n,Dubstep
|
||||
238,/m/0283d,Drum and bass
|
||||
239,/m/0m0jc,Electronica
|
||||
240,/m/08cyft,Electronic dance music
|
||||
241,/m/0fd3y,Ambient music
|
||||
242,/m/07lnk,Trance music
|
||||
243,/m/0g293,Music of Latin America
|
||||
244,/m/0ln16,Salsa music
|
||||
245,/m/0326g,Flamenco
|
||||
246,/m/0155w,Blues
|
||||
247,/m/05fw6t,Music for children
|
||||
248,/m/02v2lh,New-age music
|
||||
249,/m/0y4f8,Vocal music
|
||||
250,/m/0z9c,A capella
|
||||
251,/m/0164x2,Music of Africa
|
||||
252,/m/0145m,Afrobeat
|
||||
253,/m/02mscn,Christian music
|
||||
254,/m/016cjb,Gospel music
|
||||
255,/m/028sqc,Music of Asia
|
||||
256,/m/015vgc,Carnatic music
|
||||
257,/m/0dq0md,Music of Bollywood
|
||||
258,/m/06rqw,Ska
|
||||
259,/m/02p0sh1,Traditional music
|
||||
260,/m/05rwpb,Independent music
|
||||
261,/m/074ft,Song
|
||||
262,/m/025td0t,Background music
|
||||
263,/m/02cjck,Theme music
|
||||
264,/m/03r5q_,Jingle (music)
|
||||
265,/m/0l14gg,Soundtrack music
|
||||
266,/m/07pkxdp,Lullaby
|
||||
267,/m/01z7dr,Video game music
|
||||
268,/m/0140xf,Christmas music
|
||||
269,/m/0ggx5q,Dance music
|
||||
270,/m/04wptg,Wedding music
|
||||
271,/t/dd00031,Happy music
|
||||
272,/t/dd00033,Sad music
|
||||
273,/t/dd00034,Tender music
|
||||
274,/t/dd00035,Exciting music
|
||||
275,/t/dd00036,Angry music
|
||||
276,/t/dd00037,Scary music
|
||||
277,/m/03m9d0z,Wind
|
||||
278,/m/09t49,Rustling leaves
|
||||
279,/t/dd00092,Wind noise (microphone)
|
||||
280,/m/0jb2l,Thunderstorm
|
||||
281,/m/0ngt1,Thunder
|
||||
282,/m/0838f,Water
|
||||
283,/m/06mb1,Rain
|
||||
284,/m/07r10fb,Raindrop
|
||||
285,/t/dd00038,Rain on surface
|
||||
286,/m/0j6m2,Stream
|
||||
287,/m/0j2kx,Waterfall
|
||||
288,/m/05kq4,Ocean
|
||||
289,/m/034srq,"Waves, surf"
|
||||
290,/m/06wzb,Steam
|
||||
291,/m/07swgks,Gurgling
|
||||
292,/m/02_41,Fire
|
||||
293,/m/07pzfmf,Crackle
|
||||
294,/m/07yv9,Vehicle
|
||||
295,/m/019jd,"Boat, Water vehicle"
|
||||
296,/m/0hsrw,"Sailboat, sailing ship"
|
||||
297,/m/056ks2,"Rowboat, canoe, kayak"
|
||||
298,/m/02rlv9,"Motorboat, speedboat"
|
||||
299,/m/06q74,Ship
|
||||
300,/m/012f08,Motor vehicle (road)
|
||||
301,/m/0k4j,Car
|
||||
302,/m/0912c9,"Vehicle horn, car horn, honking"
|
||||
303,/m/07qv_d5,Toot
|
||||
304,/m/02mfyn,Car alarm
|
||||
305,/m/04gxbd,"Power windows, electric windows"
|
||||
306,/m/07rknqz,Skidding
|
||||
307,/m/0h9mv,Tire squeal
|
||||
308,/t/dd00134,Car passing by
|
||||
309,/m/0ltv,"Race car, auto racing"
|
||||
310,/m/07r04,Truck
|
||||
311,/m/0gvgw0,Air brake
|
||||
312,/m/05x_td,"Air horn, truck horn"
|
||||
313,/m/02rhddq,Reversing beeps
|
||||
314,/m/03cl9h,"Ice cream truck, ice cream van"
|
||||
315,/m/01bjv,Bus
|
||||
316,/m/03j1ly,Emergency vehicle
|
||||
317,/m/04qvtq,Police car (siren)
|
||||
318,/m/012n7d,Ambulance (siren)
|
||||
319,/m/012ndj,"Fire engine, fire truck (siren)"
|
||||
320,/m/04_sv,Motorcycle
|
||||
321,/m/0btp2,"Traffic noise, roadway noise"
|
||||
322,/m/06d_3,Rail transport
|
||||
323,/m/07jdr,Train
|
||||
324,/m/04zmvq,Train whistle
|
||||
325,/m/0284vy3,Train horn
|
||||
326,/m/01g50p,"Railroad car, train wagon"
|
||||
327,/t/dd00048,Train wheels squealing
|
||||
328,/m/0195fx,"Subway, metro, underground"
|
||||
329,/m/0k5j,Aircraft
|
||||
330,/m/014yck,Aircraft engine
|
||||
331,/m/04229,Jet engine
|
||||
332,/m/02l6bg,"Propeller, airscrew"
|
||||
333,/m/09ct_,Helicopter
|
||||
334,/m/0cmf2,"Fixed-wing aircraft, airplane"
|
||||
335,/m/0199g,Bicycle
|
||||
336,/m/06_fw,Skateboard
|
||||
337,/m/02mk9,Engine
|
||||
338,/t/dd00065,Light engine (high frequency)
|
||||
339,/m/08j51y,"Dental drill, dentist's drill"
|
||||
340,/m/01yg9g,Lawn mower
|
||||
341,/m/01j4z9,Chainsaw
|
||||
342,/t/dd00066,Medium engine (mid frequency)
|
||||
343,/t/dd00067,Heavy engine (low frequency)
|
||||
344,/m/01h82_,Engine knocking
|
||||
345,/t/dd00130,Engine starting
|
||||
346,/m/07pb8fc,Idling
|
||||
347,/m/07q2z82,"Accelerating, revving, vroom"
|
||||
348,/m/02dgv,Door
|
||||
349,/m/03wwcy,Doorbell
|
||||
350,/m/07r67yg,Ding-dong
|
||||
351,/m/02y_763,Sliding door
|
||||
352,/m/07rjzl8,Slam
|
||||
353,/m/07r4wb8,Knock
|
||||
354,/m/07qcpgn,Tap
|
||||
355,/m/07q6cd_,Squeak
|
||||
356,/m/0642b4,Cupboard open or close
|
||||
357,/m/0fqfqc,Drawer open or close
|
||||
358,/m/04brg2,"Dishes, pots, and pans"
|
||||
359,/m/023pjk,"Cutlery, silverware"
|
||||
360,/m/07pn_8q,Chopping (food)
|
||||
361,/m/0dxrf,Frying (food)
|
||||
362,/m/0fx9l,Microwave oven
|
||||
363,/m/02pjr4,Blender
|
||||
364,/m/02jz0l,"Water tap, faucet"
|
||||
365,/m/0130jx,Sink (filling or washing)
|
||||
366,/m/03dnzn,Bathtub (filling or washing)
|
||||
367,/m/03wvsk,Hair dryer
|
||||
368,/m/01jt3m,Toilet flush
|
||||
369,/m/012xff,Toothbrush
|
||||
370,/m/04fgwm,Electric toothbrush
|
||||
371,/m/0d31p,Vacuum cleaner
|
||||
372,/m/01s0vc,Zipper (clothing)
|
||||
373,/m/03v3yw,Keys jangling
|
||||
374,/m/0242l,Coin (dropping)
|
||||
375,/m/01lsmm,Scissors
|
||||
376,/m/02g901,"Electric shaver, electric razor"
|
||||
377,/m/05rj2,Shuffling cards
|
||||
378,/m/0316dw,Typing
|
||||
379,/m/0c2wf,Typewriter
|
||||
380,/m/01m2v,Computer keyboard
|
||||
381,/m/081rb,Writing
|
||||
382,/m/07pp_mv,Alarm
|
||||
383,/m/07cx4,Telephone
|
||||
384,/m/07pp8cl,Telephone bell ringing
|
||||
385,/m/01hnzm,Ringtone
|
||||
386,/m/02c8p,"Telephone dialing, DTMF"
|
||||
387,/m/015jpf,Dial tone
|
||||
388,/m/01z47d,Busy signal
|
||||
389,/m/046dlr,Alarm clock
|
||||
390,/m/03kmc9,Siren
|
||||
391,/m/0dgbq,Civil defense siren
|
||||
392,/m/030rvx,Buzzer
|
||||
393,/m/01y3hg,"Smoke detector, smoke alarm"
|
||||
394,/m/0c3f7m,Fire alarm
|
||||
395,/m/04fq5q,Foghorn
|
||||
396,/m/0l156k,Whistle
|
||||
397,/m/06hck5,Steam whistle
|
||||
398,/t/dd00077,Mechanisms
|
||||
399,/m/02bm9n,"Ratchet, pawl"
|
||||
400,/m/01x3z,Clock
|
||||
401,/m/07qjznt,Tick
|
||||
402,/m/07qjznl,Tick-tock
|
||||
403,/m/0l7xg,Gears
|
||||
404,/m/05zc1,Pulleys
|
||||
405,/m/0llzx,Sewing machine
|
||||
406,/m/02x984l,Mechanical fan
|
||||
407,/m/025wky1,Air conditioning
|
||||
408,/m/024dl,Cash register
|
||||
409,/m/01m4t,Printer
|
||||
410,/m/0dv5r,Camera
|
||||
411,/m/07bjf,Single-lens reflex camera
|
||||
412,/m/07k1x,Tools
|
||||
413,/m/03l9g,Hammer
|
||||
414,/m/03p19w,Jackhammer
|
||||
415,/m/01b82r,Sawing
|
||||
416,/m/02p01q,Filing (rasp)
|
||||
417,/m/023vsd,Sanding
|
||||
418,/m/0_ksk,Power tool
|
||||
419,/m/01d380,Drill
|
||||
420,/m/014zdl,Explosion
|
||||
421,/m/032s66,"Gunshot, gunfire"
|
||||
422,/m/04zjc,Machine gun
|
||||
423,/m/02z32qm,Fusillade
|
||||
424,/m/0_1c,Artillery fire
|
||||
425,/m/073cg4,Cap gun
|
||||
426,/m/0g6b5,Fireworks
|
||||
427,/g/122z_qxw,Firecracker
|
||||
428,/m/07qsvvw,"Burst, pop"
|
||||
429,/m/07pxg6y,Eruption
|
||||
430,/m/07qqyl4,Boom
|
||||
431,/m/083vt,Wood
|
||||
432,/m/07pczhz,Chop
|
||||
433,/m/07pl1bw,Splinter
|
||||
434,/m/07qs1cx,Crack
|
||||
435,/m/039jq,Glass
|
||||
436,/m/07q7njn,"Chink, clink"
|
||||
437,/m/07rn7sz,Shatter
|
||||
438,/m/04k94,Liquid
|
||||
439,/m/07rrlb6,"Splash, splatter"
|
||||
440,/m/07p6mqd,Slosh
|
||||
441,/m/07qlwh6,Squish
|
||||
442,/m/07r5v4s,Drip
|
||||
443,/m/07prgkl,Pour
|
||||
444,/m/07pqc89,"Trickle, dribble"
|
||||
445,/t/dd00088,Gush
|
||||
446,/m/07p7b8y,Fill (with liquid)
|
||||
447,/m/07qlf79,Spray
|
||||
448,/m/07ptzwd,Pump (liquid)
|
||||
449,/m/07ptfmf,Stir
|
||||
450,/m/0dv3j,Boiling
|
||||
451,/m/0790c,Sonar
|
||||
452,/m/0dl83,Arrow
|
||||
453,/m/07rqsjt,"Whoosh, swoosh, swish"
|
||||
454,/m/07qnq_y,"Thump, thud"
|
||||
455,/m/07rrh0c,Thunk
|
||||
456,/m/0b_fwt,Electronic tuner
|
||||
457,/m/02rr_,Effects unit
|
||||
458,/m/07m2kt,Chorus effect
|
||||
459,/m/018w8,Basketball bounce
|
||||
460,/m/07pws3f,Bang
|
||||
461,/m/07ryjzk,"Slap, smack"
|
||||
462,/m/07rdhzs,"Whack, thwack"
|
||||
463,/m/07pjjrj,"Smash, crash"
|
||||
464,/m/07pc8lb,Breaking
|
||||
465,/m/07pqn27,Bouncing
|
||||
466,/m/07rbp7_,Whip
|
||||
467,/m/07pyf11,Flap
|
||||
468,/m/07qb_dv,Scratch
|
||||
469,/m/07qv4k0,Scrape
|
||||
470,/m/07pdjhy,Rub
|
||||
471,/m/07s8j8t,Roll
|
||||
472,/m/07plct2,Crushing
|
||||
473,/t/dd00112,"Crumpling, crinkling"
|
||||
474,/m/07qcx4z,Tearing
|
||||
475,/m/02fs_r,"Beep, bleep"
|
||||
476,/m/07qwdck,Ping
|
||||
477,/m/07phxs1,Ding
|
||||
478,/m/07rv4dm,Clang
|
||||
479,/m/07s02z0,Squeal
|
||||
480,/m/07qh7jl,Creak
|
||||
481,/m/07qwyj0,Rustle
|
||||
482,/m/07s34ls,Whir
|
||||
483,/m/07qmpdm,Clatter
|
||||
484,/m/07p9k1k,Sizzle
|
||||
485,/m/07qc9xj,Clicking
|
||||
486,/m/07rwm0c,Clickety-clack
|
||||
487,/m/07phhsh,Rumble
|
||||
488,/m/07qyrcz,Plop
|
||||
489,/m/07qfgpx,"Jingle, tinkle"
|
||||
490,/m/07rcgpl,Hum
|
||||
491,/m/07p78v5,Zing
|
||||
492,/t/dd00121,Boing
|
||||
493,/m/07s12q4,Crunch
|
||||
494,/m/028v0c,Silence
|
||||
495,/m/01v_m0,Sine wave
|
||||
496,/m/0b9m1,Harmonic
|
||||
497,/m/0hdsk,Chirp tone
|
||||
498,/m/0c1dj,Sound effect
|
||||
499,/m/07pt_g0,Pulse
|
||||
500,/t/dd00125,"Inside, small room"
|
||||
501,/t/dd00126,"Inside, large room or hall"
|
||||
502,/t/dd00127,"Inside, public space"
|
||||
503,/t/dd00128,"Outside, urban or manmade"
|
||||
504,/t/dd00129,"Outside, rural or natural"
|
||||
505,/m/01b9nn,Reverberation
|
||||
506,/m/01jnbd,Echo
|
||||
507,/m/096m7z,Noise
|
||||
508,/m/06_y0by,Environmental noise
|
||||
509,/m/07rgkc5,Static
|
||||
510,/m/06xkwv,Mains hum
|
||||
511,/m/0g12c5,Distortion
|
||||
512,/m/08p9q4,Sidetone
|
||||
513,/m/07szfh9,Cacophony
|
||||
514,/m/0chx_,White noise
|
||||
515,/m/0cj0r,Pink noise
|
||||
516,/m/07p_0gm,Throbbing
|
||||
517,/m/01jwx6,Vibration
|
||||
518,/m/07c52,Television
|
||||
519,/m/06bz3,Radio
|
||||
520,/m/07hvw1,Field recording
|
||||
|
182
sound_id.py
Normal file
182
sound_id.py
Normal file
@@ -0,0 +1,182 @@
|
||||
"""
|
||||
Sound Identification Module for HeadMic
|
||||
YAMNet audio event classifier — CPU TFLite (Edge TPU ready)
|
||||
"""
|
||||
|
||||
import collections
|
||||
import csv
|
||||
import logging
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
logger = logging.getLogger("sound_id")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# YAMNet expects 0.975s of 16kHz audio = 15600 samples
YAMNET_SAMPLE_RATE = 16000
YAMNET_INPUT_SAMPLES = 15600

# Category mapping: group YAMNet's 521 classes into useful buckets.
#
# Every entry must match a ``display_name`` in yamnet_class_map.csv exactly,
# otherwise it can never be selected. Each display name should appear in only
# one bucket: the class -> category lookup is built from this dict, so a
# duplicated name silently ends up in just one of its buckets.
# Classes not listed here are reported as "other" by the classifier.
CATEGORY_GROUPS = {
    "speech": [
        "Speech", "Child speech, kid speaking", "Conversation",
        "Narration, monologue", "Babbling", "Speech synthesizer",
        "Shout", "Yell", "Whispering", "Laughter", "Chatter",
    ],
    "alert": [
        "Doorbell", "Ding-dong", "Knock", "Tap", "Alarm",
        "Alarm clock", "Smoke detector, smoke alarm", "Fire alarm",
        "Siren", "Buzzer", "Telephone", "Telephone bell ringing",
        "Ringtone", "Telephone dialing, DTMF", "Bell", "Church bell",
    ],
    "music": [
        "Music", "Musical instrument", "Singing", "Song", "Guitar",
        "Electric guitar", "Acoustic guitar", "Bass guitar", "Piano",
        "Keyboard (musical)", "Drum", "Drum kit", "Snare drum",
        "Bass drum", "Cymbal", "Hi-hat", "Violin, fiddle", "Flute",
        "Trumpet", "Saxophone", "Harmonica", "Organ", "Synthesizer",
        "Plucked string instrument", "Strum", "Hip hop music",
        "Pop music", "Rock music", "Heavy metal", "Punk rock",
        "Grunge", "Progressive rock", "Rock and roll", "Jazz",
        "Blues", "Soul music", "Reggae", "Country", "Swing music",
        "Bluegrass", "Funk", "Folk music", "Middle Eastern music",
        "Electronic music", "House music", "Techno", "Dubstep",
        "Drum and bass", "Electronica", "Electronic dance music",
        "Ambient music", "Trance music", "Music of Latin America",
        "Salsa music", "Flamenco", "Gospel music", "Christian music",
        "Music of Africa", "Afrobeat", "Music of Asia",
    ],
    "animal": [
        # "Whimper (dog)" is the canine class; plain "Whimper" is the human
        # vocalization in the class map and does not belong here.
        "Dog", "Bark", "Howl", "Bow-wow", "Growling", "Whimper (dog)",
        "Cat", "Purr", "Meow", "Hiss", "Caterwaul",
        "Bird", "Bird vocalization, bird call, bird song", "Chirp, tweet",
        "Squawk", "Crow", "Caw", "Owl", "Pigeon, dove",
        "Insect", "Cricket", "Mosquito", "Fly, housefly", "Bee, wasp, etc.",
        # The class map has a single "Chicken, rooster" label (no separate
        # "Rooster" or "Chicken, hen" entries).
        "Frog", "Chicken, rooster", "Duck", "Goose",
    ],
    "household": [
        # "Doorbell" lives in "alert" only, so a ringing doorbell is
        # reported as an alert rather than generic household noise.
        "Door", "Sliding door", "Slam",
        "Drawer open or close", "Cupboard open or close",
        "Cutlery, silverware", "Dishes, pots, and pans",
        "Glass", "Chink, clink", "Water tap, faucet",
        "Sink (filling or washing)", "Bathtub (filling or washing)",
        "Toilet flush", "Vacuum cleaner", "Blender",
        "Microwave oven", "Typing", "Computer keyboard",
        "Writing", "Keys jangling", "Coin (dropping)",
        "Walk, footsteps", "Run",
        "Clapping", "Finger snapping",
    ],
    "environment": [
        "Wind", "Rustling leaves", "Wind noise (microphone)",
        "Rain", "Raindrop", "Rain on surface", "Thunder",
        "Thunderstorm", "Stream", "Waterfall", "Ocean",
        "Waves, surf", "Traffic noise, roadway noise",
        "Car", "Engine", "Idling", "Truck", "Bus", "Motorcycle",
        "Aircraft", "Aircraft engine", "Helicopter",
        "Train", "Railroad car, train wagon",
    ],
    "silence": [
        "Silence", "White noise", "Static",
    ],
}
|
||||
|
||||
|
||||
class SoundClassifier:
    """YAMNet audio event classifier running on a TFLite interpreter.

    Each call to :meth:`classify` scores one audio window, maps the top
    scoring class to a coarse category via ``CATEGORY_GROUPS`` ("other" when
    the class is not listed) and records the category in a bounded history
    used to compute a smoothed "dominant" category.

    The history keeps at most 20 entries, so :meth:`get_history` can never
    return more than the 20 most recent classifications, whatever time
    window is requested.
    """

    def __init__(self, model_path, class_map_path, use_edgetpu=False):
        """Load the class map and the TFLite model.

        Args:
            model_path: Path to the YAMNet ``.tflite`` model file.
            class_map_path: Path to the class map CSV (header row, display
                name in the third column).
            use_edgetpu: When true, load the model through the
                ``libedgetpu.so.1`` delegate instead of running on CPU.

        Raises:
            OSError: If the class map file cannot be opened.
            ImportError: If ``ai_edge_litert`` is not installed.
        """
        # Load class names
        self._class_names = self._load_class_names(class_map_path)
        logger.info("Loaded %d YAMNet class names", len(self._class_names))

        # Build reverse lookup: class_name -> category
        self._class_to_category = self._build_category_lookup(CATEGORY_GROUPS)

        # Load TFLite model. The runtime is imported here rather than at
        # module level so the module can be imported without it installed.
        import ai_edge_litert.interpreter as tfl

        if use_edgetpu:
            delegate = tfl.load_delegate("libedgetpu.so.1")
            self._interp = tfl.Interpreter(
                model_path=str(model_path),
                experimental_delegates=[delegate],
            )
            logger.info("YAMNet loaded on Edge TPU")
        else:
            self._interp = tfl.Interpreter(model_path=str(model_path))
            logger.info("YAMNet loaded on CPU")

        self._interp.allocate_tensors()
        self._input = self._interp.get_input_details()[0]
        self._output = self._interp.get_output_details()[0]
        logger.info(
            "YAMNet ready: input %s %s, output %s",
            self._input["shape"], self._input["dtype"], self._output["shape"],
        )

        # Classification history for smoothing
        self._history = collections.deque(maxlen=20)

    @staticmethod
    def _load_class_names(class_map_path):
        """Read display names (third CSV column) in file order, skipping the header."""
        with open(class_map_path, newline="", encoding="utf-8") as f:
            reader = csv.reader(f)
            next(reader, None)  # skip header (tolerates an empty file)
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip those instead of failing on row[2].
            return [row[2] for row in reader if row]

    @staticmethod
    def _build_category_lookup(groups):
        """Invert ``{category: [class names]}`` into ``{class name: category}``.

        When a class name is listed under several categories, the first
        category in dict order wins, so earlier (higher-priority) buckets
        such as "alert" are not overridden by later ones.
        """
        lookup = {}
        for category, names in groups.items():
            for name in names:
                lookup.setdefault(name, category)
        return lookup

    def classify(self, audio_int16):
        """Classify an audio buffer.

        Args:
            audio_int16: numpy int16 array of PCM samples at 16kHz. Shorter
                input is zero-padded at the end; longer input keeps only the
                most recent ``YAMNET_INPUT_SAMPLES`` samples.

        Returns:
            dict with ``category`` (category of the top class), ``top_classes``
            (up to five ``{"name", "score"}`` dicts, best first),
            ``dominant_category`` (most frequent category in the recent
            history, including this call) and ``timestamp`` (``time.time()``).
        """
        # Convert int16 PCM to float32 [-1.0, 1.0]
        audio_f32 = np.asarray(audio_int16).astype(np.float32) / 32768.0

        # Pad or trim to exact input size
        if len(audio_f32) < YAMNET_INPUT_SAMPLES:
            audio_f32 = np.pad(audio_f32, (0, YAMNET_INPUT_SAMPLES - len(audio_f32)))
        elif len(audio_f32) > YAMNET_INPUT_SAMPLES:
            audio_f32 = audio_f32[-YAMNET_INPUT_SAMPLES:]

        self._interp.set_tensor(self._input["index"], audio_f32)
        self._interp.invoke()

        # assumes the output tensor is (1, num_classes); [0] takes that row
        # -- TODO confirm against the deployed model
        scores = self._interp.get_tensor(self._output["index"])[0]

        # Top 5 classes, highest score first
        top_indices = np.argsort(scores)[-5:][::-1]
        top_classes = []
        for idx in top_indices:
            # Guard against a class map shorter than the model's output.
            name = self._class_names[idx] if idx < len(self._class_names) else "Unknown"
            top_classes.append({"name": name, "score": round(float(scores[idx]), 3)})

        # Map top class to category
        top_name = top_classes[0]["name"]
        category = self._class_to_category.get(top_name, "other")

        # Update history
        now = time.time()
        self._history.append({"category": category, "timestamp": now})

        # Dominant category from recent history. The history is never empty
        # here because the current result was just appended.
        cat_counts = collections.Counter(h["category"] for h in self._history)
        dominant = cat_counts.most_common(1)[0][0]

        return {
            "category": category,
            "top_classes": top_classes,
            "dominant_category": dominant,
            "timestamp": now,
        }

    def get_history(self, seconds=30):
        """Return history entries recorded within the last ``seconds`` seconds.

        Entries are ``{"category", "timestamp"}`` dicts, oldest first.
        """
        cutoff = time.time() - seconds
        # Copy first: classify() may append from another thread, and
        # iterating a deque while it is mutated raises RuntimeError.
        snapshot = list(self._history)
        return [h for h in snapshot if h["timestamp"] >= cutoff]
|
||||
Reference in New Issue
Block a user