Skip to content

Commit a90c111

Browse files
committed
Use CoreML on Apple platforms
1 parent 5dfd94f commit a90c111

File tree

8 files changed

+353
-3
lines changed

8 files changed

+353
-3
lines changed

Package.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ let package = Package(
99
targets: [
1010
.target(name: "SwiftWhisper", dependencies: [.target(name: "whisper_cpp")]),
1111
.target(name: "whisper_cpp", cSettings: [
12-
.define("GGML_USE_ACCELERATE", .when(platforms: [.macOS, .macCatalyst, .iOS]))
12+
.define("GGML_USE_ACCELERATE", .when(platforms: [.macOS, .macCatalyst, .iOS])),
13+
.define("WHISPER_USE_COREML", .when(platforms: [.macOS, .macCatalyst, .iOS])),
14+
.define("WHISPER_COREML_ALLOW_FALLBACK", .when(platforms: [.macOS, .macCatalyst, .iOS]))
1315
]),
1416
.testTarget(name: "WhisperTests", dependencies: [.target(name: "SwiftWhisper")], resources: [.copy("TestResources/")])
1517
],

README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,12 @@ protocol WhisperDelegate {
7272

7373
### Downloading Models :inbox_tray:
7474

75-
You can find the pre-trained models at [here](https://huggingface.co/ggerganov/whisper.cpp) for download.
75+
You can download the pre-trained models [here](https://huggingface.co/ggerganov/whisper.cpp).
76+
77+
### CoreML Support :brain:
78+
79+
If a CoreML model with the same base name as the regular model, but with the suffix changed to `-encoder.mlmodelc`, sits beside it (for example, `tiny.bin` alongside a `tiny-encoder.mlmodelc` file), and you are using the `Whisper(fromFileURL:)` initializer, then the CoreML model will be used for transcription! You can verify it's active by checking the console output.
80+
7681

7782
### Converting audio to 16kHz PCM :wrench:
7883

@@ -113,7 +118,7 @@ func convertAudioFileToPCMArray(fileURL: URL, completionHandler: @escaping (Resu
113118
}
114119
```
115120

116-
### Speed boost :rocket:
121+
### Development speed boost :rocket:
117122

118123
You may find the performance of the transcription slow when compiling your app for the `Debug` build configuration. This is because the compiler doesn't fully optimize SwiftWhisper unless the build configuration is set to `Release`.
119124

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../whisper.cpp/coreml/whisper-encoder-impl.h
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../whisper.cpp/coreml/whisper-encoder-impl.m
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../whisper.cpp/coreml/whisper-encoder.h
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../whisper.cpp/coreml/whisper-encoder.mm
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
[
2+
{
3+
"metadataOutputVersion" : "3.0",
4+
"storagePrecision" : "Float16",
5+
"outputSchema" : [
6+
{
7+
"hasShapeFlexibility" : "0",
8+
"isOptional" : "0",
9+
"dataType" : "Float32",
10+
"formattedType" : "MultiArray (Float32)",
11+
"shortDescription" : "",
12+
"shape" : "[]",
13+
"name" : "output",
14+
"type" : "MultiArray"
15+
}
16+
],
17+
"modelParameters" : [
18+
19+
],
20+
"specificationVersion" : 6,
21+
"mlProgramOperationTypeHistogram" : {
22+
"Linear" : 24,
23+
"Matmul" : 8,
24+
"Cast" : 2,
25+
"Conv" : 2,
26+
"Softmax" : 4,
27+
"Add" : 9,
28+
"LayerNorm" : 9,
29+
"Mul" : 8,
30+
"Transpose" : 17,
31+
"Gelu" : 6,
32+
"Reshape" : 16
33+
},
34+
"computePrecision" : "Mixed (Float16, Float32, Int32)",
35+
"isUpdatable" : "0",
36+
"availability" : {
37+
"macOS" : "12.0",
38+
"tvOS" : "15.0",
39+
"watchOS" : "8.0",
40+
"iOS" : "15.0",
41+
"macCatalyst" : "15.0"
42+
},
43+
"modelType" : {
44+
"name" : "MLModelType_mlProgram"
45+
},
46+
"userDefinedMetadata" : {
47+
48+
},
49+
"inputSchema" : [
50+
{
51+
"hasShapeFlexibility" : "0",
52+
"isOptional" : "0",
53+
"dataType" : "Float32",
54+
"formattedType" : "MultiArray (Float32 1 × 80 × 3000)",
55+
"shortDescription" : "",
56+
"shape" : "[1, 80, 3000]",
57+
"name" : "logmel_data",
58+
"type" : "MultiArray"
59+
}
60+
],
61+
"generatedClassName" : "coreml_encoder_tiny",
62+
"method" : "predict"
63+
}
64+
]

Tests/WhisperTests/TestResources/tiny-encoder.mlmodelc/model.mil

Lines changed: 275 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)