@@ -99,6 +99,9 @@ public class YourService(IDashScopeClient client)
9999 - [长上下文(Qwen-Long)](#长上下文(Qwen-Long))
100100
101101- [多模态](#多模态) - Qwen-VL,QVQ 等,支持推理/视觉理解/OCR/音频理解等场景
102+   - [视觉理解/推理](#视觉理解/推理) - 图像/视频输入与理解,支持推理模式
103+   - [文字提取](#文字提取) - OCR 任务,读取表格/文档/公式等
104+
102105- [语音合成](#语音合成) - CosyVoice,Sambert 等,支持 TTS 等应用场景
103106- [图像生成](#图像生成) - wanx2.1 等,支持文生图,人像风格重绘等应用场景
104107- [应用调用](#应用调用)
@@ -2445,6 +2448,105 @@ messages.Add(
24452448 ]));
24462449` ` `
24472450
2451+ ### 文字提取
2452+
2453+ 使用 `qwen-vl-ocr` 系列模型可以很好地完成文字提取任务,基础用法(使用本地文件):
2454+
2455+ ```csharp
2456+ // Upload the local image; the returned link is passed to the model as the image content.
2457+ await using var tilted = File.OpenRead("tilted.png");
2458+ var ossLink = await client.UploadTemporaryFileAsync("qwen-vl-ocr-latest", tilted, "tilted.jpg");
2459+ Console.WriteLine($"File uploaded: {ossLink}");
2460+ var messages = new List<MultimodalMessage>();
2461+ messages.Add(
2462+     MultimodalMessage.User(
2463+     [
2464+         // If the image is skewed, try setting enableRotate to true.
2465+         MultimodalMessageContent.ImageContent(ossLink, enableRotate: true),
2466+     ]));
2467+ var completion = client.GetMultimodalGenerationStreamAsync(
2468+     new ModelRequest<MultimodalInput, IMultimodalParameters>()
2469+     {
2470+         Model = "qwen-vl-ocr-latest",
2471+         Input = new MultimodalInput { Messages = messages },
2472+         Parameters = new MultimodalParameters
2473+         {
2474+             IncrementalOutput = true,
2475+         }
2476+     });
2477+ ```
2478+
2479+ 完整示例:
2480+
2481+ ```csharp
2482+ // Upload the local image; the returned link is passed to the model as the image content.
2483+ await using var tilted = File.OpenRead("tilted.png");
2484+ var ossLink = await client.UploadTemporaryFileAsync("qwen-vl-ocr-latest", tilted, "tilted.jpg");
2485+ Console.WriteLine($"File uploaded: {ossLink}");
2486+ var messages = new List<MultimodalMessage>();
2487+ messages.Add(
2488+     MultimodalMessage.User(
2489+     [
2490+         MultimodalMessageContent.ImageContent(ossLink, enableRotate: true),
2491+     ]));
2492+ var completion = client.GetMultimodalGenerationStreamAsync(
2493+     new ModelRequest<MultimodalInput, IMultimodalParameters>()
2494+     {
2495+         Model = "qwen-vl-ocr-latest",
2496+         Input = new MultimodalInput() { Messages = messages },
2497+         Parameters = new MultimodalParameters()
2498+         {
2499+             IncrementalOutput = true,
2500+         }
2501+     });
2502+ // Collect the streamed text so the whole reply can be added to the message list afterwards.
2503+ var reply = new StringBuilder();
2504+ MultimodalTokenUsage? usage = null;
2505+ // Print each chunk's text as it arrives.
2506+ await foreach (var chunk in completion)
2507+ {
2508+     var choice = chunk.Output.Choices[0];
2509+     if (choice.Message.Content.Count == 0)
2510+     {
2511+         // Nothing to print for a chunk without content items.
2512+         continue;
2513+     }
2514+
2515+     var text = choice.Message.Content[0].Text;
2516+     Console.Write(text);
2517+     reply.Append(text);
2518+     // Remember the usage reported by the latest chunk that carried content.
2519+     usage = chunk.Usage;
2520+ }
2521+
2522+ Console.WriteLine();
2523+ // Append the assistant reply to the message list.
2524+ messages.Add(MultimodalMessage.Assistant([MultimodalMessageContent.TextContent(reply.ToString())]));
2525+ // usage is still null when no chunk carried content.
2526+ if (usage is not null)
2527+ {
2528+     Console.WriteLine(
2529+         $"Usage: in({usage.InputTokens})/out({usage.OutputTokens})/image({usage.ImageTokens})/total({usage.TotalTokens})");
2530+ }
2531+
2532+ /*
2533+ File uploaded: oss://dashscope-instant/52afe077fb4825c6d74411758cb1ab98/2025-11-28/435ea45f-9942-4fd4-983a-9ea8a3cd5ecb/tilted.jpg
2534+ 产品介绍
2535+ 本品采用韩国进口纤维丝制造,不缩水、不变形、不发霉、
2536+ 不生菌、不伤物品表面。具有真正的不粘油、吸水力强、耐水
2537+ 浸、清洗干净、无毒、无残留、易晾干等特点。
2538+ 店家使用经验:不锈钢、陶瓷制品、浴盆、整体浴室大部分是
2539+ 白色的光洁表面,用其他的抹布擦洗表面污渍不易洗掉,太尖
2540+ 的容易划出划痕。使用这个仿真丝瓜布,沾少量中性洗涤剂揉
2541+ 出泡沫,很容易把这些表面污渍擦洗干净。
2542+ 6941990612023
2543+ 货号:2023
2544+ Usage: in(2434)/out(155)/image(2410)/total(2589)
2545+ */
2546+ ```
2547+
2548+
2549+
24482550## 语音合成
24492551
24502552通过 `dashScopeClient.CreateSpeechSynthesizerSocketSessionAsync()` 来创建一个语音合成会话。
0 commit comments