summaryrefslogtreecommitdiffstats
path: root/dom/media/webspeech/synth/nsISpeechService.idl
blob: 710686f1ed8080117624a7677a1df2f9cb5629f1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/* -*- Mode: IDL; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsISupports.idl"

/**
 * Integer type used by the nsISpeechService.SERVICETYPE_* constants and by
 * the nsISpeechService.serviceType attribute.
 */
typedef unsigned short SpeechServiceType;

/**
 * A callback is implemented by the service and passed to
 * nsISpeechTask.setup(). It is used to notify the service of mid-speech
 * operations: pause, resume, cancel and volume change.
 *
 * For direct audio services, it is required to implement these, although it
 * could be helpful to use the cancel method for shutting down the speech
 * resources.
 *
 * NOTE(review): the "although" clause suggests the sentence above was meant
 * to read "it is not required to implement these" for direct audio services.
 * Confirm against the nsISpeechTask implementation before relying on it.
 */
[scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)]
interface nsISpeechTaskCallback : nsISupports
{
  /**
   * The user or application has paused the speech.
   */
  void onPause();

  /**
   * The user or application has resumed the speech.
   */
  void onResume();

  /**
   * The user or application has canceled the speech.
   */
  void onCancel();

  /**
   * The user or application has changed the volume of this speech.
   * This is only used for the indirect audio service type.
   *
   * @param aVolume the new volume.
   */
  void onVolumeChanged(in float aVolume);
};


/**
 * A task is associated with a single utterance. It is provided by the browser
 * to the service in the nsISpeechService.speak() method, and is the channel
 * through which the service sends events or audio data back to the browser.
 */
[scriptable, builtinclass, uuid(ad59949c-2437-4b35-8eeb-d760caab75c5)]
interface nsISpeechTask : nsISupports
{
  /**
   * Prepare browser for speech.
   *
   * aChannels and aRate are optional arguments; [optional_argc] lets the
   * implementation know how many of them were actually supplied.
   *
   * @param aCallback callback object for mid-speech operations.
   * @param aChannels number of audio channels. Only required in direct audio
   *                  services.
   * @param aRate     audio rate. Only required in direct audio services.
   */
  [optional_argc] void setup(in nsISpeechTaskCallback aCallback,
                               [optional] in uint32_t aChannels,
                               [optional] in uint32_t aRate);

  /**
   * Send audio data to browser. Used by direct audio services; an empty
   * sendAudio() terminates the utterance (see nsISpeechService).
   *
   * @param aData      an Int16Array with PCM-16 audio data.
   * @param aLandmarks an array of sample offset and landmark pairs.
   *                   Used for emitting boundary and mark events.
   */
  [implicit_jscontext]
  void sendAudio(in jsval aData, in jsval aLandmarks);

  /**
   * Variant of audio sending that is not exposed to script ([noscript]) and
   * takes a plain array of shorts instead of JS values.
   *
   * NOTE(review): presumably the native counterpart of sendAudio() without
   * landmark support - confirm against the implementation.
   *
   * @param aData    array of aDataLen short values holding the audio data.
   * @param aDataLen number of elements in aData.
   */
  [noscript]
  void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen);

  /**
   * Dispatch start event.
   */
  void dispatchStart();

  /**
   * Dispatch end event.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchEnd(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch pause event.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchPause(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch resume event.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchResume(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch error event.
   *
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchError(in float aElapsedTime, in unsigned long aCharIndex);

  /**
   * Dispatch boundary event.
   *
   * @param aName        name of boundary, 'word' or 'sentence'
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchBoundary(in DOMString aName, in float aElapsedTime,
                        in unsigned long aCharIndex);

  /**
   * Dispatch mark event.
   *
   * @param aName        mark identifier.
   * @param aElapsedTime time in seconds since speech has started.
   * @param aCharIndex   offset of spoken characters.
   */
  void dispatchMark(in DOMString aName, in float aElapsedTime, in unsigned long aCharIndex);
};

/**
 * The main interface of a speech synthesis service.
 *
 * A service's speak method could be implemented in two ways:
 *  1. Indirect audio - the service is responsible for outputting audio.
 *    The service calls the nsISpeechTask.dispatch* methods directly, starting
 *    with dispatchStart() and ending with dispatchEnd() or dispatchError().
 *
 *  2. Direct audio - the service provides us with PCM-16 data, and we output it.
 *    The service does not call the dispatch task methods directly. Instead,
 *    audio information is provided at setup(), and audio data is sent with
 *    sendAudio(). The utterance is terminated with an empty sendAudio().
 */
[scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)]
interface nsISpeechService : nsISupports
{
  /**
   * Speak the given text using the voice identified by the given uri. See
   * W3C Speech API spec for information about pitch and rate.
   * https://dvcs.w3.org/hg/speech-api/raw-file/tip/speechapi.html#utterance-attributes
   *
   * @param aText   text to utter.
   * @param aUri    unique voice identifier.
   * @param aVolume volume to speak voice in. Only relevant for indirect audio.
   * @param aRate   rate to speak voice in.
   * @param aPitch  pitch to speak voice in.
   * @param aTask   task instance for utterance, used for sending events or
   *                audio data back to browser.
   */
  void speak(in DOMString aText, in DOMString aUri,
             in float aVolume, in float aRate, in float aPitch,
             in nsISpeechTask aTask);

  /**
   * Direct audio: the service provides PCM-16 data and the browser outputs it.
   */
  const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1;
  /**
   * Indirect audio: the service is responsible for outputting audio itself.
   */
  const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2;

  /**
   * The way this service delivers audio; expected to be one of the
   * SERVICETYPE_* constants above.
   */
  readonly attribute SpeechServiceType serviceType;
};

%{C++
// Name of the service category that speech services can use in order to be
// started up as a component.
#define NS_SPEECH_SYNTH_STARTED "speech-synth-started"
%}