git.gag.com Git - debian/freetts/blob - demo/freetts/ClientServer/client.c

   1 /*
   2  * Implements the Client side of the Client/Server demo.
   3  *
   4  * It waits for the user to type in a line of text, sends the line of
   5  * text to the speech server, which returns a stream of bytes (the
   6  * synthesized wave samples). This client then plays the stream
   7  * of bytes at the local audio device.
   8  *
   9  * You must start the speech server first. You can do this by typing:
  10  *
  11  * gmake runserver
  12  *
  13  * in the same directory. To run this client, set the speech
  14  * server host (and port number if not 5555) in the Makefile, and then type:
  15  *
  16  * gmake runcclient
  17  *
  18  * In the Makefile, you can also specify the sample rate you want
  19  * as the third argument (currently, the server supports only 8kHz and 16kHz).
  20  *
  21  * This C client should run across most UNIX implementations, as it
  22  * uses standard UNIX system libraries.
  23  *
  24  * For a complete specification of the protocol between client and server,
  25  * consult the document <code>Protocol.txt</code>.
  26  */
  27
#include <arpa/inet.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/audio.h>
#include <sys/audioio.h>
#include <sys/filio.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
  45
  46
/* boolean values returned by set_pcm_linear() */
#define TRUE 1
#define FALSE 0

/* speech server used when no host/port is given on the command line */
#define SERVER_PORT 5555
#define SERVER_HOST "sunlabs.east"

/* audio device tried first by open_audio_device() */
#define AUDIO_DEVICE_FILE "/dev/audio"
/* environment variable consulted when AUDIO_DEVICE_FILE cannot be opened */
#define AUDIO_DEVICE_ENV_VAR "AUDIODEV"  // for SunRays
/* sample rate requested from the server unless overridden by argv[3] */
#define DEFAULT_SAMPLE_RATE 8000

/* size of the address structure passed to connect() */
#define ADDRESS_SIZE sizeof(struct sockaddr_in)
/* maximum number of bytes read from the socket per read() call */
#define AUDIO_BUFFER_SIZE 1024
/* size of the buffers holding typed text and the outgoing request */
#define TEXT_INPUT_BUFFER_SIZE 1024
/* size of the small buffers holding short protocol lines */
#define STR_BUFFER_SIZE 10

/* text sent as the first TTS request once the server reports READY */
#define FIRST_SENTENCE "Type in what you want me to say."
  63
  64
/* forward declarations of the functions defined later in this file */
int connect_speech_server(char* server_host, int server_port);
void run_tts_protocol(int sock_fd);
int read_line(int sock_fd, char *buffer, int buffer_size);
int send_tts_request(int sock_fd, char *tts_text);
void receive_play_samples(int sock_fd, int number_samples);
int open_audio_device();
int set_pcm_linear();
/* NOTE(review): no definition of short_to_ulaw appears in this part of the
   file -- confirm whether it is defined elsewhere or is a leftover */
unsigned char short_to_ulaw(short sample);
int is_string_nonempty(char *string, int length);
  74
  75
/* our audio device file descriptor; assigned by open_audio_device() and
   closed again at the end of receive_play_samples() */
int audio_fd;

/* the sample rate sent with every TTS request and applied to the audio
   device; may be overridden by argv[3] */
int sample_rate = DEFAULT_SAMPLE_RATE;

/* show metrics (non-zero) or not (0); may be overridden by argv[4] */
int metrics = 1;

/* equals 1 if the first byte of the current response was received;
   reset to 0 when a request is sent with metrics enabled */
int first_byte_received = 0;

/* the time at which the current request was sent */
struct timeval start_time;

/* the time at which the first response bytes were read */
struct timeval first_byte_time;

/* the first sound time
   NOTE(review): not referenced by any code visible in this file */
struct timeval first_sound_time;
  96
  97
  98
  99
 100 /**
 101  * It first attempts a connection to the speech server. Then,
 102  * it waits for the user to type in a line of text, sends the line of
 103  * text to the speech server, which returns a stream of bytes (the
 104  * synthesized wave samples). This client then plays the stream
 105  * of bytes at the local audio device.
 106  *
 107  * Arguments (optional):
 108  * argv[1] : the host name of speech server
 109  * argv[2] : the port number where the speech server is listening
 110  * argv[3] : the sample rate
 111  * argv[4] : show metrics, 1 to show, 0 to not show
 112  */
 113 int main(int args, char *argv[]) {
 114
 115   int sock_fd;
 116   int server_port;
 117   char* server_host;
 118
 119   server_port = SERVER_PORT;
 120   server_host = SERVER_HOST;
 121
 122   /* parse command line arguments for server hostname and port number */
 123   if (args >= 2) {
 124     server_host = argv[1];
 125   }
 126   if (args >= 3) {
 127     server_port = atoi(argv[2]);
 128   }
 129   if (args >= 4) {
 130     sample_rate = atoi(argv[3]);
 131   }
 132   if (args >= 5) {
 133     metrics = atoi(argv[4]);
 134   }
 135
 136   /* connect to the server */
 137   sock_fd = connect_speech_server(server_host, server_port);
 138
 139   /* start running the TTS protocol */
 140   run_tts_protocol(sock_fd);
 141
 142   /* do cleanup */
 143   close(sock_fd);
 144
 145   return 0;
 146 }
 147
 148
 149 /**
 150  * Connects to the remote speech server at the given host and port,
 151  * and returns the socket file descriptor to the connection.
 152  *
 153  * Arguments:
 154  * server_host: the host name of the speech server
 155  * server_port: the port on which the speech server is listening
 156  *
 157  * Returns:
 158  * a file descriptor of the socket
 159  */
 160 int connect_speech_server(char* server_host, int server_port) {
 161
 162   int sock_fd;
 163   struct sockaddr_in server = {AF_INET, SERVER_PORT};
 164   struct hostent *hp;
 165
 166   /* obtain the IP address */
 167
 168   hp = gethostbyname(server_host);
 169   if (hp == NULL) {
 170     perror("invalid hostname");
 171     exit(1);
 172   }
 173
 174   /* set the IP address and port */
 175
 176   bcopy((char *)hp->h_addr, (char *)&server.sin_addr, hp->h_length);
 177   server.sin_port = htons(server_port);
 178
 179
 180   /* set up the transport end point */
 181
 182   if ((sock_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
 183     perror("socket call failed");
 184     exit(1);
 185   }
 186
 187   /* connect to the server */
 188
 189   if (connect(sock_fd, (struct sockaddr *) &server, ADDRESS_SIZE) == -1) {
 190     perror("connect call failed");
 191     exit(1);
 192   }
 193
 194   return sock_fd;
 195 }
 196
 197
 198 /**
 199  * Runs the TTS protocol.
 200  * It waits for the user to type in a line of text, sends the line of
 201  * text to the speech server, which returns a stream of bytes (the
 202  * synthesized wave samples). This client then plays the stream
 203  * of bytes at the local audio device.
 204  *
 205  * Arguments:
 206  * sock_fd  the socket file descriptor
 207  */
 208 void run_tts_protocol(int sock_fd) {
 209
 210   char buffer[STR_BUFFER_SIZE];
 211   char input_buffer[TEXT_INPUT_BUFFER_SIZE];
 212   ssize_t nread;
 213
 214   /* read the "READY" line from the Server */
 215
 216   nread = recv(sock_fd, buffer, 6, 0);
 217   buffer[nread] = '\0';
 218
 219   if (strcmp(buffer, "READY\n") == 0) {
 220
 221     if (send_tts_request(sock_fd, FIRST_SENTENCE) == -1) {
 222       return;
 223     }
 224
 225     input_buffer[0] = '\0';
 226     printf("Say       : ");
 227     while (fgets(input_buffer, TEXT_INPUT_BUFFER_SIZE, stdin) != NULL) {
 228       if (is_string_nonempty(input_buffer, strlen(input_buffer)) &&
 229           send_tts_request(sock_fd, input_buffer) == -1) {
 230         return;
 231       }
 232       input_buffer[0] = '\0';
 233       printf("Say       : ");
 234     }
 235   }
 236
 237   send(sock_fd, "DONE\n", 5, 0);
 238
 239   /* drain all the audio before returning */
 240
 241   ioctl(audio_fd, AUDIO_DRAIN, 0);
 242
 243   printf("ALL DONE\n");
 244 }
 245
 246
 247 /**
 248  * Sends a TTS request of the given text to the given socket.
 249  *
 250  * Arguments:
 251  * sock_fd : socket file descriptor
 252  * tts_text : the text to perform TTS
 253  *
 254  * Returns:
 255  * 0 if everything's fine, -1 if any error occurred
 256  */
 257 int send_tts_request(int sock_fd, char *tts_text) {
 258
 259   int nsend;
 260
 261   char tts_text_str[TEXT_INPUT_BUFFER_SIZE];
 262   int text_length;
 263
 264   char number_samples_str[STR_BUFFER_SIZE];
 265   int number_samples;
 266
 267   int input_length;
 268   input_length = strlen(tts_text);
 269
 270   if (tts_text[input_length - 1] == '\n') {
 271     tts_text[input_length - 1] = '\0';
 272   }
 273
 274   sprintf(tts_text_str, "TTS\n%d\n%s\n", sample_rate, tts_text);
 275
 276   text_length = strlen(tts_text_str);
 277
 278   /* record the time the request is sent */
 279   if (metrics) {
 280     gettimeofday(&start_time, NULL);
 281     first_byte_received = 0;
 282   }
 283
 284   /*
 285    * send "TTS\n<sample_rate>\n<text>\n" (sent together to avoid
 286    * repetitive send calls)
 287    */
 288   nsend = send(sock_fd, tts_text_str, text_length, 0);
 289
 290   do {
 291     read_line(sock_fd, number_samples_str, STR_BUFFER_SIZE);
 292                                               /* how many samples? */
 293
 294     if (strcmp(number_samples_str, "-2") == 0) {
 295       printf("TTS Error\n");
 296       return -1;
 297     }
 298
 299     if (strcmp(number_samples_str, "-1") != 0) {
 300       number_samples = atoi(number_samples_str);
 301
 302       printf("Receiving : %d samples\n", number_samples);
 303
 304       receive_play_samples(sock_fd, number_samples);
 305     }
 306   }
 307   while (strcmp(number_samples_str, "-1") != 0 &&
 308          strcmp(number_samples_str, "-2") != 0);
 309
 310   if (metrics) {
 311     long elapsed_time =
 312       (first_byte_time.tv_sec - start_time.tv_sec)*1000 +
 313       (first_byte_time.tv_usec - start_time.tv_usec)/1000;
 314
 315     printf("FirstByte : %li ms\n", elapsed_time);
 316   }
 317
 318   return 0;
 319 }
 320
 321
 322 /**
 323  * Receive the given number of wave samples and play it to the audio
 324  * device.
 325  *
 326  * Arguments:
 327  * sock_fd : the socket file descriptor
 328  * number_samples : the number of wave samples to receive from the socket
 329  */
 330 void receive_play_samples(int sock_fd, int number_samples) {
 331
 332   int nread;
 333   int nsend;
 334   int bytes_to_read;
 335   int bytes_remaining;
 336   short socket_buffer[AUDIO_BUFFER_SIZE];
 337
 338   bytes_remaining = number_samples;
 339
 340   open_audio_device();
 341
 342   /* read the samples from the socket, and write it to the audio device */
 343
 344   while (bytes_remaining > 0) {
 345
 346     if (bytes_remaining >= AUDIO_BUFFER_SIZE) {
 347       bytes_to_read = AUDIO_BUFFER_SIZE;
 348     }
 349     else {
 350       bytes_to_read = bytes_remaining;
 351     }
 352
 353     if ((nread = read(sock_fd, socket_buffer, bytes_to_read)) == -1) {
 354       perror("error reading samples");
 355     }
 356
 357     if (metrics && !first_byte_received) {
 358       gettimeofday(&first_byte_time, NULL);
 359       first_byte_received = 1;
 360     }
 361
 362     if ((nsend = write(audio_fd, socket_buffer, nread)) == -1) {
 363       perror("error playing samples");
 364     }
 365
 366     bytes_remaining -= nread;
 367   }
 368
 369   close(audio_fd);
 370 }
 371
 372
 373 /**
 374  * Reads a line of input from the given file descriptor, and save it
 375  * in the given buffer.
 376  *
 377  * Arguments:
 378  * sock_fd : the (socket) file descriptor
 379  * buffer : the buffer to save the line read
 380  * buffer_size : size of the buffer
 381  *
 382  * Returns:
 383  * The number of characters in the line, not including end of line character.
 384  */
 385 int read_line(int sock_fd, char *buffer, int buffer_size) {
 386
 387   int i;
 388   char rc;
 389
 390   for (i = 0; i < (buffer_size-1); i++) {
 391     read(sock_fd, &rc, 1);
 392     buffer[i] = rc;
 393     if (rc == '\n') {
 394       break;
 395     }
 396   }
 397   buffer[i] = '\0';
 398
 399   return i;
 400 }
 401
 402
 403 /**
 404  * Returns 1 if the given string contains text, ie, it does not only
 405  * contain the space, newline or tab characters.
 406  *
 407  * Arguments:
 408  * string : the input string
 409  * length : the string length
 410  */
 411 int is_string_nonempty(char *string, int length) {
 412   int i;
 413   for (i = 0; i < length; i++) {
 414     if (string[i] != ' ' && string[i] != '\n' && string[i] != '\t') {
 415       return 1;
 416     }
 417   }
 418   return 0;
 419 }
 420
 421
 422 /**
 423  * Opens the audio device file, and returns the file descriptor,
 424  * or -1 if an error occurred.
 425  *
 426  * Returns:
 427  * The audio device file descriptor.
 428  */
 429 int open_audio_device() {
 430
 431   char *audio_device = AUDIO_DEVICE_FILE;
 432
 433   if ((audio_fd = open(audio_device, O_WRONLY)) == -1) {
 434
 435     /* the device might be a SunRay, so get the $AUDIODEV env var */
 436     audio_device = getenv(AUDIO_DEVICE_ENV_VAR);
 437
 438     if (audio_device != NULL) {
 439       if ((audio_fd = open(audio_device, O_RDWR)) == -1) {
 440         perror("Can't open audio device with environment variable");
 441         exit(1);
 442       }
 443     }
 444     else {
 445       perror("Can't open audio device");
 446       exit(1);
 447     }
 448   }
 449
 450   if (set_pcm_linear() == FALSE) {
 451     perror("fail to set audio device to PCM linear");
 452     exit(1);
 453   }
 454
 455   return audio_fd;
 456 }
 457
 458
 459 /**
 460  * Attempts to set the audio format of the audio device to 16-bit
 461  * PCM linear, at the given sample rate.
 462  *
 463  * Returns:
 464  * TRUE if the audio format was set successfully
 465  * FALSE otherwise
 466  */
 467 int set_pcm_linear() {
 468   int set_status;
 469
 470   audio_info_t info;
 471   // AUDIO_INITINFO(&info);
 472
 473   ioctl(audio_fd, AUDIO_GETINFO, &info);
 474
 475   info.play.encoding = AUDIO_ENCODING_LINEAR;
 476   info.play.precision = 16;
 477   info.play.channels = 1;
 478   info.play.sample_rate = sample_rate;
 479
 480   set_status = ioctl(audio_fd, AUDIO_SETINFO, &info);
 481
 482   if (set_status == -1) {
 483     return FALSE;
 484   } else {
 485     return TRUE;
 486   }
 487 }