
TensorFlow.Data and TensorFlow Hub


This presentation summarizes the talks on TensorFlow.Data and TensorFlow Hub from the TensorFlow Dev Summit 2018 sessions, and was presented at TensorFlow Dev Summit Extended Seoul '18, held on April 14, 2018 in Seoul.


TensorFlow.Data and TensorFlow Hub

  1.
  2.
  3. Derek Murray @mrry
  4.
  5. Extract, Transform, Load
  6. files = tf.data.Dataset.list_files(file_pattern)                       # Extract
     dataset = tf.data.TFRecordDataset(files)                               # Extract
     dataset = dataset.shuffle(10000)                                       # Transform
     dataset = dataset.repeat(NUM_EPOCHS)                                   # Transform
     dataset = dataset.map(lambda x: tf.parse_single_example(x, features))  # Transform
     dataset = dataset.batch(BATCH_SIZE)                                    # Transform
     iterator = dataset.make_one_shot_iterator()                            # Load
     features = iterator.get_next()                                         # Load
  7. • tf.data • tf.contrib.data.prefetch_to_device()
  8. files = tf.data.Dataset.list_files(file_pattern)
     dataset = tf.data.TFRecordDataset(files)
     dataset = dataset.shuffle(10000)
     dataset = dataset.repeat(NUM_EPOCHS)
     dataset = dataset.map(lambda x: tf.parse_single_example(x, features))
     dataset = dataset.batch(BATCH_SIZE)
     iterator = dataset.make_one_shot_iterator()
     features = iterator.get_next()
  9. files = tf.data.Dataset.list_files(file_pattern)
     dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
     dataset = dataset.shuffle(10000)
     dataset = dataset.repeat(NUM_EPOCHS)
     dataset = dataset.map(lambda x: tf.parse_single_example(x, features))
     dataset = dataset.batch(BATCH_SIZE)
     iterator = dataset.make_one_shot_iterator()
     features = iterator.get_next()
  10. files = tf.data.Dataset.list_files(file_pattern)
      dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
      dataset = dataset.apply(
          tf.contrib.data.shuffle_and_repeat(10000, NUM_EPOCHS))
      dataset = dataset.apply(
          tf.contrib.data.map_and_batch(lambda x: ..., BATCH_SIZE))
      iterator = dataset.make_one_shot_iterator()
      features = iterator.get_next()
  11. files = tf.data.Dataset.list_files(file_pattern)
      dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
      dataset = dataset.apply(
          tf.contrib.data.shuffle_and_repeat(10000, NUM_EPOCHS))
      dataset = dataset.apply(
          tf.contrib.data.map_and_batch(lambda x: ..., BATCH_SIZE))
      dataset = dataset.apply(tf.contrib.data.prefetch_to_device("/gpu:0"))
      iterator = dataset.make_one_shot_iterator()
      features = iterator.get_next()
  12. files = tf.data.Dataset.list_files(file_pattern)
      dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
      dataset = dataset.apply(
          tf.contrib.data.shuffle_and_repeat(10000, NUM_EPOCHS))
      dataset = dataset.apply(
          tf.contrib.data.map_and_batch(lambda x: ..., BATCH_SIZE))
      dataset = dataset.apply(tf.contrib.data.prefetch_to_device("/gpu:0"))
      iterator = dataset.make_one_shot_iterator()
      features = iterator.get_next()
  13. Dataset.map (a parallel map example appears after the slide list)
  14. • tf.SparseTensor • Dataset.from_generator() + Python • DatasetOpKernel (a Dataset.from_generator() example appears after the slide list)
  15.
  16. tf.enable_eager_execution()
      files = tf.data.Dataset.list_files(file_pattern)
      dataset = tf.data.TFRecordDataset(files)
      dataset = dataset.shuffle(10000)
      dataset = dataset.repeat(NUM_EPOCHS)
      dataset = dataset.map(lambda x: tf.parse_single_example(x, features))
      dataset = dataset.batch(BATCH_SIZE)
      # Eager execution makes dataset a normal Python iterable.
      for batch in dataset:
        train_model(batch)
  17.
  18. tf.enable_eager_execution()
      files = tf.data.Dataset.list_files(file_pattern)
      dataset = tf.data.TFRecordDataset(files)
      dataset = dataset.shuffle(10000)
      dataset = dataset.repeat(NUM_EPOCHS)
      dataset = dataset.map(lambda x: tf.parse_single_example(x, features))
      dataset = dataset.batch(BATCH_SIZE)
      for batch in dataset:
        train_model(batch)
  19. tf.enable_eager_execution()
      # Also implements best practices for high performance!
      # (See optional args for details.)
      dataset = tf.contrib.data.make_batched_features_dataset(
          file_pattern, BATCH_SIZE, features, num_epochs=NUM_EPOCHS)
      for batch in dataset:
        train_model(batch)
  20. tf.enable_eager_execution()
      # In a terminal, run the following commands, e.g.:
      # $ pip install kaggle
      # $ kaggle datasets download -d therohk/million-headlines -p .
      dataset = tf.contrib.data.make_csv_dataset(
          "*.csv", BATCH_SIZE, num_epochs=NUM_EPOCHS)
      for batch in dataset:
        train_model(batch["publish_date"], batch["headline_text"])
  21.
  22. dataset = tf.contrib.data.make_csv_dataset(
          "*.csv", BATCH_SIZE, num_epochs=NUM_EPOCHS)
      for batch in dataset:
        train_model(batch)
  23. # Wrap the dataset in an input function, and return it directly.
      def input_fn():
        dataset = tf.contrib.data.make_csv_dataset(
            "*.csv", BATCH_SIZE, num_epochs=NUM_EPOCHS)
        return dataset

      # Train an Estimator on the dataset.
      tf.estimator.Estimator(model_fn=train_model).train(input_fn=input_fn)
  24.
  25. • tf.data ✓ ✓ ✓
  26.
  27. Brennan Saeta @bsaeta
  28.
  29. def input_fn(batch_size):
        files = tf.data.Dataset.list_files(FLAGS.data_dir)
        dataset = tf.data.TFRecordDataset(files)
        dataset = dataset.shuffle(2048)  # Sliding window of 2048 records
        dataset = dataset.repeat(NUM_EPOCHS)
        dataset = dataset.map(parser_fn).batch(batch_size)
        return dataset
  30.
  31. saeta@saeta:~$ capture_tpu_profile --tpu_name=saeta --logdir=myprofile/ --duration_ms=10000
      Welcome to the Cloud TPU Profiler v1.5.1
      Starting to profile TPU traces for 10000 ms. Remaining attempt(s): 3
      Limiting the number of trace events to 1000000
      2018-03-21 01:13:12.350004: I tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc:155] Converting trace events to TraceViewer JSON.
      2018-03-21 01:13:12.392162: I tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc:69] Dumped raw-proto trace data to profiles/5/plugins/profile/2018-03-21_01:13:12/trace
      Trace contains 998114 events.
      Dumped JSON trace data to myprofile/plugins/profile/2018-03-21_01:13:12/trace.json.gz
      Dumped json op profile data to myprofile/plugins/profile/2018-03-21_01:13:12/op_profile.json
      Dumped tool data for input_pipeline.json to myprofile/plugins/profile/2018-03-21_01:13:12/input_pipeline.json
      Dumped tool data for overview_page.json to myprofile/plugins/profile/2018-03-21_01:13:12/overview_page.json
      NOTE: using the trace duration 10000ms. Set an appropriate duration (with --duration_ms) if you don't see a full step in your trace or the captured trace is too large.
      saeta@saeta:~$ tensorboard --logdir=myprofile/
      TensorBoard 1.6.0 at <redacted> (Press CTRL+C to quit)
  32. def input_fn(batch_size):
        files = tf.data.Dataset.list_files(FLAGS.data_dir)
        dataset = tf.data.TFRecordDataset(files)
        dataset = dataset.shuffle(2048)  # Sliding window of 2048 records
        dataset = dataset.repeat(NUM_EPOCHS)
        dataset = dataset.map(parser_fn, num_parallel_calls=64)
        dataset = dataset.batch(batch_size)
        return dataset
  33.
  34.
  35. Extract / Transform / Load
  36.
  37.
  38. def input_fn(batch_size):
        files = tf.data.Dataset.list_files(FLAGS.data_dir)
        dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
        dataset = dataset.shuffle(10000)
        dataset = dataset.repeat(NUM_EPOCHS)
        dataset = dataset.map(parser_fn, num_parallel_calls=64)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(2)
        return dataset
  39.
  40. def input_fn(batch_size):
        files = tf.data.Dataset.list_files(FLAGS.data_dir)
        dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
        dataset = dataset.shuffle(10000)
        dataset = dataset.repeat(NUM_EPOCHS)
        dataset = dataset.map(parser_fn, num_parallel_calls=64)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(2)
        return dataset
  41.
  42.
  43.
  44. def input_fn(batch_size):
        files = tf.data.Dataset.list_files(FLAGS.data_dir)

        def tfrecord_dataset(filename):
          buffer_size = 8 * 1024 * 1024  # 8 MiB per file
          return tf.data.TFRecordDataset(filename, buffer_size=buffer_size)

        dataset = files.apply(tf.contrib.data.parallel_interleave(
            tfrecord_dataset, cycle_length=32, sloppy=True))
        dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(10000, NUM_EPOCHS))
        dataset = dataset.apply(tf.contrib.data.map_and_batch(parser_fn, batch_size,
                                                              num_parallel_batches=4))
        dataset = dataset.prefetch(4)
        return dataset
  45.
  46. Jeremiah Harmsen @JeremiahHarmsen, Andrew Gasparovic @agasparovic_
  47. Repositories
  48. TensorFlow Hub
  49. Module · TensorFlow Hub · Model
  50.
  51. CHAIRPERSON, FLOWER, ANGORA, FUZZY LOP, EASTER BUNNY
  52. # Download and use NASNet feature vector module.
      module = hub.Module(
          "https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/1")
      features = module(my_images)
      logits = tf.layers.dense(features, NUM_CLASSES)
      probabilities = tf.nn.softmax(logits)
      (a training sketch for this setup appears after the slide list)
  53. # Download and use NASNet feature vector module.
      module = hub.Module(
          "https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/1",
          trainable=True, tags={"train"})
      features = module(my_images)
      logits = tf.layers.dense(features, NUM_CLASSES)
      probabilities = tf.nn.softmax(logits)
  54. (a module-swapping example appears after the slide list)
  55. “The quick brown fox”
  56. “The shallots were simply underwhelming” POSITIVE NEGATIVE
  57.
  58. # Use pre-trained universal sentence encoder to build text vector column.
      review = hub.text_embedding_column(
          "review", "https://tfhub.dev/google/universal-sentence-encoder/1")
      features = {
        "review": np.array(["an arugula masterpiece", "inedible shoe leather", ...])
      }
      labels = np.array([[1], [0], ...])
      input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True)
      estimator = tf.estimator.DNNClassifier(hidden_units, [review])
      estimator.train(input_fn, max_steps=100)
  59. # Use pre-trained universal sentence encoder to build text vector column.
      review = hub.text_embedding_column(
          "review", "https://tfhub.dev/google/universal-sentence-encoder/1",
          trainable=True)
      features = {
        "review": np.array(["an arugula masterpiece", "inedible shoe leather", ...])
      }
      labels = np.array([[1], [0], ...])
      input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True)
      estimator = tf.estimator.DNNClassifier(hidden_units, [review])
      estimator.train(input_fn, max_steps=100)
  60. # Use pre-trained universal sentence encoder to build text vector column.
      review = hub.text_embedding_column(
          "review", "https://tfhub.dev/google/universal-sentence-encoder/1")
      features = {
        "review": np.array(["an arugula masterpiece", "inedible shoe leather", ...])
      }
      labels = np.array([[1], [0], ...])
      input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True)
      estimator = tf.estimator.DNNClassifier(hidden_units, [review])
      estimator.train(input_fn, max_steps=100)
  61.
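
Example for slide 13 (Dataset.map): a minimal sketch of parallelizing per-record parsing with Dataset.map in TF 1.x. The feature spec, file name, and thread count below are illustrative assumptions, not taken from the talk.

    import tensorflow as tf

    # Illustrative feature spec; adapt it to the actual TFRecord schema.
    FEATURE_SPEC = {"image": tf.FixedLenFeature([], tf.string),
                    "label": tf.FixedLenFeature([], tf.int64)}

    def parser_fn(serialized_example):
      # Parse one serialized tf.Example into a dict of tensors.
      return tf.parse_single_example(serialized_example, FEATURE_SPEC)

    dataset = tf.data.TFRecordDataset(["train.tfrecord"])    # hypothetical file name
    dataset = dataset.map(parser_fn, num_parallel_calls=16)  # parse on 16 threads
    dataset = dataset.batch(32)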
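
Example for slide 14 (Dataset.from_generator() + Python): a minimal sketch of wrapping arbitrary Python code as a data source. The generator, dtypes, and shapes are invented for illustration.

    import numpy as np
    import tensorflow as tf

    def record_generator():
      # Yield (feature_vector, label) pairs from any Python code,
      # e.g. a custom file format or an in-memory structure.
      for i in range(1000):
        yield np.random.rand(4).astype(np.float32), i % 2

    dataset = tf.data.Dataset.from_generator(
        record_generator,
        output_types=(tf.float32, tf.int32),
        output_shapes=(tf.TensorShape([4]), tf.TensorShape([])))
    dataset = dataset.shuffle(100).batch(32)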
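
Follow-up to slide 52: the slide stops at probabilities, so here is a sketch of how the added classifier could be trained while the NASNet module stays frozen. The 331x331 input size, class count, optimizer, and random feed data are assumptions for illustration, not from the talk.

    import numpy as np
    import tensorflow as tf
    import tensorflow_hub as hub

    NUM_CLASSES = 5  # illustrative
    my_images = tf.placeholder(tf.float32, shape=[None, 331, 331, 3])  # assumed NASNet-large input size
    my_labels = tf.placeholder(tf.int64, shape=[None])

    module = hub.Module(
        "https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/1")
    features = module(my_images)                     # frozen feature extractor
    logits = tf.layers.dense(features, NUM_CLASSES)  # only this layer is trained
    loss = tf.losses.sparse_softmax_cross_entropy(labels=my_labels, logits=logits)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

    with tf.Session() as sess:
      # Hub modules need both variable and table initializers.
      sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
      images = np.random.rand(8, 331, 331, 3).astype(np.float32)  # stand-in batch
      labels = np.random.randint(0, NUM_CLASSES, size=8)
      sess.run(train_op, feed_dict={my_images: images, my_labels: labels})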
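
Follow-up to slide 54: assuming the slide catalogues alternative image modules, switching feature extractors only requires changing the module handle. The Inception V3 handle and its 299x299 input size below are assumptions to be checked against tfhub.dev.

    import tensorflow as tf
    import tensorflow_hub as hub

    NUM_CLASSES = 5  # illustrative
    images = tf.placeholder(tf.float32, shape=[None, 299, 299, 3])  # assumed Inception V3 input size
    module = hub.Module(
        "https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1")
    features = module(images)                        # feature width depends on the module
    logits = tf.layers.dense(features, NUM_CLASSES)  # the dense layer adapts to that width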
