TensorFlow.Data and TensorFlow Hub
Derek Murray
@mrry
Extract → Transform → Load (ETL)
files = tf.data.Dataset.list_files(file_pattern)   # Extract: find input files
dataset = tf.data.TFRecordDataset(files)           # Extract: read records
dataset = dataset.shuffle(10000)                   # Transform: shuffle records
dataset = dataset.repeat(NUM_EPOCHS)               # Transform: repeat for epochs
dataset = dataset.map(lambda x: tf.parse_single_example(x, features))  # Transform: parse
dataset = dataset.batch(BATCH_SIZE)                # Transform: batch
iterator = dataset.make_one_shot_iterator()        # Load: create iterator
features = iterator.get_next()                     # Load: feed the model
§ tf.data makes it straightforward to apply performance best practices to input pipelines
§ Parallel reads and fused transformations (shuffle+repeat, map+batch) raise throughput
§ To stage batches directly into GPU memory, use tf.contrib.data.prefetch_to_device()
Start from the baseline pipeline, then parallelize the file reads:
files = tf.data.Dataset.list_files(file_pattern)
dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
dataset = dataset.shuffle(10000)
dataset = dataset.repeat(NUM_EPOCHS)
dataset = dataset.map(lambda x: tf.parse_single_example(x, features))
dataset = dataset.batch(BATCH_SIZE)
iterator = dataset.make_one_shot_iterator()
features = iterator.get_next()
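Next, fuse the shuffle/repeat and map/batch pairs into single, more efficient ops: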
files = tf.data.Dataset.list_files(file_pattern)
dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
dataset = dataset.apply(
tf.contrib.data.shuffle_and_repeat(10000, NUM_EPOCHS))
dataset = dataset.apply(
tf.contrib.data.map_and_batch(lambda x: ..., BATCH_SIZE))
iterator = dataset.make_one_shot_iterator()
features = iterator.get_next()
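Finally, prefetch each batch directly into GPU memory: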
files = tf.data.Dataset.list_files(file_pattern)
dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
dataset = dataset.apply(
tf.contrib.data.shuffle_and_repeat(10000, NUM_EPOCHS))
dataset = dataset.apply(
tf.contrib.data.map_and_batch(lambda x: ..., BATCH_SIZE))
dataset = dataset.apply(tf.contrib.data.prefetch_to_device("/gpu:0"))
iterator = dataset.make_one_shot_iterator()
features = iterator.get_next()
Together, parallel reads, fused transformations, and device prefetching keep the accelerator fed with data. Note that prefetch_to_device() must be the final transformation in the pipeline.
Custom data sources and transformations beyond Dataset.map:
§ tf.SparseTensor for sparse data
§ Dataset.from_generator() to wrap arbitrary Python code (see the sketch below)
§ A custom C++ DatasetOpKernel for maximum performance
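As a quick illustration of the Dataset.from_generator() option, here is a minimal sketch (assuming the TF 1.x API used throughout these slides; gen is a stand-in for your own Python source):
import tensorflow as tf

def gen():
    # Yield (scalar, variable-length vector) pairs from plain Python.
    for i in range(1, 4):
        yield i, [0] * i

dataset = tf.data.Dataset.from_generator(
    gen,
    output_types=(tf.int64, tf.int64),
    output_shapes=(tf.TensorShape([]), tf.TensorShape([None])))
iterator = dataset.make_one_shot_iterator()
value = iterator.get_next()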
§ Eager execution makes datasets directly iterable, which simplifies debugging
§ Utility functions capture common input-pipeline recipes
§ Datasets integrate directly with Estimators
tf.enable_eager_execution()
files = tf.data.Dataset.list_files(file_pattern)
dataset = tf.data.TFRecordDataset(files)
dataset = dataset.shuffle(10000)
dataset = dataset.repeat(NUM_EPOCHS)
dataset = dataset.map(lambda x: tf.parse_single_example(x, features))
dataset = dataset.batch(BATCH_SIZE)
# Eager execution makes dataset a normal Python iterable.
for batch in dataset:
train_model(batch)
tf.enable_eager_execution()
# Also implements best practices for high performance!
# (See optional args for details.)
dataset = tf.contrib.data.make_batched_features_dataset(
file_pattern, BATCH_SIZE, features, num_epochs=NUM_EPOCHS)
for batch in dataset:
train_model(batch)
tf.enable_eager_execution()
# In a terminal, run the following commands, e.g.:
# $ pip install kaggle
# $ kaggle datasets download -d therohk/million-headlines -p .
dataset = tf.contrib.data.make_csv_dataset(
"*.csv", BATCH_SIZE, num_epochs=NUM_EPOCHS)
for batch in dataset:
train_model(batch["publish_date"], batch["headline_text"])
dataset = tf.contrib.data.make_csv_dataset(
"*.csv", BATCH_SIZE, num_epochs=NUM_EPOCHS)
for batch in dataset:
train_model(batch)
# Wrap the dataset in an input function, and return it directly.
def input_fn():
dataset = tf.contrib.data.make_csv_dataset(
"*.csv", BATCH_SIZE, num_epochs=NUM_EPOCHS)
return dataset
# Train an Estimator on the dataset.
tf.estimator.Estimator(model_fn=train_model).train(input_fn=input_fn)
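Returning the Dataset directly from input_fn works because the Estimator builds an iterator from it internally.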
§ tf.data
✓ Fast
✓ Flexible
✓ Easy to use
§ Learn more: the tf.data programmer's guide and the input pipeline performance guide on tensorflow.org
Brennan Saeta
@bsaeta
def input_fn(batch_size):
files = tf.data.Dataset.list_files(FLAGS.data_dir)
dataset = tf.data.TFRecordDataset(files)
dataset = dataset.shuffle(2048) # Sliding window of 2048 records
dataset = dataset.repeat(NUM_EPOCHS)
dataset = dataset.map(parser_fn).batch(batch_size)
return dataset
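As written, this pipeline parses records on a single thread, so the accelerator may sit idle waiting on input; the profile below shows how to confirm that.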
Is the input pipeline the bottleneck? Measure before optimizing:
§ Profile the running job (e.g. with the Cloud TPU profiler shown below)
§ Look for accelerator idle time caused by waiting on input
saeta@saeta:~$ capture_tpu_profile --tpu_name=saeta --logdir=myprofile/ --duration_ms=10000
Welcome to the Cloud TPU Profiler v1.5.1
Starting to profile TPU traces for 10000 ms. Remaining attempt(s): 3
Limiting the number of trace events to 1000000
2018-03-21 01:13:12.350004: I tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc:155] Converting trace events to TraceViewer JSON.
2018-03-21 01:13:12.392162: I tensorflow/contrib/tpu/profiler/dump_tpu_profile.cc:69] Dumped raw-proto trace data to profiles/5/plugins/profile/2018-03-21_01:13:12/trace
Trace contains 998114 events.
Dumped JSON trace data to myprofile/plugins/profile/2018-03-21_01:13:12/trace.json.gz
Dumped json op profile data to myprofile/plugins/profile/2018-03-21_01:13:12/op_profile.json
Dumped tool data for input_pipeline.json to myprofile/plugins/profile/2018-03-21_01:13:12/input_pipeline.json
Dumped tool data for overview_page.json to myprofile/plugins/profile/2018-03-21_01:13:12/overview_page.json
NOTE: using the trace duration 10000ms.
Set an appropriate duration (with --duration_ms) if you don't see a full step in your trace or the captured trace is too large.
saeta@saeta:~$ tensorboard --logdir=myprofile/
TensorBoard 1.6.0 at <redacted> (Press CTRL+C to quit)
def input_fn(batch_size):
files = tf.data.Dataset.list_files(FLAGS.data_dir)
dataset = tf.data.TFRecordDataset(files)
dataset = dataset.shuffle(2048) # Sliding window of 2048 records
dataset = dataset.repeat(NUM_EPOCHS)
dataset = dataset.map(parser_fn, num_parallel_calls=64)
dataset = dataset.batch(batch_size)
return dataset
§ num_parallel_calls=64 parses records on many cores in parallel
§ Even so, the pipeline can still stall on extraction and batching — the next step is to overlap the stages
[Diagram: software pipelining — while the accelerator trains on batch N (Load), the CPU extracts and transforms batch N+1, overlapping the Extract/Transform/Load stages instead of running them serially.]
def input_fn(batch_size):
files = tf.data.Dataset.list_files(FLAGS.data_dir)
dataset = tf.data.TFRecordDataset(files, num_parallel_reads=32)
dataset = dataset.shuffle(10000)
dataset = dataset.repeat(NUM_EPOCHS)
dataset = dataset.map(parser_fn, num_parallel_calls=64)
dataset = dataset.batch(batch_size)
dataset = dataset.prefetch(2)
return dataset
§ The tail prefetch(2) decouples the producer (the input pipeline) from the consumer (the training step), keeping up to two batches in flight
§ num_parallel_reads and num_parallel_calls parallelize extraction and parsing
[Diagram: interleaving reads across many files in parallel keeps all cores and disks busy.]
When reading from many files:
§ Interleave records from multiple files in parallel (cycle_length)
§ Allow out-of-order (sloppy) interleaving for higher throughput
§ Give each reader its own buffer (e.g. 8 MiB per file)
def input_fn(batch_size):
files = tf.data.Dataset.list_files(FLAGS.data_dir)
def tfrecord_dataset(filename):
buffer_size = 8 * 1024 * 1024 # 8 MiB per file
return tf.data.TFRecordDataset(filename, buffer_size=buffer_size)
dataset = files.apply(tf.contrib.data.parallel_interleave(
tfrecord_dataset, cycle_length=32, sloppy=True))
dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(10000, NUM_EPOCHS))
dataset = dataset.apply(tf.contrib.data.map_and_batch(parser_fn,
batch_size, num_parallel_batches=4))
dataset = dataset.prefetch(4)
return dataset
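Note: sloppy=True lets the interleave produce elements in whatever order they become available; it trades deterministic ordering for throughput, so set it to False if reproducibility matters.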
Jeremiah Harmsen
@JeremiahHarmsen
Andrew Gasparovic
@agasparovic_
TensorFlow Hub: repositories of reusable modules. A module packages a reusable piece of a TensorFlow graph, together with its trained weights, for use inside a new model.
[Image-classification example: a model labels photos with classes such as CHAIRPERSONFLOWER, ANGORAFUZZYLOP, and EASTERBUNNY.]
# Download and use NASNet feature vector module.
module = hub.Module(
"https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/1")
features = module(my_images)
logits = tf.layers.dense(features, NUM_CLASSES)
probabilities = tf.nn.softmax(logits)
To fine-tune the module's weights during training, mark it trainable and select the training graph:
# Download and use NASNet feature vector module.
module = hub.Module(
"https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/1",
trainable=True, tags={"train"})
features = module(my_images)
logits = tf.layers.dense(features, NUM_CLASSES)
probabilities = tf.nn.softmax(logits)
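The "train" tag selects the module's training graph variant (e.g. with dropout applied and batch-norm statistics updating), while trainable=True makes the module's variables receive gradients.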
§ Image modules: feature vectors and classifiers for architectures such as Inception, ResNet, MobileNet, and NASNet
§ Text modules: word embeddings and sentence encoders
§ Browse the full catalog at https://tfhub.dev
Example task: sentiment classification — map sentences such as “The quick brown fox” or “The shallots were simply underwhelming” to POSITIVE or NEGATIVE.
§ The Universal Sentence Encoder maps whole sentences to fixed-length embedding vectors (sketched below)
§ Because it is pre-trained on large corpora, it works well even with little labeled data
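As a quick sketch of what the encoder does (assuming TF 1.x and the tensorflow_hub package used in these slides), the module can be called directly on a batch of sentences:
import tensorflow as tf
import tensorflow_hub as hub

# The module maps a batch of sentences to fixed-length vectors.
embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/1")
embeddings = embed(["The quick brown fox",
                    "The shallots were simply underwhelming"])

with tf.Session() as sess:
    # The encoder uses lookup tables, so initialize those as well.
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    print(sess.run(embeddings).shape)  # e.g. (2, 512)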
# Use pre-trained universal sentence encoder to build text vector column.
review = hub.text_embedding_column(
"review", "https://tfhub.dev/google/universal-sentence-encoder/1")
features = {
"review": np.array(["an arugula masterpiece", "inedible shoe leather", ...])
}
labels = np.array([[1], [0], ...])
input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True)
estimator = tf.estimator.DNNClassifier(hidden_units, [review])
estimator.train(input_fn, max_steps=100)
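To also fine-tune the sentence encoder's weights during training, set trainable=True: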
# Use pre-trained universal sentence encoder to build text vector column.
review = hub.text_embedding_column(
"review", "https://tfhub.dev/google/universal-sentence-encoder/1",
trainable=True)
features = {
"review": np.array(["an arugula masterpiece", "inedible shoe leather", ...])
}
labels = np.array([[1], [0], ...])
input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True)
estimator = tf.estimator.DNNClassifier(hidden_units, [review])
estimator.train(input_fn, max_steps=100)
§ Reuse pre-trained modules with a single line of code
§ Fine-tune them end to end with trainable=True
§ Explore what's available at https://tfhub.dev
