SlideShare a Scribd company logo
1 of 2
Download to read offline
ATTN: yukio.saitoh@gmail.com                                         NVIDIA GT520 SDK MASTER LOG                                                              2011/08/04

                                                                  FXFROG (C2D E8200, RAM 8GB, Win7 x64)
deviceQuery                    CUDA Driver = CUDART               CUDA Driver Version = 4.0    CUDA Runtime Version = 4.0   NumDevs = 1     Device = GeForce GT 520
deviceQuery                    CUDA Driver = CUDART               CUDA Driver Version = 4.0    CUDA Runtime Version = 4.0   NumDevs = 1     Device = GeForce GT 520
MersenneTwister                Throughput = 0.2892 GNumbers/s     Time = 0.08299 s             Size = 24002560 Numbers      NumDevsUsed = 1 Workgroup = 128
quasirandomGenerator           Throughput = 0.3590 GNumbers/s     Time = 0.00876 s             Size = 3145728 Numbers       NumDevsUsed = 1 Workgroup = 384
quasirandomGenerator-inverse   Throughput = 0.7063 GNumbers/s     Time = 0.00445 s             Size = 3145728 Numbers       NumDevsUsed = 1 Workgroup = 128
radixSort                      Throughput = 29.1391 MElements/s   Time = 0.03599 s             Size = 1048576 elements
Reduction                      Throughput = 7.3134 GB/s           Time = 0.00918 s             Size = 16777216 Elements     NumDevsUsed = 1 Workgroup = 256
scan-Short                     Throughput = 0.1387 MElements/s    Time = 0.00738 s             Size = 1024 Elements         NumDevsUsed = 1 Workgroup = 256
scan-Large                     Throughput = 17.5481 MElements/s   Time = 0.01494 s             Size = 262144 Elements       NumDevsUsed = 1 Workgroup = 256
ATTN: yukio.saitoh@gmail.com                                                  NVIDIA GT440 SDK MASTER LOG                                                                                    2011/08/04

                                                                           FXFROG (C2D E8200, RAM 8GB, Win7 x64)

deviceQuery                                     CUDA Driver = CUDART                CUDA Driver Version = 4.0      CUDA Runtime Version = 4.0    NumDevs = 1           Device = GeForce GT 440
BlackScholes                                    Throughput = 1.6170 GOptions/s      Time = 0.00495 s               Size = 8000000 options        NumDevsUsed = 1       Workgroup = 128
convolutionSeparable                            Throughput = 278.8831 MPixels/sec   Time = 0.03384 s               Size = 9437184 Pixels         NumDevsUsed = 1       Workgroup = 0
deviceQuery                                     CUDA Driver = CUDART                CUDA Driver Version = 4.0      CUDA Runtime Version = 4.0    NumDevs = 1           Device = GeForce GT 440
dxtc                                            Throughput = 1.8818 MPixels/s       Time = 0.13930 s               Size = 262144 Pixels          NumDevsUsed = 1       Workgroup = 64
histogram64                                     Throughput = 3087.4561 MB/s         Time = 0.02174 s               Size = 67108864 Bytes         NumDevsUsed = 1       Workgroup = 64
histogram256                                    Throughput = 1720.8238 MB/s         Time = 0.03900 s               Size = 67108864 Bytes         NumDevsUsed = 1       Workgroup = 192
> CUBLAS                                        Throughput = 39.4083 GFlop/s        Time = 0.00333 s               Size = 131072000 Ops
> CUDA matrixMul                                Throughput = 18.3094 GFlop/s        Time = 0.00716 s               Size = 131072000 Ops          NumDevsUsed = 1       Workgroup = 1024
MersenneTwister                                 Throughput = 0.6063 GNumbers/s      Time = 0.03959 s               Size = 24002560 Numbers       NumDevsUsed = 1       Workgroup = 128
quasirandomGenerator                            Throughput = 0.0468 GNumbers/s      Time = 0.06721 s               Size = 3145728 Numbers        NumDevsUsed = 1       Workgroup = 384
quasirandomGenerator-inverse                    Throughput = 0.0998 GNumbers/s      Time = 0.03153 s               Size = 3145728 Numbers        NumDevsUsed = 1       Workgroup = 128
radixSort                                       Throughput = 5.1810 MElements/s     Time = 0.20239 s               Size = 1048576 elements
Reduction                                       Throughput = 22.4863 GB/s           Time = 0.00298 s               Size = 16777216 Elements      NumDevsUsed = 1       Workgroup = 256
scan-Short                                      Throughput = 0.2270 MElements/s     Time = 0.00451 s               Size = 1024 Elements          NumDevsUsed = 1       Workgroup = 256
scan-Large                                      Throughput = 40.6028 MElements/s    Time = 0.00646 s               Size = 262144 Elements        NumDevsUsed = 1       Workgroup = 256
sortingNetworks-bitonic                         Throughput = 11.6002 MElements/s    Time = 0.09039 s               Size = 1048576 elements       NumDevsUsed = 1       Workgroup = 512
transpose-Outer-simple copy                     Throughput = 8.8884 GB/s            Time = 0.21974 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-simple copy                     Throughput = 36.6637 GB/s           Time = 0.05327 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Outer-shared memory copy              Throughput = 3.8134 GB/s            Time = 0.51218 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-shared memory copy              Throughput = 13.6798 GB/s           Time = 0.14277 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Outer-naive                           Throughput = 3.2989 GB/s            Time = 0.59205 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-naive                           Throughput = 6.8978 GB/s            Time = 0.28315 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Outer-coalesced                       Throughput = 4.6051 GB/s            Time = 0.42412 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-coalesced                       Throughput = 13.1095 GB/s           Time = 0.14899 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Outer-optimized                       Throughput = 5.9365 GB/s            Time = 0.32900 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-optimized                       Throughput = 18.6504 GB/s           Time = 0.10472 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Outer-coarse-grained                  Throughput = 7.5914 GB/s            Time = 0.25728 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-coarse-grained                  Throughput = 18.4704 GB/s           Time = 0.10574 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Outer-fine-grained                    Throughput = 5.5713 GB/s            Time = 0.35057 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-fine-grained                    Throughput = 18.2539 GB/s           Time = 0.10700 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Outer-diagonal                        Throughput = 4.2356 GB/s            Time = 0.46112 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256
transpose-Inner-diagonal                        Throughput = 19.9927 GB/s           Time = 0.09769 s               Size = 262144 fp32 elements   NumDevsUsed = 1       Workgroup = 256

More Related Content

What's hot

.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NET
.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NET.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NET
.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NETNETFest
 
RAPIDS: ускоряем Pandas и scikit-learn на GPU Павел Клеменков, NVidia
RAPIDS: ускоряем Pandas и scikit-learn на GPU  Павел Клеменков, NVidiaRAPIDS: ускоряем Pandas и scikit-learn на GPU  Павел Клеменков, NVidia
RAPIDS: ускоряем Pandas и scikit-learn на GPU Павел Клеменков, NVidiaMail.ru Group
 
Jvm tuning for low latency application & Cassandra
Jvm tuning for low latency application & CassandraJvm tuning for low latency application & Cassandra
Jvm tuning for low latency application & CassandraQuentin Ambard
 
Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013
Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013
Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013Amazon Web Services
 
Amazon Web Services - An Overview
Amazon Web Services - An OverviewAmazon Web Services - An Overview
Amazon Web Services - An Overviewchregu
 
Adventures in RDS Load Testing
Adventures in RDS Load TestingAdventures in RDS Load Testing
Adventures in RDS Load TestingMike Harnish
 
Gc crash course (1)
Gc crash course (1)Gc crash course (1)
Gc crash course (1)Tier1 app
 
Openstack grizzley puppet_talk
Openstack grizzley puppet_talkOpenstack grizzley puppet_talk
Openstack grizzley puppet_talkbodepd
 
AWS RDS Benchmark - CMG Brasil 2012
AWS RDS Benchmark - CMG Brasil 2012AWS RDS Benchmark - CMG Brasil 2012
AWS RDS Benchmark - CMG Brasil 2012Rodrigo Campos
 
Openstack presentation
Openstack presentationOpenstack presentation
Openstack presentationbodepd
 
Dive into Catalyst
Dive into CatalystDive into Catalyst
Dive into CatalystCheng Lian
 
Adaptive Linear Solvers and Eigensolvers
Adaptive Linear Solvers and EigensolversAdaptive Linear Solvers and Eigensolvers
Adaptive Linear Solvers and Eigensolversinside-BigData.com
 
How to Stop Worrying and Start Caching in Java
How to Stop Worrying and Start Caching in JavaHow to Stop Worrying and Start Caching in Java
How to Stop Worrying and Start Caching in Javasrisatish ambati
 
Cassandra Community Webinar | Become a Super Modeler
Cassandra Community Webinar | Become a Super ModelerCassandra Community Webinar | Become a Super Modeler
Cassandra Community Webinar | Become a Super ModelerDataStax
 
Fortuna 2012 physical_mashup_artificial_intelligence
Fortuna 2012 physical_mashup_artificial_intelligenceFortuna 2012 physical_mashup_artificial_intelligence
Fortuna 2012 physical_mashup_artificial_intelligencecarolninap
 
NoSQL @ CodeMash 2010
NoSQL @ CodeMash 2010NoSQL @ CodeMash 2010
NoSQL @ CodeMash 2010Ben Scofield
 
Am I reading GC logs Correctly?
Am I reading GC logs Correctly?Am I reading GC logs Correctly?
Am I reading GC logs Correctly?Tier1 App
 

What's hot (20)

.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NET
.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NET.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NET
.NET Fest 2019. Николай Балакин. Микрооптимизации в мире .NET
 
RAPIDS: ускоряем Pandas и scikit-learn на GPU Павел Клеменков, NVidia
RAPIDS: ускоряем Pandas и scikit-learn на GPU  Павел Клеменков, NVidiaRAPIDS: ускоряем Pandas и scikit-learn на GPU  Павел Клеменков, NVidia
RAPIDS: ускоряем Pandas и scikit-learn на GPU Павел Клеменков, NVidia
 
Jvm tuning for low latency application & Cassandra
Jvm tuning for low latency application & CassandraJvm tuning for low latency application & Cassandra
Jvm tuning for low latency application & Cassandra
 
Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013
Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013
Empowering Congress with Data-Driven Analytics (BDT304) | AWS re:Invent 2013
 
Amazon Web Services - An Overview
Amazon Web Services - An OverviewAmazon Web Services - An Overview
Amazon Web Services - An Overview
 
Adventures in RDS Load Testing
Adventures in RDS Load TestingAdventures in RDS Load Testing
Adventures in RDS Load Testing
 
Gc crash course (1)
Gc crash course (1)Gc crash course (1)
Gc crash course (1)
 
Openstack grizzley puppet_talk
Openstack grizzley puppet_talkOpenstack grizzley puppet_talk
Openstack grizzley puppet_talk
 
AWS RDS Benchmark - CMG Brasil 2012
AWS RDS Benchmark - CMG Brasil 2012AWS RDS Benchmark - CMG Brasil 2012
AWS RDS Benchmark - CMG Brasil 2012
 
Dynomite Nosql
Dynomite NosqlDynomite Nosql
Dynomite Nosql
 
Openstack presentation
Openstack presentationOpenstack presentation
Openstack presentation
 
Dive into Catalyst
Dive into CatalystDive into Catalyst
Dive into Catalyst
 
Adaptive Linear Solvers and Eigensolvers
Adaptive Linear Solvers and EigensolversAdaptive Linear Solvers and Eigensolvers
Adaptive Linear Solvers and Eigensolvers
 
How to Stop Worrying and Start Caching in Java
How to Stop Worrying and Start Caching in JavaHow to Stop Worrying and Start Caching in Java
How to Stop Worrying and Start Caching in Java
 
Presentation
PresentationPresentation
Presentation
 
Apache Spark Workshop
Apache Spark WorkshopApache Spark Workshop
Apache Spark Workshop
 
Cassandra Community Webinar | Become a Super Modeler
Cassandra Community Webinar | Become a Super ModelerCassandra Community Webinar | Become a Super Modeler
Cassandra Community Webinar | Become a Super Modeler
 
Fortuna 2012 physical_mashup_artificial_intelligence
Fortuna 2012 physical_mashup_artificial_intelligenceFortuna 2012 physical_mashup_artificial_intelligence
Fortuna 2012 physical_mashup_artificial_intelligence
 
NoSQL @ CodeMash 2010
NoSQL @ CodeMash 2010NoSQL @ CodeMash 2010
NoSQL @ CodeMash 2010
 
Am I reading GC logs Correctly?
Am I reading GC logs Correctly?Am I reading GC logs Correctly?
Am I reading GC logs Correctly?
 

Similar to NVIDIA GT520, GT440 SDK MasterLog

Pick diamonds from garbage
Pick diamonds from garbagePick diamonds from garbage
Pick diamonds from garbageTier1 App
 
GC Tuning & Troubleshooting Crash Course
GC Tuning & Troubleshooting Crash CourseGC Tuning & Troubleshooting Crash Course
GC Tuning & Troubleshooting Crash CourseTier1 app
 
Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1
Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1
Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1Yukio Saito
 
"Metrics: Where and How", Vsevolod Polyakov
"Metrics: Where and How", Vsevolod Polyakov"Metrics: Where and How", Vsevolod Polyakov
"Metrics: Where and How", Vsevolod PolyakovYulia Shcherbachova
 
Databases Have Forgotten About Single Node Performance, A Wrongheaded Trade Off
Databases Have Forgotten About Single Node Performance, A Wrongheaded Trade OffDatabases Have Forgotten About Single Node Performance, A Wrongheaded Trade Off
Databases Have Forgotten About Single Node Performance, A Wrongheaded Trade OffTimescale
 
Tips & Tricks On Architecting Windows Azure For Costs
Tips & Tricks On Architecting Windows Azure For CostsTips & Tricks On Architecting Windows Azure For Costs
Tips & Tricks On Architecting Windows Azure For CostsNuno Godinho
 
Всеволод Поляков (DevOps Team Lead в Grammarly)
Всеволод Поляков (DevOps Team Lead в Grammarly)Всеволод Поляков (DevOps Team Lead в Grammarly)
Всеволод Поляков (DevOps Team Lead в Grammarly)Provectus
 
Become a Java GC Hero - ConFoo Conference
Become a Java GC Hero - ConFoo ConferenceBecome a Java GC Hero - ConFoo Conference
Become a Java GC Hero - ConFoo ConferenceTier1app
 
DUG'20: 12 - DAOS in Lenovo’s HPC Innovation Center
DUG'20: 12 - DAOS in Lenovo’s HPC Innovation CenterDUG'20: 12 - DAOS in Lenovo’s HPC Innovation Center
DUG'20: 12 - DAOS in Lenovo’s HPC Innovation CenterAndrey Kudryavtsev
 
CUDA and Caffe for deep learning
CUDA and Caffe for deep learningCUDA and Caffe for deep learning
CUDA and Caffe for deep learningAmgad Muhammad
 
Shillings in Serverless
Shillings in ServerlessShillings in Serverless
Shillings in ServerlessSheenBrisals
 
Gc and-pagescan-attacks-by-linux
Gc and-pagescan-attacks-by-linuxGc and-pagescan-attacks-by-linux
Gc and-pagescan-attacks-by-linuxCuong Tran
 
Exadata and OLTP
Exadata and OLTPExadata and OLTP
Exadata and OLTPEnkitec
 
Tez Shuffle Handler: Shuffling at Scale with Apache Hadoop
Tez Shuffle Handler: Shuffling at Scale with Apache HadoopTez Shuffle Handler: Shuffling at Scale with Apache Hadoop
Tez Shuffle Handler: Shuffling at Scale with Apache HadoopDataWorks Summit
 
Debugging linux issues with eBPF
Debugging linux issues with eBPFDebugging linux issues with eBPF
Debugging linux issues with eBPFIvan Babrou
 
Мониторинг. Опять, rootconf 2016
Мониторинг. Опять, rootconf 2016Мониторинг. Опять, rootconf 2016
Мониторинг. Опять, rootconf 2016Vsevolod Polyakov
 
ClusterPresentation
ClusterPresentationClusterPresentation
ClusterPresentationWill Dixon
 
Scaling sql server 2014 parallel insert
Scaling sql server 2014 parallel insertScaling sql server 2014 parallel insert
Scaling sql server 2014 parallel insertChris Adkin
 

Similar to NVIDIA GT520, GT440 SDK MasterLog (20)

Pick diamonds from garbage
Pick diamonds from garbagePick diamonds from garbage
Pick diamonds from garbage
 
GC Tuning & Troubleshooting Crash Course
GC Tuning & Troubleshooting Crash CourseGC Tuning & Troubleshooting Crash Course
GC Tuning & Troubleshooting Crash Course
 
Moving to G1GC
Moving to G1GCMoving to G1GC
Moving to G1GC
 
Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1
Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1
Nvidia® cuda™ 5.0 Sample Evaluation Result Part 1
 
"Metrics: Where and How", Vsevolod Polyakov
"Metrics: Where and How", Vsevolod Polyakov"Metrics: Where and How", Vsevolod Polyakov
"Metrics: Where and How", Vsevolod Polyakov
 
Databases Have Forgotten About Single Node Performance, A Wrongheaded Trade Off
Databases Have Forgotten About Single Node Performance, A Wrongheaded Trade OffDatabases Have Forgotten About Single Node Performance, A Wrongheaded Trade Off
Databases Have Forgotten About Single Node Performance, A Wrongheaded Trade Off
 
Tips & Tricks On Architecting Windows Azure For Costs
Tips & Tricks On Architecting Windows Azure For CostsTips & Tricks On Architecting Windows Azure For Costs
Tips & Tricks On Architecting Windows Azure For Costs
 
Всеволод Поляков (DevOps Team Lead в Grammarly)
Всеволод Поляков (DevOps Team Lead в Grammarly)Всеволод Поляков (DevOps Team Lead в Grammarly)
Всеволод Поляков (DevOps Team Lead в Grammarly)
 
Metrics: where and how
Metrics: where and howMetrics: where and how
Metrics: where and how
 
Become a Java GC Hero - ConFoo Conference
Become a Java GC Hero - ConFoo ConferenceBecome a Java GC Hero - ConFoo Conference
Become a Java GC Hero - ConFoo Conference
 
DUG'20: 12 - DAOS in Lenovo’s HPC Innovation Center
DUG'20: 12 - DAOS in Lenovo’s HPC Innovation CenterDUG'20: 12 - DAOS in Lenovo’s HPC Innovation Center
DUG'20: 12 - DAOS in Lenovo’s HPC Innovation Center
 
CUDA and Caffe for deep learning
CUDA and Caffe for deep learningCUDA and Caffe for deep learning
CUDA and Caffe for deep learning
 
Shillings in Serverless
Shillings in ServerlessShillings in Serverless
Shillings in Serverless
 
Gc and-pagescan-attacks-by-linux
Gc and-pagescan-attacks-by-linuxGc and-pagescan-attacks-by-linux
Gc and-pagescan-attacks-by-linux
 
Exadata and OLTP
Exadata and OLTPExadata and OLTP
Exadata and OLTP
 
Tez Shuffle Handler: Shuffling at Scale with Apache Hadoop
Tez Shuffle Handler: Shuffling at Scale with Apache HadoopTez Shuffle Handler: Shuffling at Scale with Apache Hadoop
Tez Shuffle Handler: Shuffling at Scale with Apache Hadoop
 
Debugging linux issues with eBPF
Debugging linux issues with eBPFDebugging linux issues with eBPF
Debugging linux issues with eBPF
 
Мониторинг. Опять, rootconf 2016
Мониторинг. Опять, rootconf 2016Мониторинг. Опять, rootconf 2016
Мониторинг. Опять, rootconf 2016
 
ClusterPresentation
ClusterPresentationClusterPresentation
ClusterPresentation
 
Scaling sql server 2014 parallel insert
Scaling sql server 2014 parallel insertScaling sql server 2014 parallel insert
Scaling sql server 2014 parallel insert
 

More from Yukio Saito

東京2020ボランティア参加メモ(簡易)
東京2020ボランティア参加メモ(簡易)東京2020ボランティア参加メモ(簡易)
東京2020ボランティア参加メモ(簡易)Yukio Saito
 
Exam prep microsoft_ai900_japanese_210428
Exam prep microsoft_ai900_japanese_210428Exam prep microsoft_ai900_japanese_210428
Exam prep microsoft_ai900_japanese_210428Yukio Saito
 
Simple know how to creating agenda notes and daily reports
Simple know how to creating agenda notes and daily reportsSimple know how to creating agenda notes and daily reports
Simple know how to creating agenda notes and daily reportsYukio Saito
 
Aws 転送時間計測(手順付き参考例)
Aws 転送時間計測(手順付き参考例)Aws 転送時間計測(手順付き参考例)
Aws 転送時間計測(手順付き参考例)Yukio Saito
 
異業種から福祉業界ジョブチェンジして10か月後
異業種から福祉業界ジョブチェンジして10か月後異業種から福祉業界ジョブチェンジして10か月後
異業種から福祉業界ジョブチェンジして10か月後Yukio Saito
 
異業種から福祉介護ジョブチェンジ検討
異業種から福祉介護ジョブチェンジ検討異業種から福祉介護ジョブチェンジ検討
異業種から福祉介護ジョブチェンジ検討Yukio Saito
 
オンデマンド学習スタイル例 NFU
オンデマンド学習スタイル例 NFUオンデマンド学習スタイル例 NFU
オンデマンド学習スタイル例 NFUYukio Saito
 
Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版
Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版
Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版Yukio Saito
 
Tobii eye x controller で遊ぶ
Tobii eye x controller で遊ぶTobii eye x controller で遊ぶ
Tobii eye x controller で遊ぶYukio Saito
 
斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。
斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。
斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。Yukio Saito
 
Microsoft windows phone_激安購入方法
Microsoft windows phone_激安購入方法Microsoft windows phone_激安購入方法
Microsoft windows phone_激安購入方法Yukio Saito
 
PBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜ
PBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜPBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜ
PBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜYukio Saito
 
CentOS7をインストールして遊ぶのだ
CentOS7をインストールして遊ぶのだCentOS7をインストールして遊ぶのだ
CentOS7をインストールして遊ぶのだYukio Saito
 
Androidエミュレータをちょっと速くするintel haxm(ハッサム)
Androidエミュレータをちょっと速くするintel haxm(ハッサム)Androidエミュレータをちょっと速くするintel haxm(ハッサム)
Androidエミュレータをちょっと速くするintel haxm(ハッサム)Yukio Saito
 
Winodws7のruby2でrails4を遊ぶ環境を作るのだ。
Winodws7のruby2でrails4を遊ぶ環境を作るのだ。Winodws7のruby2でrails4を遊ぶ環境を作るのだ。
Winodws7のruby2でrails4を遊ぶ環境を作るのだ。Yukio Saito
 
Astah plugin 実行方法とSysML要求図のサンプル
Astah plugin 実行方法とSysML要求図のサンプルAstah plugin 実行方法とSysML要求図のサンプル
Astah plugin 実行方法とSysML要求図のサンプルYukio Saito
 
Windows8でOpenCVを使ったAndroid(MOVERIO)開発体験したい
Windows8でOpenCVを使ったAndroid(MOVERIO)開発体験したいWindows8でOpenCVを使ったAndroid(MOVERIO)開発体験したい
Windows8でOpenCVを使ったAndroid(MOVERIO)開発体験したいYukio Saito
 
NTTcom cloud n にサービス追加の適当な手順
NTTcom cloud n にサービス追加の適当な手順NTTcom cloud n にサービス追加の適当な手順
NTTcom cloud n にサービス追加の適当な手順Yukio Saito
 
Intel xdk導入とhtml5サンプルビルド手順書
Intel xdk導入とhtml5サンプルビルド手順書Intel xdk導入とhtml5サンプルビルド手順書
Intel xdk導入とhtml5サンプルビルド手順書Yukio Saito
 
圏央道ウォーキング日記
圏央道ウォーキング日記圏央道ウォーキング日記
圏央道ウォーキング日記Yukio Saito
 

More from Yukio Saito (20)

東京2020ボランティア参加メモ(簡易)
東京2020ボランティア参加メモ(簡易)東京2020ボランティア参加メモ(簡易)
東京2020ボランティア参加メモ(簡易)
 
Exam prep microsoft_ai900_japanese_210428
Exam prep microsoft_ai900_japanese_210428Exam prep microsoft_ai900_japanese_210428
Exam prep microsoft_ai900_japanese_210428
 
Simple know how to creating agenda notes and daily reports
Simple know how to creating agenda notes and daily reportsSimple know how to creating agenda notes and daily reports
Simple know how to creating agenda notes and daily reports
 
Aws 転送時間計測(手順付き参考例)
Aws 転送時間計測(手順付き参考例)Aws 転送時間計測(手順付き参考例)
Aws 転送時間計測(手順付き参考例)
 
異業種から福祉業界ジョブチェンジして10か月後
異業種から福祉業界ジョブチェンジして10か月後異業種から福祉業界ジョブチェンジして10か月後
異業種から福祉業界ジョブチェンジして10か月後
 
異業種から福祉介護ジョブチェンジ検討
異業種から福祉介護ジョブチェンジ検討異業種から福祉介護ジョブチェンジ検討
異業種から福祉介護ジョブチェンジ検討
 
オンデマンド学習スタイル例 NFU
オンデマンド学習スタイル例 NFUオンデマンド学習スタイル例 NFU
オンデマンド学習スタイル例 NFU
 
Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版
Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版
Engadget電子工作部 健康ガジェットを作ろう ドS!コーチ発表最終版
 
Tobii eye x controller で遊ぶ
Tobii eye x controller で遊ぶTobii eye x controller で遊ぶ
Tobii eye x controller で遊ぶ
 
斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。
斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。
斉藤之雄 が 公立大学 産業技術大学院大学 で獲得したこと。
 
Microsoft windows phone_激安購入方法
Microsoft windows phone_激安購入方法Microsoft windows phone_激安購入方法
Microsoft windows phone_激安購入方法
 
PBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜ
PBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜPBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜ
PBLでは先行学習は大事だぜ、シラバスは参考程度で主体的に楽しもうぜ
 
CentOS7をインストールして遊ぶのだ
CentOS7をインストールして遊ぶのだCentOS7をインストールして遊ぶのだ
CentOS7をインストールして遊ぶのだ
 
Androidエミュレータをちょっと速くするintel haxm(ハッサム)
Androidエミュレータをちょっと速くするintel haxm(ハッサム)Androidエミュレータをちょっと速くするintel haxm(ハッサム)
Androidエミュレータをちょっと速くするintel haxm(ハッサム)
 
Winodws7のruby2でrails4を遊ぶ環境を作るのだ。
Winodws7のruby2でrails4を遊ぶ環境を作るのだ。Winodws7のruby2でrails4を遊ぶ環境を作るのだ。
Winodws7のruby2でrails4を遊ぶ環境を作るのだ。
 
Astah plugin 実行方法とSysML要求図のサンプル
Astah plugin 実行方法とSysML要求図のサンプルAstah plugin 実行方法とSysML要求図のサンプル
Astah plugin 実行方法とSysML要求図のサンプル
 
Windows8でOpenCVを使ったAndroid(MOVERIO)開発体験したい
Windows8でOpenCVを使ったAndroid(MOVERIO)開発体験したいWindows8でOpenCVを使ったAndroid(MOVERIO)開発体験したい
Windows8でOpenCVを使ったAndroid(MOVERIO)開発体験したい
 
NTTcom cloud n にサービス追加の適当な手順
NTTcom cloud n にサービス追加の適当な手順NTTcom cloud n にサービス追加の適当な手順
NTTcom cloud n にサービス追加の適当な手順
 
Intel xdk導入とhtml5サンプルビルド手順書
Intel xdk導入とhtml5サンプルビルド手順書Intel xdk導入とhtml5サンプルビルド手順書
Intel xdk導入とhtml5サンプルビルド手順書
 
圏央道ウォーキング日記
圏央道ウォーキング日記圏央道ウォーキング日記
圏央道ウォーキング日記
 

Recently uploaded

Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Commit University
 
Pigging Solutions Piggable Sweeping Elbows
Pigging Solutions Piggable Sweeping ElbowsPigging Solutions Piggable Sweeping Elbows
Pigging Solutions Piggable Sweeping ElbowsPigging Solutions
 
Key Features Of Token Development (1).pptx
Key  Features Of Token  Development (1).pptxKey  Features Of Token  Development (1).pptx
Key Features Of Token Development (1).pptxLBM Solutions
 
Science&tech:THE INFORMATION AGE STS.pdf
Science&tech:THE INFORMATION AGE STS.pdfScience&tech:THE INFORMATION AGE STS.pdf
Science&tech:THE INFORMATION AGE STS.pdfjimielynbastida
 
Bluetooth Controlled Car with Arduino.pdf
Bluetooth Controlled Car with Arduino.pdfBluetooth Controlled Car with Arduino.pdf
Bluetooth Controlled Car with Arduino.pdfngoud9212
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...Fwdays
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsMiki Katsuragi
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Patryk Bandurski
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsMark Billinghurst
 
Unblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesUnblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesSinan KOZAK
 
Scanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsScanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsRizwan Syed
 
Bun (KitWorks Team Study 노별마루 발표 2024.4.22)
Bun (KitWorks Team Study 노별마루 발표 2024.4.22)Bun (KitWorks Team Study 노별마루 발표 2024.4.22)
Bun (KitWorks Team Study 노별마루 발표 2024.4.22)Wonjun Hwang
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfAddepto
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions
 
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 3652toLead Limited
 
Install Stable Diffusion in windows machine
Install Stable Diffusion in windows machineInstall Stable Diffusion in windows machine
Install Stable Diffusion in windows machinePadma Pradeep
 
costume and set research powerpoint presentation
costume and set research powerpoint presentationcostume and set research powerpoint presentation
costume and set research powerpoint presentationphoebematthew05
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfAlex Barbosa Coqueiro
 
Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitecturePixlogix Infotech
 

Recently uploaded (20)

Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!Nell’iperspazio con Rocket: il Framework Web di Rust!
Nell’iperspazio con Rocket: il Framework Web di Rust!
 
Hot Sexy call girls in Panjabi Bagh 🔝 9953056974 🔝 Delhi escort Service
Hot Sexy call girls in Panjabi Bagh 🔝 9953056974 🔝 Delhi escort ServiceHot Sexy call girls in Panjabi Bagh 🔝 9953056974 🔝 Delhi escort Service
Hot Sexy call girls in Panjabi Bagh 🔝 9953056974 🔝 Delhi escort Service
 
Pigging Solutions Piggable Sweeping Elbows
Pigging Solutions Piggable Sweeping ElbowsPigging Solutions Piggable Sweeping Elbows
Pigging Solutions Piggable Sweeping Elbows
 
Key Features Of Token Development (1).pptx
Key  Features Of Token  Development (1).pptxKey  Features Of Token  Development (1).pptx
Key Features Of Token Development (1).pptx
 
Science&tech:THE INFORMATION AGE STS.pdf
Science&tech:THE INFORMATION AGE STS.pdfScience&tech:THE INFORMATION AGE STS.pdf
Science&tech:THE INFORMATION AGE STS.pdf
 
Bluetooth Controlled Car with Arduino.pdf
Bluetooth Controlled Car with Arduino.pdfBluetooth Controlled Car with Arduino.pdf
Bluetooth Controlled Car with Arduino.pdf
 
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks..."LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
"LLMs for Python Engineers: Advanced Data Analysis and Semantic Kernel",Oleks...
 
Vertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering TipsVertex AI Gemini Prompt Engineering Tips
Vertex AI Gemini Prompt Engineering Tips
 
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
Integration and Automation in Practice: CI/CD in Mule Integration and Automat...
 
Human Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR SystemsHuman Factors of XR: Using Human Factors to Design XR Systems
Human Factors of XR: Using Human Factors to Design XR Systems
 
Unblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen FramesUnblocking The Main Thread Solving ANRs and Frozen Frames
Unblocking The Main Thread Solving ANRs and Frozen Frames
 
Scanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL CertsScanning the Internet for External Cloud Exposures via SSL Certs
Scanning the Internet for External Cloud Exposures via SSL Certs
 
Bun (KitWorks Team Study 노별마루 발표 2024.4.22)
Bun (KitWorks Team Study 노별마루 발표 2024.4.22)Bun (KitWorks Team Study 노별마루 발표 2024.4.22)
Bun (KitWorks Team Study 노별마루 발표 2024.4.22)
 
Gen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdfGen AI in Business - Global Trends Report 2024.pdf
Gen AI in Business - Global Trends Report 2024.pdf
 
Pigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food ManufacturingPigging Solutions in Pet Food Manufacturing
Pigging Solutions in Pet Food Manufacturing
 
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
Tech-Forward - Achieving Business Readiness For Copilot in Microsoft 365
 
Install Stable Diffusion in windows machine
Install Stable Diffusion in windows machineInstall Stable Diffusion in windows machine
Install Stable Diffusion in windows machine
 
costume and set research powerpoint presentation
costume and set research powerpoint presentationcostume and set research powerpoint presentation
costume and set research powerpoint presentation
 
Unraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdfUnraveling Multimodality with Large Language Models.pdf
Unraveling Multimodality with Large Language Models.pdf
 
Understanding the Laravel MVC Architecture
Understanding the Laravel MVC ArchitectureUnderstanding the Laravel MVC Architecture
Understanding the Laravel MVC Architecture
 

NVIDIA GT520, GT440 SDK MasterLog

  • 1. ATTN: yukio.saitoh@gmail.com NVIDIA GT520 SDK MASTER LOG 2011/08/04 FXFROG (C2D E8200, RAM 8GB, Win7 x64) deviceQuery CUDA Driver = CUDART CUDA Driver Version = 4.0 CUDA Runtime Version = 4.0 NumDevs = 1 Device = GeForce GT 520 deviceQuery CUDA Driver = CUDART CUDA Driver Version = 4.0 CUDA Runtime Version = 4.0 NumDevs = 1 Device = GeForce GT 520 MersenneTwister Throughput = 0.2892 GNumbers/s Time = 0.08299 s Size = 24002560 Numbers NumDevsUsed = 1 Workgroup = 128 quasirandomGenerator Throughput = 0.3590 GNumbers/s Time = 0.00876 s Size = 3145728 Numbers NumDevsUsed = 1 Workgroup = 384 quasirandomGenerator-inverse Throughput = 0.7063 GNumbers/s Time = 0.00445 s Size = 3145728 Numbers NumDevsUsed = 1 Workgroup = 128 radixSort Throughput = 29.1391 MElements/s Time = 0.03599 s Size = 1048576 elements Reduction Throughput = 7.3134 GB/s Time = 0.00918 s Size = 16777216 Elements NumDevsUsed = 1 Workgroup = 256 scan-Short Throughput = 0.1387 MElements/s Time = 0.00738 s Size = 1024 Elements NumDevsUsed = 1 Workgroup = 256 scan-Large Throughput = 17.5481 MElements/s Time = 0.01494 s Size = 262144 Elements NumDevsUsed = 1 Workgroup = 256
  • 2. ATTN: yukio.saitoh@gmail.com NVIDIA GT440 SDK MASTER LOG 2011/08/04 FXFROG (C2D E8200, RAM 8GB, Win7 x64) deviceQuery CUDA Driver = CUDART CUDA Driver Version = 4.0 CUDA Runtime Version = 4.0 NumDevs = 1 Device = GeForce GT 440 BlackScholes Throughput = 1.6170 GOptions/s Time = 0.00495 s Size = 8000000 options NumDevsUsed =1 Workgroup = 128 convolutionSeparable Throughput = 278.8831 MPixels/sec Time = 0.03384 s Size = 9437184 Pixels NumDevsUsed =1 Workgroup = 0 deviceQuery CUDA Driver = CUDART CUDA Driver Version = 4.0 CUDA Runtime Version = 4.0 NumDevs = 1 Device = GeForce GT 440 dxtc Throughput = 1.8818 MPixels/s Time = 0.13930 s Size = 262144 Pixels NumDevsUsed =1 Workgroup = 64 histogram64 Throughput = 3087.4561 MB/s Time = 0.02174 s Size = 67108864 Bytes NumDevsUsed =1 Workgroup = 64 histogram256 Throughput = 1720.8238 MB/s Time = 0.03900 s Size = 67108864 Bytes NumDevsUsed =1 Workgroup = 192 > CUBLAS Throughput = 39.4083 GFlop/s Time = 0.00333 s Size = 131072000 Ops > CUDA matrixMul Throughput = 18.3094 GFlop/s Time = 0.00716 s Size = 131072000 Ops NumDevsUsed = 1 Workgroup = 1024 MersenneTwister Throughput = 0.6063 GNumbers/s Time = 0.03959 s Size = 24002560 Numbers NumDevsUsed = 1 Workgroup = 128 quasirandomGenerator Throughput = 0.0468 GNumbers/s Time = 0.06721 s Size = 3145728 Numbers NumDevsUsed = 1 Workgroup = 384 quasirandomGenerator-inverse Throughput = 0.0998 GNumbers/s Time = 0.03153 s Size = 3145728 Numbers NumDevsUsed = 1 Workgroup = 128 radixSort Throughput = 5.1810 MElements/s Time = 0.20239 s Size = 1048576 elements Reduction Throughput = 22.4863 GB/s Time = 0.00298 s Size = 16777216 Elements NumDevsUsed = 1 Workgroup = 256 scan-Short Throughput = 0.2270 MElements/s Time = 0.00451 s Size = 1024 Elements NumDevsUsed = 1 Workgroup = 256 scan-Large Throughput = 40.6028 MElements/s Time = 0.00646 s Size = 262144 Elements NumDevsUsed = 1 Workgroup = 256 sortingNetworks-bitonic Throughput = 11.6002 MElements/s Time = 0.09039 s Size = 1048576 
elements NumDevsUsed = 1 Workgroup = 512 transpose-Outer-simple copy Throughput = 8.8884 GB/s Time = 0.21974 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Inner-simple copy Throughput = 36.6637 GB/s Time = 0.05327 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Outer-shared memory copy Throughput = 3.8134 GB/s Time = 0.51218 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Inner-shared memory copy Throughput = 13.6798 GB/s Time = 0.14277 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Outer-naive Throughput = 3.2989 GB/s Time = 0.59205 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Inner-naive Throughput = 6.8978 GB/s Time = 0.28315 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Outer-coalesced Throughput = 4.6051 GB/s Time = 0.42412 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Inner-coalesced Throughput = 13.1095 GB/s Time = 0.14899 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Outer-optimized Throughput = 5.9365 GB/s Time = 0.32900 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Inner-optimized Throughput = 18.6504 GB/s Time = 0.10472 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Outer-coarse-grained Throughput = 7.5914 GB/s Time = 0.25728 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Inner-coarse-grained Throughput = 18.4704 GB/s Time = 0.10574 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Outer-fine-grained Throughput = 5.5713 GB/s Time = 0.35057 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Inner-fine-grained Throughput = 18.2539 GB/s Time = 0.10700 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 transpose-Outer-diagonal Throughput = 4.2356 GB/s Time = 0.46112 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256 
transpose-Inner-diagonal Throughput = 19.9927 GB/s Time = 0.09769 s Size = 262144 fp32 elements NumDevsUsed = 1 Workgroup = 256