R.3.0.0 is released!!
2013年4月20日
第30回Tokyo.R
@sfchaos
2
というわけで,
2013年4月3日
3
R-3.0.0
リリース!
The R Project for Statistical Computing
4
いろめき立つ
R界隈
R-statistics blog
SOURCEFORGE.JP Magazine
5
何が変わったか?
6
@kohskeさんのページに
まとまっている
R.3.0.0の大事な変更点
7
大きな変更点
Long Vectorの導入
8
Long Vectorとは
長いベクトルのこと
9
> x <- rep(0, 2^31-1)
> x <- rep(0, 2^31)
エラー: サイズ 8.0 Gb のベクトルを割り当てることができません
追加情報: 警告メッセージ:
1: Reached total allocation of 16375Mb: see help(memory.size)
2: Reached total allocation of 16375Mb: see help(memory.size)
3: Reached total allocation of 16375Mb: see help(memory.size)
4: Reached total allocation of 16375Mb: see help(memory.size)
R-2.15.3までは
1ベクトルのサイズは
2^31-1まで(32ビット)
※ベクトルだけでなく,配列や行列などのオブジェクトにも同じ制約あり
10
これがもっともっと
長くなった
※ただし,いろいろと微妙らしい
前出の@kohskeさんのページ参照
出典: Wikipedia 太陽系
11
その比なんと
2^21
(≒10^6.3)
/* both config.h and Rconfig.h set SIZEOF_SIZE_T, but Rconfig.h is
skipped if config.h has already been included. */
#ifndef R_CONFIG_H
# include <Rconfig.h>
#endif
#if ( SIZEOF_SIZE_T > 4 )
# define LONG_VECTOR_SUPPORT
#endif
#ifdef LONG_VECTOR_SUPPORT
typedef ptrdiff_t R_xlen_t;
typedef struct { R_xlen_t lv_length, lv_truelength; } R_long_vec_hdr_t;
# define R_XLEN_T_MAX 4503599627370496
# define R_SHORT_LEN_MAX 2147483647
# define R_LONG_VEC_TOKEN -1
2^52 (R_XLEN_T_MAX)
2^31-1 (R_SHORT_LEN_MAX)
12
まさにマシマシ
13
コードも簡潔に
char *R_alloc(size_t nelem, int eltsize)
{
R_size_t size = nelem * eltsize;
double dsize = (double)nelem * eltsize;
if (dsize > 0) { /* precaution against integer
overflow on 32-bit*/
SEXP s;
#if SIZEOF_SIZE_T > 4
/* In this case by allocating larger units we can
get up to
size(Rcomplex) * (2^31 - 1) bytes, approx 16Gb
*/
if(dsize < R_LEN_T_MAX)
s = allocVector(RAWSXP, size + 1);
else if(dsize < sizeof(double) * (R_LEN_T_MAX - 1))
s = allocVector(REALSXP, (int)
(0.99+dsize/sizeof(double)));
else if(dsize < sizeof(Rcomplex) * (R_LEN_T_MAX -
1))
s = allocVector(CPLXSXP, (int)
(0.99+dsize/sizeof(Rcomplex)));
else {
error(_("cannot allocate memory block of size
%0.1f Gb"),
dsize/1024.0/1024.0/1024.0);
s = R_NilValue; /* -Wall */
}
#else
if(dsize > R_LEN_T_MAX) /* must be in the Gb range
*/
error(_("cannot allocate memory block of size
%0.1f Gb"),
dsize/1024.0/1024.0/1024.0);
s = allocVector(RAWSXP, size + 1);
#endif
ATTRIB(s) = R_VStack; R_VStack = s;
return (char *) DATAPTR(s);
}
else return NULL;
}
/*
 * R_alloc (R-3.0.0 version, quoted from src/main/memory.c).
 *
 * Requests a block of nelem * eltsize bytes by allocating a RAWSXP of
 * size + 1 elements via allocVector(), links the new object onto
 * R_VStack through its ATTRIB field, and returns the object's data
 * pointer cast to char *.
 *
 * nelem    number of elements requested
 * eltsize  size of one element
 *
 * Returns the data pointer of the new vector, or NULL when the computed
 * size is not positive.  Calls error() when the size exceeds
 * R_XLEN_T_MAX (with LONG_VECTOR_SUPPORT) or R_LEN_T_MAX (without).
 */
char *R_alloc(size_t nelem, int eltsize)
{
R_size_t size = nelem * eltsize;
/* doubles are a precaution against integer
overflow on 32-bit */
double dsize = (double) nelem * eltsize;
/* only a strictly positive request allocates anything */
if (dsize > 0) {
SEXP s;
#ifdef LONG_VECTOR_SUPPORT
/* 64-bit platform: previous version used REALSXPs
*/
/* single size check against the long-vector limit, then one RAWSXP */
if(dsize > R_XLEN_T_MAX) /* currently 4096 TB */
error(_("cannot allocate memory block of size
%0.f Tb"),
dsize/pow(1024.0, 4.0));
s = allocVector(RAWSXP, size + 1);
#else
/* without long-vector support the limit is R_LEN_T_MAX */
if(dsize > R_LEN_T_MAX) /* must be in the Gb range
*/
error(_("cannot allocate memory block of size
%0.1f Gb"),
dsize/pow(1024.0, 3.0));
s = allocVector(RAWSXP, size + 1);
#endif
/* push the new vector onto R_VStack: the previous top is stored in
   the new object's ATTRIB, then the new object becomes the top */
ATTRIB(s) = R_VStack;
R_VStack = s;
return (char *) DATAPTR(s);
}
/* One programmer has relied on this, but it is
undocumented! */
else return NULL;
}
src/main/memory.c
(R.version.tar.gzを解凍)
14
あと,R.3.0.0では
パッケージを
入れ直す必要が
あることに注意
15
以上!
詳しくはこのあとの
@wdkzさんのLTで
16
/*
 * do_Rprofmem: starts or stops memory reporting based on the arguments
 * in args.
 *
 * args is read positionally:
 *   1st (CAR)    filename    - must be a string vector of length 1
 *   2nd (CADR)   append mode - converted with asLogical()
 *   3rd (CADDR)  threshold   - first element of the argument's REAL data
 *
 * A non-empty filename calls R_InitMemReporting(filename, append_mode,
 * threshold); an empty filename calls R_EndMemReporting().
 * call and rho are not used in this body.
 *
 * Always returns R_NilValue; calls error() if the filename argument is
 * not a length-1 string vector.
 */
SEXP attribute_hidden do_Rprofmem(SEXP call, SEXP op, SEXP args, SEXP rho)
{
SEXP filename;
R_size_t threshold;
int append_mode;
checkArity(op, args);
/* reject anything that is not exactly one string */
if (!isString(CAR(args)) || (LENGTH(CAR(args))) != 1)
error(_("invalid '%s' argument"), "filename");
append_mode = asLogical(CADR(args));
filename = STRING_ELT(CAR(args), 0);
/* NOTE(review): the REAL() value is assigned to an R_size_t without an
   explicit cast or range check - confirm callers pass a sane value */
threshold = REAL(CADDR(args))[0];
/* an empty filename string means "stop reporting" */
if (strlen(CHAR(filename)))
R_InitMemReporting(filename, append_mode, threshold);
else
R_EndMemReporting();
return R_NilValue;
}
#include "RBufferUtils.h"
attribute_hidden
void *R_AllocStringBuffer(size_t blen, R_StringBuffer *buf)
{
size_t blen1, bsize = buf->defaultSize;
/* for backwards compatibility, probably no longer needed */
if(blen == (size_t)-1) {
warning("R_AllocStringBuffer(-1) used: please report");
R_FreeStringBufferL(buf);
return NULL;
}
if(blen * sizeof(char) < buf->bufsize) return buf->data;
if(blen * sizeof(char) < buf->bufsize) return buf->data;
blen1 = blen = (blen + 1) * sizeof(char);
blen = (blen / bsize) * bsize;
if(blen < blen1) blen += bsize;
if(buf->data == NULL) {
buf->data = (char *) malloc(blen);
ところで
ソースコードが
読みにくくないか?
17
読みやすいコード発見
namespace CXXR {
class String;
template <typename, SEXPTYPE,
typename Initializer = RObject::DoNothing> class FixedVector;
typedef FixedVector<int, INTSXP> IntVector;
typedef FixedVector<RHandle<>, VECSXP> ListVector;
typedef FixedVector<RHandle<String>, STRSXP> StringVector;
/** @brief Untemplated base class for R vectors.
*/
class VectorBase : public RObject {
public:
/**
* @param stype The required ::SEXPTYPE.
* @param sz The required number of elements in the vector.
*/
VectorBase(SEXPTYPE stype, std::size_t sz)
: RObject(stype), m_truelength(sz), m_size(sz)
{}
/** @brief Copy constructor.
*
* @param pattern VectorBase to be copied.
*/
VectorBase(const VectorBase& pattern)
: RObject(pattern), m_truelength(pattern.m_truelength),
m_size(pattern.m_size)
{}
クラスを用いて
オブジェクト指向で記
述
18
ドキュメントもある
19
その名はCXXR
CXXR: Refactorising R into C++
20
#include <iostream>
#include <fstream>
#include <boost/noncopyable.hpp>
class BigDataFrame : boost::noncopyable
{
public:
enum DataType {CHAR=1, SHORT=2, INT=3, DOUBLE=4, COMPLEX=5};
public:
BigDataFrame(index_type nrow, index_type ncol) : nrow_(nrow), ncol_(ncol) {
// initializing shared pointer
p = std::shared_ptr<Monitor>(new Monitor[ncol],
std::default_delete<Monitor[]>());
Rの中身を
C++で書き直す
プロジェクト
21
useR!2009,2010等で
発表されている
Provenance Tracking in CXXR 
(useR!2009)
CXXR and Add-on Packages
(useR!2010)
22
おわコンかと思いきや
最近の論文も
CXXR: an extensible R interpreter
23
調査して
いつかどこかで
紹介する(かも)

R3.0.0 is released

  • 1.
  • 2.
  • 3.
    3 R-3.0.0 リリース! The R Project for Statistical Computing
  • 4.
  • 5.
  • 6.
  • 7.
  • 8.
  • 9.
    9 > x <- rep(0, 2^31-1) > x <- rep(0, 2^31) エラー: サイズ 8.0 Gb のベクトルを割り当てることができません 追加情報: 警告メッセージ: 1: Reached total allocation of 16375Mb: see help(memory.size) 2: Reached total allocation of 16375Mb: see help(memory.size) 3: Reached total allocation of 16375Mb: see help(memory.size) 4: Reached total allocation of 16375Mb: see help(memory.size) R-2.15.3までは 1ベクトルのサイズは 2^31-1まで(32ビット) ※ベクトルだけでなく,配列や行列などのオブジェクトにも同じ制約あり
  • 10.
  • 11.
    11 その比なんと 2^21 (≒10^6.3) /* both config.h and Rconfig.h set SIZEOF_SIZE_T, but Rconfig.h is skipped if config.h has already been included. */ #ifndef R_CONFIG_H # include <Rconfig.h> #endif #if ( SIZEOF_SIZE_T > 4 ) # define LONG_VECTOR_SUPPORT #endif #ifdef LONG_VECTOR_SUPPORT typedef ptrdiff_t R_xlen_t; typedef struct { R_xlen_t lv_length, lv_truelength; } R_long_vec_hdr_t; # define R_XLEN_T_MAX 4503599627370496 # define R_SHORT_LEN_MAX 2147483647 # define R_LONG_VEC_TOKEN -1 2^52 2^31-1
  • 12.
  • 13.
    13 コードも簡潔にchar *R_alloc(size_t nelem,int eltsize) { R_size_t size = nelem * eltsize; double dsize = (double)nelem * eltsize; if (dsize > 0) { /* precaution against integer overflow on 32-bit*/ SEXP s; #if SIZEOF_SIZE_T > 4 /* In this case by allocating larger units we can get up to size(Rcomplex) * (2^31 - 1) bytes, approx 16Gb */ if(dsize < R_LEN_T_MAX) s = allocVector(RAWSXP, size + 1); else if(dsize < sizeof(double) * (R_LEN_T_MAX - 1)) s = allocVector(REALSXP, (int) (0.99+dsize/sizeof(double))); else if(dsize < sizeof(Rcomplex) * (R_LEN_T_MAX - 1)) s = allocVector(CPLXSXP, (int) (0.99+dsize/sizeof(Rcomplex))); else { error(_("cannot allocate memory block of size %0.1f Gb"), dsize/1024.0/1024.0/1024.0); s = R_NilValue; /* -Wall */ } #else if(dsize > R_LEN_T_MAX) /* must be in the Gb range */ error(_("cannot allocate memory block of size %0.1f Gb"), dsize/1024.0/1024.0/1024.0); s = allocVector(RAWSXP, size + 1); #endif ATTRIB(s) = R_VStack; R_VStack = s; return (char *) DATAPTR(s); } else return NULL; } char *R_alloc(size_t nelem, int eltsize) { R_size_t size = nelem * eltsize; /* doubles are a precaution against integer overflow on 32-bit */ double dsize = (double) nelem * eltsize; if (dsize > 0) { SEXP s; #ifdef LONG_VECTOR_SUPPORT /* 64-bit platform: previous version used REALSXPs */ if(dsize > R_XLEN_T_MAX) /* currently 4096 TB */ error(_("cannot allocate memory block of size %0.f Tb"), dsize/pow(1024.0, 4.0)); s = allocVector(RAWSXP, size + 1); #else if(dsize > R_LEN_T_MAX) /* must be in the Gb range */ error(_("cannot allocate memory block of size %0.1f Gb"), dsize/pow(1024.0, 3.0)); s = allocVector(RAWSXP, size + 1); #endif ATTRIB(s) = R_VStack; R_VStack = s; return (char *) DATAPTR(s); } /* One programmer has relied on this, but it is undocumented! */ else return NULL; } src/main/memory.c (R.version.tar.gzを解凍)
  • 14.
  • 15.
  • 16.
    16 SEXP attribute_hidden do_Rprofmem(SEXPcall, SEXP op, SEXP args, SEXP rho) { SEXP filename; R_size_t threshold; int append_mode; checkArity(op, args); if (!isString(CAR(args)) || (LENGTH(CAR(args))) != 1) error(_("invalid '%s' argument"), "filename"); append_mode = asLogical(CADR(args)); filename = STRING_ELT(CAR(args), 0); threshold = REAL(CADDR(args))[0]; if (strlen(CHAR(filename))) R_InitMemReporting(filename, append_mode, threshold); else R_EndMemReporting(); return R_NilValue; } #include "RBufferUtils.h" attribute_hidden void *R_AllocStringBuffer(size_t blen, R_StringBuffer *buf) { size_t blen1, bsize = buf->defaultSize; /* for backwards compatibility, probably no longer needed */ if(blen == (size_t)-1) { warning("R_AllocStringBuffer(-1) used: please report"); R_FreeStringBufferL(buf); return NULL; } if(blen * sizeof(char) < buf->bufsize) return buf->data; if(blen * sizeof(char) < buf->bufsize) return buf->data; blen1 = blen = (blen + 1) * sizeof(char); blen = (blen / bsize) * bsize; if(blen < blen1) blen += bsize; if(buf->data == NULL) { buf->data = (char *) malloc(blen); ところで ソースコードが 読みにくくないか?
  • 17.
    17 読みやすいコード発見namespace CXXR { classString; template <typename, SEXPTYPE, typename Initializer = RObject::DoNothing> class FixedVector; typedef FixedVector<int, INTSXP> IntVector; typedef FixedVector<RHandle<>, VECSXP> ListVector; typedef FixedVector<RHandle<String>, STRSXP> StringVector; /** @brief Untemplated base class for R vectors. */ class VectorBase : public RObject { public: /** * @param stype The required ::SEXPTYPE. * @param sz The required number of elements in the vector. */ VectorBase(SEXPTYPE stype, std::size_t sz) : RObject(stype), m_truelength(sz), m_size(sz) {} /** @brief Copy constructor. * * @param pattern VectorBase to be copied. */ VectorBase(const VectorBase& pattern) : RObject(pattern), m_truelength(pattern.m_truelength), m_size(pattern.m_size) {} クラスを用いて オブジェクト指向で記 述
  • 18.
  • 19.
  • 20.
    20 #include <iostream> #include <fstream> #include<boost/noncopyable.hpp> class BigDataFrame : boost::noncopyable { public: enum DataType {CHAR=1, SHORT=2, INT=3, DOUBLE=4, COMPLEX=5}; public: BigDataFrame(index_type nrow, index_type ncol) : nrow_(nrow), ncol_(ncol) { // initializing shared pointer p = std::shared_ptr<Monitor>(new Monitor[ncol], std::default_delete<Monitor[]>()); Rの中身を C++で書き直す プロジェクト
  • 21.
    21 useR!2009,2010等で 発表されている Provenance Tracking in CXXR  (useR!2009) CXXR and Add-on Packages (useR!2010)
  • 22.
  • 23.