vBLAS.pas 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. {
  2. File: vecLib/vBLAS.h
  3. Contains: Header for the Basic Linear Algebra Subprograms, with Apple extensions.
  4. Version: vecLib-$(vDSP_Version0).$(vDSP_Version1)
  5. Copyright: © 2000-$(Year) by Apple Computer, Inc., all rights reserved.
  6. Bugs?: For bug reports, consult the following page on
  7. the World Wide Web:
  8. http://bugs.freepascal.org
  9. }
  10. { Pascal Translation Updated: Jonas Maebe, <[email protected]>, October 2009 }
  11. { Pascal Translation Updated: Jonas Maebe, <[email protected]>, October 2012 }
  12. {
  13. Modified for use with Free Pascal
  14. Version 308
  15. Please report any bugs to <[email protected]>
  16. }
  // Stand-alone preamble: everything down to the matching endc (marked "not MACOSALLINCLUDE")
  // is compiled only when MACOSALLINCLUDE is undefined or evaluates to false.
  17. {$ifc not defined MACOSALLINCLUDE or not MACOSALLINCLUDE}
  // Compiler settings for this unit: MacPas dialect, cblocks mode switch, 1-byte enums,
  // macros and inlining enabled, mwpascal as the default calling convention.
  18. {$mode macpas}
  19. {$modeswitch cblocks}
  20. {$packenum 1}
  21. {$macro on}
  22. {$inline on}
  23. {$calling mwpascal}
  24. unit vBLAS;
  25. interface
  // Interface version constants, plus a default for USE_CFSTR_CONSTANT_MACROS that is
  // applied only when the symbol has not been defined already.
  26. {$setc UNIVERSAL_INTERFACES_VERSION := $0400}
  27. {$setc GAP_INTERFACES_VERSION := $0308}
  28. {$ifc not defined USE_CFSTR_CONSTANT_MACROS}
  29. {$setc USE_CFSTR_CONSTANT_MACROS := TRUE}
  30. {$endc}
  // Sanity checks: stop compilation if mutually exclusive CPU or endianness symbols are both defined.
  31. {$ifc defined CPUPOWERPC and defined CPUI386}
  32. {$error Conflicting initial definitions for CPUPOWERPC and CPUI386}
  33. {$endc}
  34. {$ifc defined FPC_BIG_ENDIAN and defined FPC_LITTLE_ENDIAN}
  35. {$error Conflicting initial definitions for FPC_BIG_ENDIAN and FPC_LITTLE_ENDIAN}
  36. {$endc}
  // Derive the C-style architecture symbols (__ppc__, __ppc64__, __i386__, __x86_64__,
  // __arm__, __arm64__) as 1/0 from the corresponding CPU* defines. Each symbol is always
  // assigned, so the later "defined X and X" tests can rely on it having a value.
  37. {$ifc not defined __ppc__ and defined CPUPOWERPC32}
  38. {$setc __ppc__ := 1}
  39. {$elsec}
  40. {$setc __ppc__ := 0}
  41. {$endc}
  42. {$ifc not defined __ppc64__ and defined CPUPOWERPC64}
  43. {$setc __ppc64__ := 1}
  44. {$elsec}
  45. {$setc __ppc64__ := 0}
  46. {$endc}
  47. {$ifc not defined __i386__ and defined CPUI386}
  48. {$setc __i386__ := 1}
  49. {$elsec}
  50. {$setc __i386__ := 0}
  51. {$endc}
  52. {$ifc not defined __x86_64__ and defined CPUX86_64}
  53. {$setc __x86_64__ := 1}
  54. {$elsec}
  55. {$setc __x86_64__ := 0}
  56. {$endc}
  57. {$ifc not defined __arm__ and defined CPUARM}
  58. {$setc __arm__ := 1}
  59. {$elsec}
  60. {$setc __arm__ := 0}
  61. {$endc}
  62. {$ifc not defined __arm64__ and defined CPUAARCH64}
  63. {$setc __arm64__ := 1}
  64. {$elsec}
  65. {$setc __arm64__ := 0}
  66. {$endc}
  // __LP64__ is 1 when cpu64 is defined, otherwise 0.
  67. {$ifc defined cpu64}
  68. {$setc __LP64__ := 1}
  69. {$elsec}
  70. {$setc __LP64__ := 0}
  71. {$endc}
  72. {$ifc defined __ppc__ and __ppc__ and defined __i386__ and __i386__}
  73. {$error Conflicting definitions for __ppc__ and __i386__}
  74. {$endc}
  // Target selection: exactly one branch of the chain below sets the full group of
  // TARGET_CPU_* flags together with TARGET_OS_MAC, TARGET_OS_IPHONE,
  // TARGET_IPHONE_SIMULATOR and TARGET_OS_EMBEDDED. The final elsec raises an error
  // when no supported architecture symbol is set.
  // Branch: 32-bit PowerPC (Mac).
  75. {$ifc defined __ppc__ and __ppc__}
  76. {$setc TARGET_CPU_PPC := TRUE}
  77. {$setc TARGET_CPU_PPC64 := FALSE}
  78. {$setc TARGET_CPU_X86 := FALSE}
  79. {$setc TARGET_CPU_X86_64 := FALSE}
  80. {$setc TARGET_CPU_ARM := FALSE}
  81. {$setc TARGET_CPU_ARM64 := FALSE}
  82. {$setc TARGET_OS_MAC := TRUE}
  83. {$setc TARGET_OS_IPHONE := FALSE}
  84. {$setc TARGET_IPHONE_SIMULATOR := FALSE}
  85. {$setc TARGET_OS_EMBEDDED := FALSE}
  // Branch: 64-bit PowerPC (Mac).
  86. {$elifc defined __ppc64__ and __ppc64__}
  87. {$setc TARGET_CPU_PPC := FALSE}
  88. {$setc TARGET_CPU_PPC64 := TRUE}
  89. {$setc TARGET_CPU_X86 := FALSE}
  90. {$setc TARGET_CPU_X86_64 := FALSE}
  91. {$setc TARGET_CPU_ARM := FALSE}
  92. {$setc TARGET_CPU_ARM64 := FALSE}
  93. {$setc TARGET_OS_MAC := TRUE}
  94. {$setc TARGET_OS_IPHONE := FALSE}
  95. {$setc TARGET_IPHONE_SIMULATOR := FALSE}
  96. {$setc TARGET_OS_EMBEDDED := FALSE}
  // Branch: 32-bit x86. The iphonesim define switches the OS flags to the iPhone simulator;
  // otherwise the target is the Mac.
  97. {$elifc defined __i386__ and __i386__}
  98. {$setc TARGET_CPU_PPC := FALSE}
  99. {$setc TARGET_CPU_PPC64 := FALSE}
  100. {$setc TARGET_CPU_X86 := TRUE}
  101. {$setc TARGET_CPU_X86_64 := FALSE}
  102. {$setc TARGET_CPU_ARM := FALSE}
  103. {$setc TARGET_CPU_ARM64 := FALSE}
  104. {$ifc defined iphonesim}
  105. {$setc TARGET_OS_MAC := FALSE}
  106. {$setc TARGET_OS_IPHONE := TRUE}
  107. {$setc TARGET_IPHONE_SIMULATOR := TRUE}
  108. {$elsec}
  109. {$setc TARGET_OS_MAC := TRUE}
  110. {$setc TARGET_OS_IPHONE := FALSE}
  111. {$setc TARGET_IPHONE_SIMULATOR := FALSE}
  112. {$endc}
  113. {$setc TARGET_OS_EMBEDDED := FALSE}
  // Branch: 64-bit x86. Same iphonesim handling as the 32-bit x86 branch.
  114. {$elifc defined __x86_64__ and __x86_64__}
  115. {$setc TARGET_CPU_PPC := FALSE}
  116. {$setc TARGET_CPU_PPC64 := FALSE}
  117. {$setc TARGET_CPU_X86 := FALSE}
  118. {$setc TARGET_CPU_X86_64 := TRUE}
  119. {$setc TARGET_CPU_ARM := FALSE}
  120. {$setc TARGET_CPU_ARM64 := FALSE}
  121. {$ifc defined iphonesim}
  122. {$setc TARGET_OS_MAC := FALSE}
  123. {$setc TARGET_OS_IPHONE := TRUE}
  124. {$setc TARGET_IPHONE_SIMULATOR := TRUE}
  125. {$elsec}
  126. {$setc TARGET_OS_MAC := TRUE}
  127. {$setc TARGET_OS_IPHONE := FALSE}
  128. {$setc TARGET_IPHONE_SIMULATOR := FALSE}
  129. {$endc}
  130. {$setc TARGET_OS_EMBEDDED := FALSE}
  // Branch: 32-bit ARM. Always flagged as iPhone / embedded, never as Mac.
  131. {$elifc defined __arm__ and __arm__}
  132. {$setc TARGET_CPU_PPC := FALSE}
  133. {$setc TARGET_CPU_PPC64 := FALSE}
  134. {$setc TARGET_CPU_X86 := FALSE}
  135. {$setc TARGET_CPU_X86_64 := FALSE}
  136. {$setc TARGET_CPU_ARM := TRUE}
  137. {$setc TARGET_CPU_ARM64 := FALSE}
  138. {$setc TARGET_OS_MAC := FALSE}
  139. {$setc TARGET_OS_IPHONE := TRUE}
  140. {$setc TARGET_IPHONE_SIMULATOR := FALSE}
  141. {$setc TARGET_OS_EMBEDDED := TRUE}
  // Branch: 64-bit ARM. The ios define selects iPhone / embedded; otherwise the target is the Mac.
  142. {$elifc defined __arm64__ and __arm64__}
  143. {$setc TARGET_CPU_PPC := FALSE}
  144. {$setc TARGET_CPU_PPC64 := FALSE}
  145. {$setc TARGET_CPU_X86 := FALSE}
  146. {$setc TARGET_CPU_X86_64 := FALSE}
  147. {$setc TARGET_CPU_ARM := FALSE}
  148. {$setc TARGET_CPU_ARM64 := TRUE}
  149. {$ifc defined ios}
  150. {$setc TARGET_OS_MAC := FALSE}
  151. {$setc TARGET_OS_IPHONE := TRUE}
  152. {$setc TARGET_OS_EMBEDDED := TRUE}
  153. {$elsec}
  154. {$setc TARGET_OS_MAC := TRUE}
  155. {$setc TARGET_OS_IPHONE := FALSE}
  156. {$setc TARGET_OS_EMBEDDED := FALSE}
  157. {$endc}
  158. {$setc TARGET_IPHONE_SIMULATOR := FALSE}
  159. {$elsec}
  160. {$error __ppc__ nor __ppc64__ nor __i386__ nor __x86_64__ nor __arm__ nor __arm64__ is defined.}
  161. {$endc}
  // TARGET_CPU_64 mirrors __LP64__.
  162. {$ifc defined __LP64__ and __LP64__ }
  163. {$setc TARGET_CPU_64 := TRUE}
  164. {$elsec}
  165. {$setc TARGET_CPU_64 := FALSE}
  166. {$endc}
  // Endianness flags come from the FPC_BIG_ENDIAN / FPC_LITTLE_ENDIAN defines; it is an
  // error if neither is defined.
  167. {$ifc defined FPC_BIG_ENDIAN}
  168. {$setc TARGET_RT_BIG_ENDIAN := TRUE}
  169. {$setc TARGET_RT_LITTLE_ENDIAN := FALSE}
  170. {$elifc defined FPC_LITTLE_ENDIAN}
  171. {$setc TARGET_RT_BIG_ENDIAN := FALSE}
  172. {$setc TARGET_RT_LITTLE_ENDIAN := TRUE}
  173. {$elsec}
  174. {$error Neither FPC_BIG_ENDIAN nor FPC_LITTLE_ENDIAN are defined.}
  175. {$endc}
  // Fixed settings: these compile-time flags are assigned unconditionally, independent of the target.
  176. {$setc ACCESSOR_CALLS_ARE_FUNCTIONS := TRUE}
  177. {$setc CALL_NOT_IN_CARBON := FALSE}
  178. {$setc OLDROUTINENAMES := FALSE}
  179. {$setc OPAQUE_TOOLBOX_STRUCTS := TRUE}
  180. {$setc OPAQUE_UPP_TYPES := TRUE}
  181. {$setc OTCARBONAPPLICATION := TRUE}
  182. {$setc OTKERNEL := FALSE}
  183. {$setc PM_USE_SESSION_APIS := TRUE}
  184. {$setc TARGET_API_MAC_CARBON := TRUE}
  185. {$setc TARGET_API_MAC_OS8 := FALSE}
  186. {$setc TARGET_API_MAC_OSX := TRUE}
  187. {$setc TARGET_CARBON := TRUE}
  188. {$setc TARGET_CPU_68K := FALSE}
  189. {$setc TARGET_CPU_MIPS := FALSE}
  190. {$setc TARGET_CPU_SPARC := FALSE}
  191. {$setc TARGET_OS_UNIX := FALSE}
  192. {$setc TARGET_OS_WIN32 := FALSE}
  193. {$setc TARGET_RT_MAC_68881 := FALSE}
  194. {$setc TARGET_RT_MAC_CFM := FALSE}
  195. {$setc TARGET_RT_MAC_MACHO := TRUE}
  196. {$setc TYPED_FUNCTION_POINTERS := TRUE}
  197. {$setc TYPE_BOOL := FALSE}
  198. {$setc TYPE_EXTENDED := FALSE}
  199. {$setc TYPE_LONGLONG := TRUE}
  // Units this interface depends on when built stand-alone.
  200. uses MacTypes,ConditionalMacros;
  201. {$endc} {not MACOSALLINCLUDE}
  202. {$ifc TARGET_OS_MAC}
  203. {$ALIGN POWER}
  204. { ==========================================================================================================================}
  205. {
  206. =================================================================================================
  207. Definitions of the Basic Linear Algebra Subprograms (BLAS) as provided by Apple Computer.
  208. A few additional functions, unique to Mac OS, have also been provided.
  209. These are clearly documented as Apple extensions.
  210. Documentation on the BLAS standard, including reference implementations, can be found on the web
  211. starting from the BLAS FAQ page at these URLs (verified live as of April 2002):
  212. http://www.netlib.org/blas/faq.html
  213. http://www.netlib.org/blas/blast-forum/blast-forum.html
  214. =================================================================================================
  215. }
  216. {
  217. =================================================================================================
  218. Matrix shape and storage
  219. ========================
  220. Keeping the various matrix shape and storage parameters straight can be difficult. The BLAS
  221. documentation generally makes a distinction between the conceptual "matrix" and the physical
  222. "array". However there are a number of places where this becomes fuzzy because of the overall
  223. bias towards FORTRAN's column major storage. The confusion is made worse by style differences
  224. between the level 2 and level 3 functions. It is amplified further by the explicit choice of row
  225. or column major storage in the C interface.
  226. The storage order does not affect the actual computation that is performed. That is, it does not
  227. affect the results other than where they appear in memory. It does affect the values passed
  228. for so-called "leading dimension" parameters, such as lda in sgemv. These are always the major
  229. stride in storage, allowing operations on rectangular subsets of larger matrices. For row major
  230. storage this is the number of columns in the parent matrix, and for column major storage this is
  231. the number of rows in the parent matrix.
  232. For the level 2 functions, which deal with only a single matrix, the matrix shape parameters are
  233. always M and N. These are the logical shape of the matrix, M rows by N columns. The transpose
  234. parameter, such as transA in sgemv, defines whether the regular matrix or its transpose is used
  235. in the operation. This affects the implicit length of the input and output vectors. For example,
  236. if the regular matrix A is used in sgemv, the input vector X has length N, the number of columns
  237. of A, and the output vector Y has length M, the number of rows of A. The length of the input and
  238. output vectors is not affected by the storage order of the matrix.
  239. The level 3 functions deal with 2 input matrices and one output matrix; the matrix shape parameters
  240. are M, N, and K. The logical shape of the output matrix is always M by N, while K is the common
  241. dimension of the input matrices. Like level 2, the transpose parameters, such as transA and transB
  242. in sgemm, define whether the regular input or its transpose is used in the operation. However
  243. unlike level 2, in level 3 the transpose parameters affect the implicit shape of the input matrix.
  244. Consider sgemm, which computes "C = (alpha * A * B) + (beta * C)", where A and B might be regular
  245. or transposed. The logical shape of C is always M rows by N columns. The physical shape depends
  246. on the storage order parameter. Using column major storage the declaration of C (the array) in C
  247. (the language) would be something like "float C[N][M]". The logical shape of A without transposition
  248. is M by K, and B is K by N. The one storage order parameter affects all three matrices.
  249. For those readers still wondering about the style differences between level 2 and level 3, they
  250. involve whether the input or output shapes are explicit. For level 2, the input matrix shape is
  251. always M by N. The input and output vector lengths are implicit and vary according to the
  252. transpose parameter. For level 3, the output matrix shape is always M by N. The input matrix
  253. shapes are implicit and vary according to the transpose parameters.
  254. =================================================================================================
  255. }
  256. { ==========================================================================================================================}
  257. {
  258. ------------------------------------------------------------------------------------------------------------------
  259. IsAlignedCount - True if an integer is positive and a multiple of 4. Negative strides are considered unaligned.
  260. IsAlignedAddr - True if an address is a multiple of 16.
  261. }
  262. // #define IsAlignedCount(n) ( (n > 0) && ((n & 3) == 0) )
  263. // #define IsAlignedAddr(a) ( ((long)a & 15L) == 0 )
  264. {
  265. =================================================================================================
  266. Prototypes for FORTRAN BLAS
  267. ===========================
  268. These are prototypes for the FORTRAN callable BLAS functions. They are implemented in C for
  269. Mac OS, as thin shims that simply call the C BLAS counterpart. These routines should never be
  270. called from C, but need to be included here so they will get output for the stub library. It
  271. won't hurt to call them from C, but who would want to since you can't pass literals for sizes?
  272. FORTRAN compilers are typically MPW tools and use PPCLink, so they will link with the official
  273. vecLib stub from Apple.
  274. =================================================================================================
  275. }
  {
    SDOT - FORTRAN-callable BLAS entry point, bound to the external symbol _SDOT.
    Per the reference BLAS this is the single-precision dot product of the vectors X and Y.

    Arguments (meanings follow the reference BLAS):
      N     - element count, passed by reference
      X     - pointer to the first vector (const in the C prototype)
      incX  - stride of X, passed by reference
      Y     - pointer to the second vector (const in the C prototype)
      incY  - stride of Y, passed by reference
    Result: Float32.

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  function SDOT(
    (*const*) var N: SInt32;
    {const} X: Float32Ptr;
    (*const*) var incX: SInt32;
    {const} Y: Float32Ptr;
    (*const*) var incY: SInt32
  ): Float32; external name '_SDOT';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SNRM2 - FORTRAN-callable BLAS entry point, bound to the external symbol _SNRM2.
    Per the reference BLAS this is the Euclidean norm of the single-precision vector X.

    Arguments (meanings follow the reference BLAS):
      N     - element count, passed by reference
      X     - pointer to the vector (const in the C prototype)
      incX  - stride of X, passed by reference
    Result: Float32.

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  function SNRM2(
    (*const*) var N: SInt32;
    {const} X: Float32Ptr;
    (*const*) var incX: SInt32
  ): Float32; external name '_SNRM2';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SASUM - FORTRAN-callable BLAS entry point, bound to the external symbol _SASUM.
    Per the reference BLAS this is the sum of the absolute values of the elements of X.

    Arguments (meanings follow the reference BLAS):
      N     - element count, passed by reference
      X     - pointer to the vector (const in the C prototype)
      incX  - stride of X, passed by reference
    Result: Float32.

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  function SASUM(
    (*const*) var N: SInt32;
    {const} X: Float32Ptr;
    (*const*) var incX: SInt32
  ): Float32; external name '_SASUM';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    ISAMAX - FORTRAN-callable BLAS entry point, bound to the external symbol _ISAMAX.
    Per the reference BLAS this returns the index of the element of X with the largest
    absolute value.
    NOTE(review): the index base is presumably the FORTRAN 1-based convention - confirm
    against the vecLib implementation before relying on it.

    Arguments (meanings follow the reference BLAS):
      N     - element count, passed by reference
      X     - pointer to the vector (const in the C prototype)
      incX  - stride of X, passed by reference
    Result: SInt32.

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  function ISAMAX(
    (*const*) var N: SInt32;
    {const} X: Float32Ptr;
    (*const*) var incX: SInt32
  ): SInt32; external name '_ISAMAX';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SSWAP - FORTRAN-callable BLAS entry point, bound to the external symbol _SSWAP.
    Per the reference BLAS this exchanges the contents of the vectors X and Y.

    Arguments (meanings follow the reference BLAS):
      N     - element count, passed by reference
      X     - pointer to the first vector (not const: written by the routine)
      incX  - stride of X, passed by reference
      Y     - pointer to the second vector (not const: written by the routine)
      incY  - stride of Y, passed by reference

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  procedure SSWAP(
    (*const*) var N: SInt32;
    X: Float32Ptr;
    (*const*) var incX: SInt32;
    Y: Float32Ptr;
    (*const*) var incY: SInt32
  ); external name '_SSWAP';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SCOPY - FORTRAN-callable BLAS entry point, bound to the external symbol _SCOPY.
    Per the reference BLAS this copies the vector X into the vector Y.

    Arguments (meanings follow the reference BLAS):
      N     - element count, passed by reference
      X     - pointer to the source vector (const in the C prototype)
      incX  - stride of X, passed by reference
      Y     - pointer to the destination vector
      incY  - stride of Y, passed by reference

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  procedure SCOPY(
    (*const*) var N: SInt32;
    {const} X: Float32Ptr;
    (*const*) var incX: SInt32;
    Y: Float32Ptr;
    (*const*) var incY: SInt32
  ); external name '_SCOPY';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SAXPY - FORTRAN-callable BLAS entry point, bound to the external symbol _SAXPY.
    Per the reference BLAS this computes Y := alpha * X + Y.

    Arguments (meanings follow the reference BLAS):
      N      - element count, passed by reference
      alpha  - scalar multiplier, declared const; expected to travel by reference under the
               mwpascal calling convention - confirm if the calling convention is ever changed
      X      - pointer to the input vector (const in the C prototype)
      incX   - stride of X, passed by reference
      Y      - pointer to the vector that is updated
      incY   - stride of Y, passed by reference

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  procedure SAXPY(
    (*const*) var N: SInt32;
    const (*var*) alpha: Float32;
    {const} X: Float32Ptr;
    (*const*) var incX: SInt32;
    Y: Float32Ptr;
    (*const*) var incY: SInt32
  ); external name '_SAXPY';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SROT - FORTRAN-callable BLAS entry point, bound to the external symbol _SROT.
    Per the reference BLAS this applies a plane rotation, described by c and s, to the
    vectors X and Y.

    Arguments (meanings follow the reference BLAS):
      N     - element count, passed by reference
      X     - pointer to the first vector (not const: written by the routine)
      incX  - stride of X, passed by reference
      Y     - pointer to the second vector (not const: written by the routine)
      incY  - stride of Y, passed by reference
      c, s  - rotation coefficients, declared const; expected to travel by reference under the
              mwpascal calling convention - confirm if the calling convention is ever changed

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  procedure SROT(
    (*const*) var N: SInt32;
    X: Float32Ptr;
    (*const*) var incX: SInt32;
    Y: Float32Ptr;
    (*const*) var incY: SInt32;
    const (*var*) c: Float32;
    const (*var*) s: Float32
  ); external name '_SROT';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SSCAL - FORTRAN-callable BLAS entry point, bound to the external symbol _SSCAL.
    Per the reference BLAS this scales the vector in place: X := alpha * X.

    Arguments (meanings follow the reference BLAS):
      N      - element count, passed by reference
      alpha  - scale factor, declared const; expected to travel by reference under the
               mwpascal calling convention - confirm if the calling convention is ever changed
      X      - pointer to the vector that is scaled
      incX   - stride of X, passed by reference

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  procedure SSCAL(
    (*const*) var N: SInt32;
    const (*var*) alpha: Float32;
    X: Float32Ptr;
    (*const*) var incX: SInt32
  ); external name '_SSCAL';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SGEMV - FORTRAN-callable BLAS level 2 entry point, bound to the external symbol _SGEMV.
    Per the reference BLAS this computes Y := alpha * op(A) * X + beta * Y, where op(A) is
    A or its transpose as selected by transA.

    Arguments:
      transA  - C string selecting whether the regular matrix or its transpose is used
      M, N    - logical shape of A: M rows by N columns, passed by reference
      alpha   - scalar applied to the matrix-vector product (declared const)
      A       - pointer to the matrix storage (const in the C prototype)
      lda     - leading dimension of A, i.e. the major stride in storage, passed by reference
      X       - pointer to the input vector (const in the C prototype)
      incX    - stride of X, passed by reference
      beta    - scalar applied to the incoming Y (declared const)
      Y       - pointer to the output vector
      incY    - stride of Y, passed by reference
    The const scalars are expected to travel by reference under the mwpascal calling
    convention - confirm if the calling convention is ever changed.

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  procedure SGEMV(
    transA: ConstCStringPtr;
    (*const*) var M: SInt32;
    (*const*) var N: SInt32;
    const (*var*) alpha: Float32;
    {const} A: Float32Ptr;
    (*const*) var lda: SInt32;
    {const} X: Float32Ptr;
    (*const*) var incX: SInt32;
    const (*var*) beta: Float32;
    Y: Float32Ptr;
    (*const*) var incY: SInt32
  ); external name '_SGEMV';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  {
    SGEMM - FORTRAN-callable BLAS level 3 entry point, bound to the external symbol _SGEMM.
    Computes C = (alpha * A * B) + (beta * C), where A and B may each be used as given or
    transposed, as selected by transA and transB.

    Arguments:
      transA, transB  - C strings selecting regular or transposed use of A and B
      M, N            - logical shape of the output matrix C: M rows by N columns
      K               - common dimension of the two input matrices
      alpha           - scalar applied to the matrix product (declared const)
      A, B            - pointers to the input matrices (const in the C prototype)
      lda, ldb, ldc   - leading dimensions (major stride in storage) of A, B and C
      beta            - scalar applied to the incoming C (declared const)
      C               - pointer to the output matrix
    M, N, K and the leading dimensions are passed by reference. The const scalars are
    expected to travel by reference under the mwpascal calling convention - confirm if the
    calling convention is ever changed.

    Availability:
      Mac OS X:        in version 10.0 and later in vecLib.framework
      CarbonLib:       not in Carbon, but vecLib is compatible with CarbonLib
      Non-Carbon CFM:  in vecLib 1.0.2 and later
  }
  procedure SGEMM(
    transA: ConstCStringPtr;
    transB: ConstCStringPtr;
    (*const*) var M: SInt32;
    (*const*) var N: SInt32;
    (*const*) var K: SInt32;
    const (*var*) alpha: Float32;
    {const} A: Float32Ptr;
    (*const*) var lda: SInt32;
    {const} B: Float32Ptr;
    (*const*) var ldb: SInt32;
    const (*var*) beta: Float32;
    C: Float32Ptr;
    (*const*) var ldc: SInt32
  ); external name '_SGEMM';
  (* AVAILABLE_MAC_OS_X_VERSION_10_0_AND_LATER *)
  386. { ==========================================================================================================================}
  387. { ==========================================================================================================================}
  388. {$endc} {TARGET_OS_MAC}
  // Unit terminator: emitted only when MACOSALLINCLUDE is undefined or false, the same
  // condition that guards the "unit vBLAS" header at the top of the file.
  389. {$ifc not defined MACOSALLINCLUDE or not MACOSALLINCLUDE}
  390. end.
  391. {$endc} {not MACOSALLINCLUDE}