ScanfFormatString.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // Handling of format strings in scanf and friends. The structure of format
  11. // strings for fscanf() is described in C99 7.19.6.2.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "clang/Analysis/Analyses/FormatString.h"
  15. #include "FormatStringParsing.h"
  16. #include "clang/Basic/TargetInfo.h"
  17. using clang::analyze_format_string::ArgType;
  18. using clang::analyze_format_string::FormatStringHandler;
  19. using clang::analyze_format_string::LengthModifier;
  20. using clang::analyze_format_string::OptionalAmount;
  21. using clang::analyze_format_string::ConversionSpecifier;
  22. using clang::analyze_scanf::ScanfConversionSpecifier;
  23. using clang::analyze_scanf::ScanfSpecifier;
  24. using clang::UpdateOnReturn;
  25. using namespace clang;
  26. typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
  27. ScanfSpecifierResult;
  28. static bool ParseScanList(FormatStringHandler &H,
  29. ScanfConversionSpecifier &CS,
  30. const char *&Beg, const char *E) {
  31. const char *I = Beg;
  32. const char *start = I - 1;
  33. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  34. // No more characters?
  35. if (I == E) {
  36. H.HandleIncompleteScanList(start, I);
  37. return true;
  38. }
  39. // Special case: ']' is the first character.
  40. if (*I == ']') {
  41. if (++I == E) {
  42. H.HandleIncompleteScanList(start, I - 1);
  43. return true;
  44. }
  45. }
  46. // Special case: "^]" are the first characters.
  47. if (I + 1 != E && I[0] == '^' && I[1] == ']') {
  48. I += 2;
  49. if (I == E) {
  50. H.HandleIncompleteScanList(start, I - 1);
  51. return true;
  52. }
  53. }
  54. // Look for a ']' character which denotes the end of the scan list.
  55. while (*I != ']') {
  56. if (++I == E) {
  57. H.HandleIncompleteScanList(start, I - 1);
  58. return true;
  59. }
  60. }
  61. CS.setEndScanList(I);
  62. return false;
  63. }
  64. // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
  65. // We can possibly refactor.
  66. static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
  67. const char *&Beg,
  68. const char *E,
  69. unsigned &argIndex,
  70. const LangOptions &LO,
  71. const TargetInfo &Target) {
  72. using namespace clang::analyze_scanf;
  73. const char *I = Beg;
  74. const char *Start = nullptr;
  75. UpdateOnReturn <const char*> UpdateBeg(Beg, I);
  76. // Look for a '%' character that indicates the start of a format specifier.
  77. for ( ; I != E ; ++I) {
  78. char c = *I;
  79. if (c == '\0') {
  80. // Detect spurious null characters, which are likely errors.
  81. H.HandleNullChar(I);
  82. return true;
  83. }
  84. if (c == '%') {
  85. Start = I++; // Record the start of the format specifier.
  86. break;
  87. }
  88. }
  89. // No format specifier found?
  90. if (!Start)
  91. return false;
  92. if (I == E) {
  93. // No more characters left?
  94. H.HandleIncompleteSpecifier(Start, E - Start);
  95. return true;
  96. }
  97. ScanfSpecifier FS;
  98. if (ParseArgPosition(H, FS, Start, I, E))
  99. return true;
  100. if (I == E) {
  101. // No more characters left?
  102. H.HandleIncompleteSpecifier(Start, E - Start);
  103. return true;
  104. }
  105. // Look for '*' flag if it is present.
  106. if (*I == '*') {
  107. FS.setSuppressAssignment(I);
  108. if (++I == E) {
  109. H.HandleIncompleteSpecifier(Start, E - Start);
  110. return true;
  111. }
  112. }
  113. // Look for the field width (if any). Unlike printf, this is either
  114. // a fixed integer or isn't present.
  115. const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E);
  116. if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
  117. assert(Amt.getHowSpecified() == OptionalAmount::Constant);
  118. FS.setFieldWidth(Amt);
  119. if (I == E) {
  120. // No more characters left?
  121. H.HandleIncompleteSpecifier(Start, E - Start);
  122. return true;
  123. }
  124. }
  125. // Look for the length modifier.
  126. if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
  127. // No more characters left?
  128. H.HandleIncompleteSpecifier(Start, E - Start);
  129. return true;
  130. }
  131. // Detect spurious null characters, which are likely errors.
  132. if (*I == '\0') {
  133. H.HandleNullChar(I);
  134. return true;
  135. }
  136. // Finally, look for the conversion specifier.
  137. const char *conversionPosition = I++;
  138. ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
  139. switch (*conversionPosition) {
  140. default:
  141. break;
  142. case '%': k = ConversionSpecifier::PercentArg; break;
  143. case 'A': k = ConversionSpecifier::AArg; break;
  144. case 'E': k = ConversionSpecifier::EArg; break;
  145. case 'F': k = ConversionSpecifier::FArg; break;
  146. case 'G': k = ConversionSpecifier::GArg; break;
  147. case 'X': k = ConversionSpecifier::XArg; break;
  148. case 'a': k = ConversionSpecifier::aArg; break;
  149. case 'd': k = ConversionSpecifier::dArg; break;
  150. case 'e': k = ConversionSpecifier::eArg; break;
  151. case 'f': k = ConversionSpecifier::fArg; break;
  152. case 'g': k = ConversionSpecifier::gArg; break;
  153. case 'i': k = ConversionSpecifier::iArg; break;
  154. case 'n': k = ConversionSpecifier::nArg; break;
  155. case 'c': k = ConversionSpecifier::cArg; break;
  156. case 'C': k = ConversionSpecifier::CArg; break;
  157. case 'S': k = ConversionSpecifier::SArg; break;
  158. case '[': k = ConversionSpecifier::ScanListArg; break;
  159. case 'u': k = ConversionSpecifier::uArg; break;
  160. case 'x': k = ConversionSpecifier::xArg; break;
  161. case 'o': k = ConversionSpecifier::oArg; break;
  162. case 's': k = ConversionSpecifier::sArg; break;
  163. case 'p': k = ConversionSpecifier::pArg; break;
  164. // Apple extensions
  165. // Apple-specific
  166. case 'D':
  167. if (Target.getTriple().isOSDarwin())
  168. k = ConversionSpecifier::DArg;
  169. break;
  170. case 'O':
  171. if (Target.getTriple().isOSDarwin())
  172. k = ConversionSpecifier::OArg;
  173. break;
  174. case 'U':
  175. if (Target.getTriple().isOSDarwin())
  176. k = ConversionSpecifier::UArg;
  177. break;
  178. }
  179. ScanfConversionSpecifier CS(conversionPosition, k);
  180. if (k == ScanfConversionSpecifier::ScanListArg) {
  181. if (ParseScanList(H, CS, I, E))
  182. return true;
  183. }
  184. FS.setConversionSpecifier(CS);
  185. if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
  186. && !FS.usesPositionalArg())
  187. FS.setArgIndex(argIndex++);
  188. // FIXME: '%' and '*' doesn't make sense. Issue a warning.
  189. // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
  190. if (k == ScanfConversionSpecifier::InvalidSpecifier) {
  191. // Assume the conversion takes one argument.
  192. return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg);
  193. }
  194. return ScanfSpecifierResult(Start, FS);
  195. }
  196. ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
  197. const ScanfConversionSpecifier &CS = getConversionSpecifier();
  198. if (!CS.consumesDataArgument())
  199. return ArgType::Invalid();
  200. switch(CS.getKind()) {
  201. // Signed int.
  202. case ConversionSpecifier::dArg:
  203. case ConversionSpecifier::DArg:
  204. case ConversionSpecifier::iArg:
  205. switch (LM.getKind()) {
  206. case LengthModifier::None:
  207. return ArgType::PtrTo(Ctx.IntTy);
  208. case LengthModifier::AsChar:
  209. return ArgType::PtrTo(ArgType::AnyCharTy);
  210. case LengthModifier::AsShort:
  211. return ArgType::PtrTo(Ctx.ShortTy);
  212. case LengthModifier::AsLong:
  213. return ArgType::PtrTo(Ctx.LongTy);
  214. case LengthModifier::AsLongLong:
  215. case LengthModifier::AsQuad:
  216. return ArgType::PtrTo(Ctx.LongLongTy);
  217. case LengthModifier::AsInt64:
  218. return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
  219. case LengthModifier::AsIntMax:
  220. return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
  221. case LengthModifier::AsSizeT:
  222. // FIXME: ssize_t.
  223. return ArgType();
  224. case LengthModifier::AsPtrDiff:
  225. return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
  226. case LengthModifier::AsLongDouble:
  227. // GNU extension.
  228. return ArgType::PtrTo(Ctx.LongLongTy);
  229. case LengthModifier::AsAllocate:
  230. case LengthModifier::AsMAllocate:
  231. case LengthModifier::AsInt32:
  232. case LengthModifier::AsInt3264:
  233. case LengthModifier::AsWide:
  234. return ArgType::Invalid();
  235. }
  236. // Unsigned int.
  237. case ConversionSpecifier::oArg:
  238. case ConversionSpecifier::OArg:
  239. case ConversionSpecifier::uArg:
  240. case ConversionSpecifier::UArg:
  241. case ConversionSpecifier::xArg:
  242. case ConversionSpecifier::XArg:
  243. switch (LM.getKind()) {
  244. case LengthModifier::None:
  245. return ArgType::PtrTo(Ctx.UnsignedIntTy);
  246. case LengthModifier::AsChar:
  247. return ArgType::PtrTo(Ctx.UnsignedCharTy);
  248. case LengthModifier::AsShort:
  249. return ArgType::PtrTo(Ctx.UnsignedShortTy);
  250. case LengthModifier::AsLong:
  251. return ArgType::PtrTo(Ctx.UnsignedLongTy);
  252. case LengthModifier::AsLongLong:
  253. case LengthModifier::AsQuad:
  254. return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
  255. case LengthModifier::AsInt64:
  256. return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
  257. case LengthModifier::AsIntMax:
  258. return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
  259. case LengthModifier::AsSizeT:
  260. return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
  261. case LengthModifier::AsPtrDiff:
  262. // FIXME: Unsigned version of ptrdiff_t?
  263. return ArgType();
  264. case LengthModifier::AsLongDouble:
  265. // GNU extension.
  266. return ArgType::PtrTo(Ctx.UnsignedLongLongTy);
  267. case LengthModifier::AsAllocate:
  268. case LengthModifier::AsMAllocate:
  269. case LengthModifier::AsInt32:
  270. case LengthModifier::AsInt3264:
  271. case LengthModifier::AsWide:
  272. return ArgType::Invalid();
  273. }
  274. // Float.
  275. case ConversionSpecifier::aArg:
  276. case ConversionSpecifier::AArg:
  277. case ConversionSpecifier::eArg:
  278. case ConversionSpecifier::EArg:
  279. case ConversionSpecifier::fArg:
  280. case ConversionSpecifier::FArg:
  281. case ConversionSpecifier::gArg:
  282. case ConversionSpecifier::GArg:
  283. switch (LM.getKind()) {
  284. case LengthModifier::None:
  285. return ArgType::PtrTo(Ctx.FloatTy);
  286. case LengthModifier::AsLong:
  287. return ArgType::PtrTo(Ctx.DoubleTy);
  288. case LengthModifier::AsLongDouble:
  289. return ArgType::PtrTo(Ctx.LongDoubleTy);
  290. default:
  291. return ArgType::Invalid();
  292. }
  293. // Char, string and scanlist.
  294. case ConversionSpecifier::cArg:
  295. case ConversionSpecifier::sArg:
  296. case ConversionSpecifier::ScanListArg:
  297. switch (LM.getKind()) {
  298. case LengthModifier::None:
  299. return ArgType::PtrTo(ArgType::AnyCharTy);
  300. case LengthModifier::AsLong:
  301. case LengthModifier::AsWide:
  302. return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
  303. case LengthModifier::AsAllocate:
  304. case LengthModifier::AsMAllocate:
  305. return ArgType::PtrTo(ArgType::CStrTy);
  306. case LengthModifier::AsShort:
  307. if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
  308. return ArgType::PtrTo(ArgType::AnyCharTy);
  309. default:
  310. return ArgType::Invalid();
  311. }
  312. case ConversionSpecifier::CArg:
  313. case ConversionSpecifier::SArg:
  314. // FIXME: Mac OS X specific?
  315. switch (LM.getKind()) {
  316. case LengthModifier::None:
  317. case LengthModifier::AsWide:
  318. return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
  319. case LengthModifier::AsAllocate:
  320. case LengthModifier::AsMAllocate:
  321. return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
  322. case LengthModifier::AsShort:
  323. if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
  324. return ArgType::PtrTo(ArgType::AnyCharTy);
  325. default:
  326. return ArgType::Invalid();
  327. }
  328. // Pointer.
  329. case ConversionSpecifier::pArg:
  330. return ArgType::PtrTo(ArgType::CPointerTy);
  331. // Write-back.
  332. case ConversionSpecifier::nArg:
  333. switch (LM.getKind()) {
  334. case LengthModifier::None:
  335. return ArgType::PtrTo(Ctx.IntTy);
  336. case LengthModifier::AsChar:
  337. return ArgType::PtrTo(Ctx.SignedCharTy);
  338. case LengthModifier::AsShort:
  339. return ArgType::PtrTo(Ctx.ShortTy);
  340. case LengthModifier::AsLong:
  341. return ArgType::PtrTo(Ctx.LongTy);
  342. case LengthModifier::AsLongLong:
  343. case LengthModifier::AsQuad:
  344. return ArgType::PtrTo(Ctx.LongLongTy);
  345. case LengthModifier::AsInt64:
  346. return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
  347. case LengthModifier::AsIntMax:
  348. return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
  349. case LengthModifier::AsSizeT:
  350. return ArgType(); // FIXME: ssize_t
  351. case LengthModifier::AsPtrDiff:
  352. return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
  353. case LengthModifier::AsLongDouble:
  354. return ArgType(); // FIXME: Is this a known extension?
  355. case LengthModifier::AsAllocate:
  356. case LengthModifier::AsMAllocate:
  357. case LengthModifier::AsInt32:
  358. case LengthModifier::AsInt3264:
  359. case LengthModifier::AsWide:
  360. return ArgType::Invalid();
  361. }
  362. default:
  363. break;
  364. }
  365. return ArgType();
  366. }
  367. bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
  368. const LangOptions &LangOpt,
  369. ASTContext &Ctx) {
  370. // %n is different from other conversion specifiers; don't try to fix it.
  371. if (CS.getKind() == ConversionSpecifier::nArg)
  372. return false;
  373. if (!QT->isPointerType())
  374. return false;
  375. QualType PT = QT->getPointeeType();
  376. // If it's an enum, get its underlying type.
  377. if (const EnumType *ETy = PT->getAs<EnumType>())
  378. PT = ETy->getDecl()->getIntegerType();
  379. const BuiltinType *BT = PT->getAs<BuiltinType>();
  380. if (!BT)
  381. return false;
  382. // Pointer to a character.
  383. if (PT->isAnyCharacterType()) {
  384. CS.setKind(ConversionSpecifier::sArg);
  385. if (PT->isWideCharType())
  386. LM.setKind(LengthModifier::AsWideChar);
  387. else
  388. LM.setKind(LengthModifier::None);
  389. // If we know the target array length, we can use it as a field width.
  390. if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
  391. if (CAT->getSizeModifier() == ArrayType::Normal)
  392. FieldWidth = OptionalAmount(OptionalAmount::Constant,
  393. CAT->getSize().getZExtValue() - 1,
  394. "", 0, false);
  395. }
  396. return true;
  397. }
  398. // Figure out the length modifier.
  399. switch (BT->getKind()) {
  400. // no modifier
  401. case BuiltinType::UInt:
  402. case BuiltinType::Int:
  403. case BuiltinType::Float:
  404. LM.setKind(LengthModifier::None);
  405. break;
  406. // hh
  407. case BuiltinType::Char_U:
  408. case BuiltinType::UChar:
  409. case BuiltinType::Char_S:
  410. case BuiltinType::SChar:
  411. LM.setKind(LengthModifier::AsChar);
  412. break;
  413. // h
  414. case BuiltinType::Short:
  415. case BuiltinType::UShort:
  416. LM.setKind(LengthModifier::AsShort);
  417. break;
  418. // l
  419. case BuiltinType::Long:
  420. case BuiltinType::ULong:
  421. case BuiltinType::Double:
  422. LM.setKind(LengthModifier::AsLong);
  423. break;
  424. // ll
  425. case BuiltinType::LongLong:
  426. case BuiltinType::ULongLong:
  427. LM.setKind(LengthModifier::AsLongLong);
  428. break;
  429. // L
  430. case BuiltinType::LongDouble:
  431. LM.setKind(LengthModifier::AsLongDouble);
  432. break;
  433. // Don't know.
  434. default:
  435. return false;
  436. }
  437. // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
  438. if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
  439. namedTypeToLengthModifier(PT, LM);
  440. // If fixing the length modifier was enough, we are done.
  441. if (hasValidLengthModifier(Ctx.getTargetInfo())) {
  442. const analyze_scanf::ArgType &AT = getArgType(Ctx);
  443. if (AT.isValid() && AT.matchesType(Ctx, QT))
  444. return true;
  445. }
  446. // Figure out the conversion specifier.
  447. if (PT->isRealFloatingType())
  448. CS.setKind(ConversionSpecifier::fArg);
  449. else if (PT->isSignedIntegerType())
  450. CS.setKind(ConversionSpecifier::dArg);
  451. else if (PT->isUnsignedIntegerType())
  452. CS.setKind(ConversionSpecifier::uArg);
  453. else
  454. llvm_unreachable("Unexpected type");
  455. return true;
  456. }
  457. void ScanfSpecifier::toString(raw_ostream &os) const {
  458. os << "%";
  459. if (usesPositionalArg())
  460. os << getPositionalArgIndex() << "$";
  461. if (SuppressAssignment)
  462. os << "*";
  463. FieldWidth.toString(os);
  464. os << LM.toString();
  465. os << CS.toString();
  466. }
  467. bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
  468. const char *I,
  469. const char *E,
  470. const LangOptions &LO,
  471. const TargetInfo &Target) {
  472. unsigned argIndex = 0;
  473. // Keep looking for a format specifier until we have exhausted the string.
  474. while (I != E) {
  475. const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
  476. LO, Target);
  477. // Did a fail-stop error of any kind occur when parsing the specifier?
  478. // If so, don't do any more processing.
  479. if (FSR.shouldStop())
  480. return true;
  481. // Did we exhaust the string or encounter an error that
  482. // we can recover from?
  483. if (!FSR.hasValue())
  484. continue;
  485. // We have a format specifier. Pass it to the callback.
  486. if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
  487. I - FSR.getStart())) {
  488. return true;
  489. }
  490. }
  491. assert(I == E && "Format string not exhausted");
  492. return false;
  493. }