graphemes.rs 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. use criterion::{black_box, criterion_group, criterion_main, Criterion};
  2. use unicode_segmentation;
  3. use std::fs;
  4. use unicode_segmentation::UnicodeSegmentation;
  5. fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
  6. let text = fs::read_to_string(path).unwrap();
  7. c.bench_function(&format!("graphemes_{}", lang), |bench| {
  8. bench.iter(|| {
  9. for g in UnicodeSegmentation::graphemes(black_box(&*text), true) {
  10. black_box(g);
  11. }
  12. })
  13. });
  14. }
  15. fn graphemes_arabic(c: &mut Criterion) {
  16. graphemes(c, "arabic", "benches/texts/arabic.txt");
  17. }
  18. fn graphemes_english(c: &mut Criterion) {
  19. graphemes(c, "english", "benches/texts/english.txt");
  20. }
  21. fn graphemes_hindi(c: &mut Criterion) {
  22. graphemes(c, "hindi", "benches/texts/hindi.txt");
  23. }
  24. fn graphemes_japanese(c: &mut Criterion) {
  25. graphemes(c, "japanese", "benches/texts/japanese.txt");
  26. }
  27. fn graphemes_korean(c: &mut Criterion) {
  28. graphemes(c, "korean", "benches/texts/korean.txt");
  29. }
  30. fn graphemes_mandarin(c: &mut Criterion) {
  31. graphemes(c, "mandarin", "benches/texts/mandarin.txt");
  32. }
  33. fn graphemes_russian(c: &mut Criterion) {
  34. graphemes(c, "russian", "benches/texts/russian.txt");
  35. }
  36. fn graphemes_source_code(c: &mut Criterion) {
  37. graphemes(c, "source_code", "benches/texts/source_code.txt");
  38. }
  39. criterion_group!(
  40. benches,
  41. graphemes_arabic,
  42. graphemes_english,
  43. graphemes_hindi,
  44. graphemes_japanese,
  45. graphemes_korean,
  46. graphemes_mandarin,
  47. graphemes_russian,
  48. graphemes_source_code,
  49. );
  50. criterion_main!(benches);