쉼표로 구분 된 std :: string 구문 분석

development

쉼표로 구분 된 std :: string 구문 분석

big-blog 2020. 7. 11. 08:54

쉼표로 구분 된 std :: string 구문 분석

이 질문에는 이미 답변이 있습니다.

문자열의 단어를 어떻게 반복합니까? 답변 76 개

쉼표로 구분 된 숫자 목록이 포함 된 std :: string이있는 경우 숫자를 구문 분석하고 정수 배열에 넣는 가장 간단한 방법은 무엇입니까?

나는 이것을 다른 것을 파싱하는 것으로 일반화하고 싶지 않습니다. "1,1,1,1,2,1,1,1,0"과 같이 쉼표로 구분 된 정수의 단순한 문자열입니다.

한 번에 숫자 하나씩 읽어 들이고, 다음 문자가 `,`인지 확인하십시오. 그렇다면 그 문자를 버리십시오.

#include <vector>
#include <string>
#include <sstream>
#include <iostream>

// Reads the integers out of a comma-separated string and prints them,
// one per line.
int main()
{
    std::string str = "1,2,3,4,5,6";
    std::stringstream stream(str);
    std::vector<int> numbers;

    int value;
    while (stream >> value) {
        numbers.push_back(value);
        // Drop the separator that follows the number, if there is one.
        if (stream.peek() == ',')
            stream.ignore();
    }

    for (std::vector<int>::const_iterator it = numbers.begin(); it != numbers.end(); ++it)
        std::cout << *it << std::endl;
}

덜 장황하고, 표준 라이브러리만 사용하며, 쉼표로 구분된 것이라면 무엇이든 받아들이는 방법입니다.

stringstream ss( "1,1,1,1, or something else ,1,1,1,0" );
vector<string> result;

while( ss.good() )
{
    string substr;
    getline( ss, substr, ',' );
    result.push_back( substr );
}

또 다른 접근 방식은 다음과 같습니다. 쉼표를 공백으로 처리하는 특수 로캘을 사용하십시오.

#include <locale>
#include <vector>

// Character-classification facet in which only ',', '\n' and ' ' count as
// whitespace, so formatted extraction skips over commas between values.
struct csv_reader: std::ctype<char> {
    csv_reader(): std::ctype<char>(get_table()) {}

    // Returns the classification table handed to the std::ctype<char> base.
    // The table has static storage, so the pointer stays valid after return.
    static std::ctype_base::mask const* get_table() {
        typedef std::ctype_base::mask mask_type;
        static std::vector<mask_type> masks(table_size, mask_type());
        static const char separators[] = { ',', '\n', ' ' };

        for (unsigned k = 0; k < sizeof(separators); ++k)
            masks[separators[k]] = std::ctype_base::space;
        return &masks.front();
    }
};

이를 사용하려면 이 패싯을 포함하는 로케일을 스트림에 imbue() 하면 됩니다. 그렇게 하면 쉼표가 없는 것처럼 숫자를 읽을 수 있습니다. 예를 들어, 다음 코드는 표준 입력에서 쉼표로 구분된 숫자를 읽어 표준 출력에 한 줄에 하나씩 씁니다.

#include <algorithm>
#include <iterator>
#include <iostream>

// Copies integers from standard input to standard output, one per line,
// after installing a locale whose ctype facet is a csv_reader.
int main() {
    std::locale comma_locale(std::locale(), new csv_reader());
    std::cin.imbue(comma_locale);

    std::istream_iterator<int> first(std::cin);
    std::istream_iterator<int> last;
    std::ostream_iterator<int> sink(std::cout, "\n");
    std::copy(first, last, sink);
    return 0;
}

C++ 문자열 툴킷 라이브러리(StrTk)는 이 문제에 대해 다음과 같은 솔루션을 제공합니다:

#include <string>
#include <deque>
#include <vector>
#include "strtk.hpp"
// Demonstrates strtk::parse with two delimiters and two container types.
// NOTE(review): the splitting/conversion behaviour is StrTk's; confirm
// against the library documentation.
int main()
{
   // Comma-separated integers into a vector.
   std::vector<int> int_list;
   std::string int_string = "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15";
   strtk::parse(int_string,",",int_list);

   // Pipe-separated doubles into a deque.
   std::deque<double> double_list;
   std::string double_string = "123.456|789.012|345.678|901.234|567.890";
   strtk::parse(double_string,"|",double_list);

   return 0;
}

더 많은 예제는 여기 에서 찾을 수 있습니다

일반 알고리즘과 Boost.Tokenizer를 사용하는 대체 솔루션 :

// Function object that converts a string to int using atoi.
struct ToInt
{
    int operator()(string const &str)
    {
        const char *text = str.c_str();
        return atoi(text);
    }
};

// Tokenize `values` with a default-configured tokenizer, then convert each
// token with ToInt and append the result to `ints`.
string values = "1,2,3,4,5,9,8,7,6";
tokenizer<> tok(values);

vector<int> ints;
transform(tok.begin(), tok.end(), back_inserter(ints), ToInt());

다음 기능을 사용할 수도 있습니다.

// Appends to `tokens` every maximal run of characters in `str` that contains
// none of the characters in `delimiters`. Leading, trailing and repeated
// delimiters never produce empty tokens.
void tokenize(const string& str, vector<string>& tokens, const string& delimiters = ",")
{
  // Start of the first token (npos when the string holds only delimiters).
  string::size_type tokenStart = str.find_first_not_of(delimiters, 0);

  while (tokenStart != string::npos) {
    // First delimiter after the token, or npos if the token runs to the end.
    const string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));

    // Jump over the delimiter run to the start of the next token.
    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
  }
}

// Converts each comma-separated field of `input` to long and appends it to
// `output`. p0 is the offset where the current field starts; p1 is the offset
// of the next ',' at or after p0 (npos once no comma remains).
std::string input="1,1,1,1,2,1,1,1,0";
std::vector<long> output;
for(std::string::size_type p0=0,p1=input.find(',');
        // Keep going while there is still a field start to convert.
        p1!=std::string::npos || p0!=std::string::npos;
        // Step past the comma just found. When there was no comma, p0 becomes
        // npos, the following find() also yields npos, and the loop ends.
        (p0=(p1==std::string::npos)?p1:++p1),p1=input.find(',',p0) )
    // strtol stops at the first character that is not part of the number
    // (here the comma). Base 0 means the radix is auto-detected from the
    // prefix, so a field such as "010" is read as octal.
    output.push_back( strtol(input.c_str()+p0,NULL,0) );

물론 strtol()의 변환 오류를 확인하는 것이 좋습니다. 이 코드에는 그 밖의 오류 검사도 추가하면 도움이 될 것입니다.

여기에 꽤 끔찍한 답변이 많으므로 테스트 프로그램을 포함하여 내 것을 추가하겠습니다.

#include <string>
#include <iostream>
#include <cstddef>

// Calls f(str, from, to) once for every delimiter-separated field of `str`,
// where [from, to) is the half-open index range of that field. Empty fields
// are reported too, and an empty string yields exactly one call with from == to.
template<typename StringFunction>
void splitString(const std::string &str, char delimiter, StringFunction f) {
  std::size_t fieldBegin = 0;
  std::size_t hit = str.find(delimiter, fieldBegin);
  while (hit != std::string::npos) {
    f(str, fieldBegin, hit);
    fieldBegin = hit + 1;
    hit = str.find(delimiter, fieldBegin);
  }
  // Whatever follows the last delimiter (possibly nothing) is the final field.
  f(str, fieldBegin, str.size());
}


// Test driver: splits the single command-line argument on ',' and prints each
// field wrapped in backticks. Exits with status 1 unless exactly one argument
// is given.
int main(int argc, char* argv[]) {
    if (argc == 2) {
        auto printField = [](const std::string &s, std::size_t from, std::size_t to) {
            std::cout << "`" << s.substr(from, to - from) << "`\n";
        };
        splitString(argv[1], ',', printField);
        return 0;
    }

    return 1;
}

좋은 속성 :

의존성 없음 (예 : 부스트)
말도 안 되게 복잡한 한 줄짜리 코드(one-liner) 없음
이해하기 쉬움
공백 문자를 문제없이 처리
원하지 않는 경우 분할을 할당하지 않습니다. 예를 들어 표시된대로 람다로 분할 할 수 있습니다.
한 번에 하나씩 문자를 추가하지 않습니다-빨라야합니다.
C++17에서는 std::string_view를 사용하도록 변경할 수 있으며, 그러면 메모리 할당이 전혀 일어나지 않아 매우 빠릅니다.

경우에 따라 변경하고 싶을 수 있는 일부 디자인 선택:

빈 항목은 무시되지 않습니다.
빈 문자열은 f ()를 한 번 호출합니다.

입력 및 출력 예 :

""      ->   {""}
","     ->   {"", ""}
"1,"    ->   {"1", ""}
"1"     ->   {"1"}
" "     ->   {" "}
"1, 2," ->   {"1", " 2", ""}
" ,, "  ->   {" ", "", " "}

#include <sstream>
#include <vector>

const char *input = "1,1,1,1,2,1,1,1,0";

// Extracts integers from `input`, discarding exactly one character after
// each number (whatever that character is).
int main() {
    std::vector<int> output;
    std::stringstream ss(input);
    for (int value; ss >> value; ss.ignore(1)) {
        output.push_back(value);
    }
}

잘못된 입력 (예 : 연속 분리기)은 이것을 망칠 것이지만 간단하다고 말한 것입니다.

아직 아무도 std::regex를 사용하는 솔루션을 제안하지 않았다는 것이 놀랍습니다.

#include <string>
#include <algorithm>
#include <vector>
#include <regex>

// Appends to `result` every run of decimal digits found in `str`, converted
// with std::stoi. Anything that is not a digit acts as a separator, so a
// leading '-' is not treated as part of a number.
void parse_csint( const std::string& str, std::vector<int>& result ) {

    const std::regex digits("(\\d+)");

    typedef std::regex_iterator<std::string::const_iterator> match_iterator;
    const match_iterator no_more_matches;

    for ( match_iterator match( str.begin(), str.end(), digits );
          match != no_more_matches; ++match ) {
        // Capture group 1 is the digit run itself.
        result.push_back( std::stoi( (*match)[1] ) );
    }

}

이 함수는 입력 벡터의 뒷면에 모든 정수를 삽입합니다. 음수 또는 부동 소수점 숫자 등을 포함하도록 정규식을 조정할 수 있습니다.

string exp = "token1 token2 token3";
char delimiter = ' ';
vector<string> str;
string acc = "";
for(int i = 0; i < exp.size(); i++)
{
    if(exp[i] == delimiter)
    {
        str.push_back(acc);
        acc = "";
    }
    else
        acc += exp[i];
}

아직 댓글을 달 수 없어서(사이트를 이제 막 시작했습니다) Jerry Coffin의 환상적인 ctype 파생 클래스를 좀 더 일반화한 버전을 그의 게시물에 추가했습니다.

슈퍼 아이디어 주셔서 감사합니다.

(반드시 검토해야하므로 여기에 일시적으로 추가해야 함)

// Character-classification facet in which exactly the characters listed in
// `seps` are treated as whitespace, so formatted extraction skips them.
//
// `seps` may be any range of char usable in a range-based for loop. Note that
// passing a string literal (a char array) also marks its terminating '\0' as
// a separator, because the loop visits every array element.
struct SeparatorReader: std::ctype<char>
{
    // The second base-constructor argument (true) makes std::ctype<char> take
    // ownership of the table and release it with delete[].
    template<typename T>
    SeparatorReader(const T &seps): std::ctype<char>(get_table(seps), true) {}

    // Builds a zero-initialised classification table with the `space` mask set
    // for each separator. Returns a new[]-allocated array of table_size
    // entries; the caller owns it.
    //
    // This is static on purpose: it is invoked from the base-class initializer,
    // i.e. before the base sub-object exists, where calling a non-static
    // member function is undefined behaviour.
    template<typename T>
    static std::ctype_base::mask const *get_table(const T &seps) {
        std::ctype_base::mask *rc = new std::ctype_base::mask[std::ctype<char>::table_size]();
        for(auto &&sep: seps)
            // Cast first so that negative char values cannot index out of bounds.
            rc[static_cast<unsigned char>(sep)] = std::ctype_base::space;
        return rc;
    }
};

bool GetList (const std::string& src, std::vector<int>& res)
  {
    using boost::lexical_cast;
    using boost::bad_lexical_cast;
    bool success = true;
    typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
    boost::char_separator<char> sepa(",");
    tokenizer tokens(src, sepa);
    for (tokenizer::iterator tok_iter = tokens.begin(); 
         tok_iter != tokens.end(); ++tok_iter) {
      try {
        res.push_back(lexical_cast<int>(*tok_iter));
      }
      catch (bad_lexical_cast &) {
        success = false;
      }
    }
    return success;
  }

간단한 구조, 쉽게 적용 가능, 유지 보수 용이

// Splits `stringIn` on ',' into `commaSeparated`. The vector starts with one
// empty string so that the first field has somewhere to accumulate; empty
// fields (two consecutive commas) are kept as empty strings.
std::string stringIn = "my,csv,,is 10233478,separated,by commas";
std::vector<std::string> commaSeparated(1);
int commaCounter = 0;   // index of the field currently being filled
for (std::string::size_type i=0; i<stringIn.size(); i++) {
    // Compare against the character ',' — comparing a char with the string
    // literal "," (a pointer) does not compile.
    if (stringIn[i] == ',') {
        commaSeparated.push_back("");
        commaCounter++;
    } else {
        commaSeparated.at(commaCounter) += stringIn[i];
    }
}

결국 문장의 모든 요소가 공백으로 구분 된 문자열 벡터가 생깁니다. 빈 문자열은 별도의 항목으로 저장됩니다.

부스트 토크 나이저 기반의 간단한 복사 / 붙여 넣기 기능 .

// Tokenizes `string` with a default-configured boost::tokenizer and stores
// the atoi() value of each token in `array`, writing at most `array_len`
// elements. Tokens beyond the capacity of the array are ignored.
// NOTE(review): which characters the default tokenizer treats as delimiters
// is defined by Boost.Tokenizer — confirm it suits the input (e.g. '-').
void strToIntArray(std::string string, int* array, int array_len) {
  boost::tokenizer<> tok(string);
  int i = 0;
  // Stop as soon as the array is full instead of walking the remaining tokens.
  for(boost::tokenizer<>::iterator beg=tok.begin(); beg!=tok.end() && i < array_len; ++beg, ++i){
    array[i] = atoi(beg->c_str());
  }
}

이것은 내가 가장 많이 사용하는 가장 간단한 방법입니다. 한 문자 구분 기호에 사용할 수 있습니다.

#include<bits/stdc++.h>
using namespace std;

// Reads one whitespace-delimited word from standard input, interprets it as
// integers separated by single characters (e.g. "1,2,3"), and prints each
// integer on its own line.
int main() {
   string str;

   cin >> str;
   vector<int> result;
   stringstream ss(str);

   // Store a value only when its extraction actually succeeded. The former
   // do/while stored `temp` unconditionally, which added a bogus element for
   // empty input or for a trailing separator such as "1,2,".
   int temp;
   while (ss >> temp)
   {
       result.push_back(temp);

       // Consume the single separator character. If there is none, the stream
       // enters a failed state and the next extraction ends the loop.
       char ch;
       ss >> ch;
   }

   for(vector<int>::size_type i=0 ; i < result.size() ; i++)
       cout<<result[i]<<endl;

   return 0;
}

// Splits `string` on `separator` and appends the atoi() value of every field
// to `result`. An empty field between two separators contributes 0; an empty
// field after a trailing separator (and an empty input) contributes nothing.
void ExplodeString( const std::string& string, const char separator, std::list<int>& result ) {
    std::string::size_type fieldStart = 0;
    std::string::size_type sepPos = string.find( separator, fieldStart );

    while( sepPos != std::string::npos ) {
        const std::string field = string.substr( fieldStart, sepPos - fieldStart );
        result.push_back( atoi(field.c_str()) );
        fieldStart = sepPos + 1;
        sepPos = string.find( separator, fieldStart );
    }

    // Remaining text after the last separator, if any; atoi reads up to the
    // string's terminating null character.
    if( fieldStart < string.size() ) result.push_back( atoi(string.c_str() + fieldStart) );
}

#include <sstream>
#include <vector>
#include <algorithm>
#include <iterator>

const char *input = ",,29870,1,abc,2,1,1,1,0";

// Scans `input` for runs of decimal digits, skipping every other character,
// and prints the integers found, separated by spaces.
int main()
{
    std::stringstream ss(input);
    std::vector<int> output;
    int i;
    while ( !ss.eof() )
    {
       // Look at the next character without consuming it; at end of input
       // peek() returns EOF (which is below '0') and sets eofbit.
       int c =  ss.peek() ;
       if ( c < '0' || c > '9' )
       {
          // Not a digit: drop one character and try again.
          ss.ignore(1);
          continue;
       }

       if (ss >> i)
       {
          output.push_back(i);
       }
       else if ( !ss.eof() )
       {
          // The extraction failed on a digit run, i.e. the number does not
          // fit in an int. Its digits have already been consumed, so clear
          // the failure and carry on; otherwise every later peek()/ignore()
          // would be a no-op and the loop would never reach end of input.
          ss.clear();
       }

    }

    std::copy(output.begin(), output.end(), std::ostream_iterator<int> (std::cout, " ") );
    return 0;
}

참고URL : https://stackoverflow.com/questions/1894886/parsing-a-comma-delimited-stdstring

'development' 카테고리의 다른 글

'자바'는 내부 또는 외부 명령으로 인식되지 않습니다 (0)	2020.07.11
누락 된 IIS Express SSL 인증서를 어떻게 복원합니까? (0)	2020.07.11
Angular-CLI를 사용하여 특정 모듈에 컴포넌트 작성 (0)	2020.07.11
TypeScript 파일이 변경 될 때 TS 노드를보고 다시로드하는 방법 (0)	2020.07.11
Safari에서 Swift Open Link (0)	2020.07.11

현재글쉼표로 구분 된 std :: string 구문 분석

big-blog

쉼표로 구분 된 std :: string 구문 분석

쉼표로 구분 된 std :: string 구문 분석

'development' 카테고리의 다른 글

'development'의 다른글

티스토리툴바

쉼표로 구분 된 std :: string 구문 분석

쉼표로 구분 된 std :: string 구문 분석

'development' 카테고리의 다른 글

'development'의 다른글

관련글

티스토리툴바