> ## Documentation Index
> Fetch the complete documentation index at: https://private-7c7dfe99-fix-nav-issues.mintlify.site/llms.txt
> Use this file to discover all available pages before exploring further.

> Más de 150 M reseñas de clientes sobre productos de Amazon

# Reseñas de clientes de Amazon

export const RunnableCode = ({children, run = false, showStats = true}) => {
  const [results, setResults] = useState(null);
  const [error, setError] = useState(null);
  const [loading, setLoading] = useState(false);
  const [showResults, setShowResults] = useState(false);
  const [stats, setStats] = useState(null);
  const [isDark, setIsDark] = useState(false);
  const [hoveredRow, setHoveredRow] = useState(-1);
  const codeRef = useRef(null);
  useEffect(() => {
    if (typeof window !== 'undefined') {
      const check = () => setIsDark(document.documentElement.classList.contains('dark'));
      check();
      const observer = new MutationObserver(check);
      observer.observe(document.documentElement, {
        attributes: true,
        attributeFilter: ['class']
      });
      return () => observer.disconnect();
    }
  }, []);
  useEffect(() => {
    if (codeRef.current) {
      const block = codeRef.current.querySelector('.code-block');
      if (block) {
        block.style.marginBottom = '0';
        block.style.marginTop = '0';
        block.style.borderBottomLeftRadius = '0';
        block.style.borderBottomRightRadius = '0';
      }
    }
  });
  const getSqlText = () => {
    if (!codeRef.current) return '';
    const code = codeRef.current.querySelector('code');
    return (code || codeRef.current).textContent.trim();
  };
  const executeQuery = async () => {
    const sql = getSqlText();
    if (!sql) return;
    setLoading(true);
    setError(null);
    setResults(null);
    setShowResults(true);
    try {
      const cleanQuery = sql.replace(/;$/, '').trim();
      const params = new URLSearchParams({
        query: cleanQuery,
        default_format: 'JSONCompact',
        result_overflow_mode: 'break',
        read_overflow_mode: 'break',
        allow_experimental_analyzer: '1'
      });
      const res = await fetch(`https://sql-clickhouse.clickhouse.com/?${params.toString()}`, {
        method: 'POST',
        headers: {
          'Authorization': `Basic ${btoa(`demo:`)}`
        }
      });
      const text = await res.text();
      if (!res.ok) {
        setError(text || `HTTP ${res.status}`);
        setLoading(false);
        return;
      }
      const json = JSON.parse(text);
      setResults(json);
      setStats(json.statistics || null);
    } catch (err) {
      setError(err.message || 'Query execution failed');
    }
    setLoading(false);
  };
  useEffect(() => {
    if (run) executeQuery();
  }, []);
  const formatRows = n => {
    if (n >= 1e9) return `${(n / 1e9).toFixed(1)}B`;
    if (n >= 1e6) return `${(n / 1e6).toFixed(1)}M`;
    if (n >= 1e3) return `${(n / 1e3).toFixed(1)}K`;
    return String(n);
  };
  const formatBytes = b => {
    if (b >= 1e9) return `${(b / 1e9).toFixed(2)} GB`;
    if (b >= 1e6) return `${(b / 1e6).toFixed(2)} MB`;
    if (b >= 1e3) return `${(b / 1e3).toFixed(2)} KB`;
    return `${b} B`;
  };
  const isNumericType = type => {
    return (/^(UInt|Int|Float|Decimal)/).test(type);
  };
  const isHyperlink = value => {
    return typeof value === 'string' && (/^https?:\/\//).test(value);
  };
  const computeColumnExtremes = (meta, data) => {
    const extremes = {};
    for (let i = 0; i < meta.length; i++) {
      if (isNumericType(meta[i].type)) {
        let min = Infinity, max = -Infinity;
        for (const row of data) {
          const v = Number(row[i]);
          if (!isNaN(v)) {
            if (v < min) min = v;
            if (v > max) max = v;
          }
        }
        if (max > -Infinity) {
          extremes[i] = {
            min,
            max
          };
        }
      }
    }
    return extremes;
  };
  const computeColumnWidths = (meta, data) => {
    const lengths = meta.map((col, i) => {
      const headerLen = col.name.length + col.type.length + 1;
      let maxData = 0;
      for (const row of data) {
        const v = row[i];
        const len = v === null ? 4 : String(v).length;
        if (len > maxData) maxData = len;
      }
      return Math.max(headerLen, maxData);
    });
    const total = lengths.reduce((s, l) => s + l, 0);
    return lengths.map(l => `${(l / total * 100).toFixed(1)}%`);
  };
  const copyResultsAsTSV = () => {
    if (!results || !results.meta || !results.data) return;
    const header = results.meta.map(col => col.name).join('\t');
    const rows = results.data.map(row => row.map(cell => cell === null ? 'NULL' : String(cell)).join('\t'));
    const tsv = [header, ...rows].join('\n');
    navigator.clipboard.writeText(tsv);
  };
  const borderColor = isDark ? 'rgba(255,255,255,0.15)' : '#e5e7eb';
  const bgColor = isDark ? 'rgba(255,255,255,0.05)' : '#f9fafb';
  const headerBg = isDark ? '#2a2a2a' : '#f3f4f6';
  const textColor = isDark ? '#e5e7eb' : '#1f2937';
  const mutedColor = isDark ? '#d1d5db' : '#6b7280';
  const accentColor = isDark ? '#FAFF69' : '#323232';
  const accentTextColor = isDark ? '#000' : '#fff';
  const barColor = isDark ? '#35372f' : '#d2d2d2';
  const cellBg = isDark ? '#1f201b' : '#ffffff';
  const cellBgHover = isDark ? 'lch(15.8 0 0)' : '#f0f0f0';
  const extremes = results && results.meta && results.data ? computeColumnExtremes(results.meta, results.data) : {};
  const colWidths = results && results.meta && results.data ? computeColumnWidths(results.meta, results.data) : [];
  const getCellBarStyle = (cell, ci, ri) => {
    if (cell === null) return null;
    const colMeta = results.meta[ci];
    if (!isNumericType(colMeta.type) || !extremes[ci] || results.data.length <= 1 || extremes[ci].max <= 0) return null;
    const ratio = 100 * Number(cell) / extremes[ci].max;
    const bg = ri === hoveredRow ? cellBgHover : cellBg;
    return {
      background: `linear-gradient(to right, ${barColor} 0%, ${barColor} ${ratio}%, ${bg} ${ratio}%, ${bg} 100%)`
    };
  };
  const renderCell = (cell, ci) => {
    if (cell === null) {
      return <span style={{
        color: mutedColor,
        fontStyle: 'italic'
      }}>NULL</span>;
    }
    const value = String(cell);
    if (isHyperlink(value)) {
      return <a href={value} target="_blank" rel="noopener noreferrer" style={{
        color: accentColor,
        textDecoration: 'underline',
        cursor: 'pointer'
      }}>
          {value}
        </a>;
    }
    return value;
  };
  return <div className="not-prose" style={{
    margin: '1rem 0',
    width: '100%',
    boxSizing: 'border-box',
    contain: 'inline-size'
  }}>

      {}
      <div>
        <div ref={codeRef}>
          {children}
        </div>

        {}
        <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    alignItems: 'center',
    padding: '6px 12px',
    backgroundColor: headerBg,
    borderWidth: '0 1px 1px 1px',
    borderStyle: 'solid',
    borderColor: isDark ? 'rgba(255,255,255,0.1)' : 'rgba(11,11,11,0.1)',
    borderRadius: '0 0 4px 4px'
  }}>
          <div style={{
    display: 'flex',
    alignItems: 'center',
    gap: '12px'
  }}>
            {results && <button onClick={() => setShowResults(!showResults)} style={{
    background: 'none',
    border: 'none',
    cursor: 'pointer',
    color: mutedColor,
    fontSize: '12px',
    padding: '2px 4px'
  }}>
                {showResults ? '▼ Hide results' : '▶ Show results'}
              </button>}
            {showStats && stats && <span style={{
    fontSize: '11px',
    color: mutedColor,
    fontStyle: 'italic'
  }}>
                Read {formatRows(stats.rows_read)} rows, {formatBytes(stats.bytes_read)} in {stats.elapsed.toFixed(3)}s
              </span>}
          </div>
          <button onClick={() => executeQuery()} disabled={loading} style={{
    display: 'flex',
    alignItems: 'center',
    gap: '6px',
    padding: '4px 14px',
    borderRadius: '4px',
    border: 'none',
    cursor: loading ? 'wait' : 'pointer',
    backgroundColor: accentColor,
    color: accentTextColor,
    fontSize: '12px',
    fontWeight: 600
  }}>
            {loading ? <span>Running...</span> : <>
                <span style={{
    fontSize: '10px'
  }}>▶</span>
                <span>Run</span>
              </>}
          </button>
        </div>
      </div>

      {}
      {showResults && <div className="not-prose" style={{
    marginTop: '8px',
    maxHeight: '350px',
    overflow: 'auto',
    border: `1px solid ${borderColor}`,
    borderRadius: '4px'
  }}>
          <div>
          {loading && <div style={{
    padding: '24px',
    textAlign: 'center',
    color: mutedColor
  }}>
              Executing query...
            </div>}

          {error && <div style={{
    padding: '12px 16px',
    color: '#ef4444',
    backgroundColor: isDark ? 'rgba(239,68,68,0.1)' : '#fef2f2',
    fontSize: '13px',
    fontFamily: 'monospace',
    whiteSpace: 'pre-wrap'
  }}>
              {error}
            </div>}

          {results && results.meta && results.data && <div style={{
    display: 'grid',
    gridTemplateColumns: colWidths.join(' '),
    width: '100%',
    fontSize: '13px',
    fontFamily: 'ui-monospace, SFMono-Regular, "SF Mono", Menlo, Consolas, monospace'
  }}>
              {results.meta.map((col, i) => <div key={`h-${i}`} style={{
    position: 'sticky',
    top: 0,
    zIndex: 1,
    padding: '6px 12px',
    textAlign: isNumericType(col.type) && results.meta.length > 1 ? 'right' : 'left',
    backgroundColor: headerBg,
    borderBottom: `1px solid ${borderColor}`,
    color: textColor,
    fontWeight: 600,
    fontSize: '12px',
    whiteSpace: 'nowrap',
    overflow: 'hidden',
    textOverflow: 'ellipsis'
  }}>
                  {col.name}
                  <span style={{
    color: mutedColor,
    fontWeight: 400,
    marginLeft: '4px',
    fontSize: '10px'
  }}>
                    {col.type}
                  </span>
                </div>)}
              {results.data.map((row, ri) => row.map((cell, ci) => <div key={`${ri}-${ci}`} onMouseEnter={() => setHoveredRow(ri)} onMouseLeave={() => setHoveredRow(-1)} style={{
    padding: '4px 12px',
    color: textColor,
    whiteSpace: 'nowrap',
    overflow: 'hidden',
    textOverflow: 'ellipsis',
    textAlign: isNumericType(results.meta[ci].type) && results.meta.length > 1 ? 'right' : 'left',
    borderBottom: `1px solid ${borderColor}`,
    backgroundColor: ri === hoveredRow ? cellBgHover : ri % 2 === 0 ? 'transparent' : bgColor,
    transition: 'background-color 0.1s',
    ...getCellBarStyle(cell, ci, ri)
  }}>
                    {renderCell(cell, ci)}
                  </div>))}
            </div>}

          {results && results.data && <div style={{
    display: 'flex',
    justifyContent: 'space-between',
    alignItems: 'center',
    padding: '4px 12px',
    fontSize: '11px',
    color: mutedColor,
    borderTop: `1px solid ${borderColor}`,
    backgroundColor: headerBg
  }}>
              <span>
                {results.rows} row{results.rows !== 1 ? 's' : ''}
              </span>
              <button onClick={copyResultsAsTSV} style={{
    background: 'none',
    border: 'none',
    cursor: 'pointer',
    color: mutedColor,
    fontSize: '11px',
    padding: '2px 6px',
    borderRadius: '3px'
  }} onMouseEnter={e => e.target.style.color = textColor} onMouseLeave={e => e.target.style.color = mutedColor}>
                ⧉ Copy TSV
              </button>
            </div>}
          </div>
        </div>}
    </div>;
};

Este conjunto de datos contiene más de 150 millones de reseñas de clientes sobre productos de Amazon. Los datos están en archivos Parquet comprimidos con Snappy en AWS S3, con un tamaño total de 49 GB (comprimidos). Veamos los pasos para insertarlos en ClickHouse.

<Note>
  Las consultas siguientes se ejecutaron en una instancia de **producción** de ClickHouse Cloud. Para obtener más información, consulte
  ["Especificaciones de Playground"](/es/get-started/sample-datasets/playground#specifications).
</Note>

<div id="loading-the-dataset">
  ## Cargar el conjunto de datos
</div>

1. Sin insertar los datos en ClickHouse, podemos consultarlos directamente. Veamos algunas filas para ver qué aspecto tienen:

```sql theme={null}
SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/amazon_reviews/amazon_reviews_2015.snappy.parquet')
LIMIT 3
```

Las filas tienen este aspecto:

```response theme={null}
Row 1:
──────
review_date:       16462
marketplace:       US
customer_id:       25444946 -- 25.44 million
review_id:         R146L9MMZYG0WA
product_id:        B00NV85102
product_parent:    908181913 -- 908.18 million
product_title:     XIKEZAN iPhone 6 Plus 5.5 inch Waterproof Case, Shockproof Dirtproof Snowproof Full Body Skin Case Protective Cover with Hand Strap & Headphone Adapter & Kickstand
product_category:  Wireless
star_rating:       4
helpful_votes:     0
total_votes:       0
vine:              false
verified_purchase: true
review_headline:   case is sturdy and protects as I want
review_body:       I won't count on the waterproof part (I took off the rubber seals at the bottom because the got on my nerves). But the case is sturdy and protects as I want.

Row 2:
──────
review_date:       16462
marketplace:       US
customer_id:       1974568 -- 1.97 million
review_id:         R2LXDXT293LG1T
product_id:        B00OTFZ23M
product_parent:    951208259 -- 951.21 million
product_title:     Season.C Chicago Bulls Marilyn Monroe No.1 Hard Back Case Cover for Samsung Galaxy S5 i9600
product_category:  Wireless
star_rating:       1
helpful_votes:     0
total_votes:       0
vine:              false
verified_purchase: true
review_headline:   One Star
review_body:       Cant use the case because its big for the phone. Waist of money!

Row 3:
──────
review_date:       16462
marketplace:       US
customer_id:       24803564 -- 24.80 million
review_id:         R7K9U5OEIRJWR
product_id:        B00LB8C4U4
product_parent:    524588109 -- 524.59 million
product_title:     iPhone 5s Case, BUDDIBOX [Shield] Slim Dual Layer Protective Case with Kickstand for Apple iPhone 5 and 5s
product_category:  Wireless
star_rating:       4
helpful_votes:     0
total_votes:       0
vine:              false
verified_purchase: true
review_headline:   but overall this case is pretty sturdy and provides good protection for the phone
review_body:       The front piece was a little difficult to secure to the phone at first, but overall this case is pretty sturdy and provides good protection for the phone, which is what I need. I would buy this case again.
```

2. Definamos una nueva tabla `MergeTree` llamada `amazon_reviews` para almacenar estos datos en ClickHouse:

```sql theme={null}
CREATE DATABASE amazon

CREATE TABLE amazon.amazon_reviews
(
    `review_date` Date,
    `marketplace` LowCardinality(String),
    `customer_id` UInt64,
    `review_id` String,
    `product_id` String,
    `product_parent` UInt64,
    `product_title` String,
    `product_category` LowCardinality(String),
    `star_rating` UInt8,
    `helpful_votes` UInt32,
    `total_votes` UInt32,
    `vine` Bool,
    `verified_purchase` Bool,
    `review_headline` String,
    `review_body` String,
    PROJECTION helpful_votes
    (
        SELECT *
        ORDER BY helpful_votes
    )
)
ENGINE = MergeTree
ORDER BY (review_date, product_category)
```

3. El siguiente comando `INSERT` usa la función de tabla `s3Cluster`, que permite procesar varios archivos de S3 en paralelo con todos los nodos de su clúster. También usamos un comodín para insertar cualquier archivo que empiece por `https://datasets-documentation.s3.eu-west-3.amazonaws.com/amazon_reviews/amazon_reviews_*.snappy.parquet`:

```sql theme={null}
INSERT INTO amazon.amazon_reviews SELECT *
FROM s3Cluster('default', 
'https://datasets-documentation.s3.eu-west-3.amazonaws.com/amazon_reviews/amazon_reviews_*.snappy.parquet')
```

<Tip>
  En ClickHouse Cloud, el nombre del clúster es `default`. Cambia `default` por el nombre de tu clúster... o usa la función de tabla `s3` (en lugar de `s3Cluster`) si no tienes un clúster.
</Tip>

5. Esa consulta no tarda mucho: promedia unas 300.000 filas por segundo. En unos 5 minutos deberías ver todas las filas insertadas:

<RunnableCode>
  ```sql theme={null}
  SELECT formatReadableQuantity(count())
  FROM amazon.amazon_reviews
  ```
</RunnableCode>

6. Veamos cuánto espacio ocupan nuestros datos:

<RunnableCode>
  ```sql theme={null}
  SELECT
      disk_name,
      formatReadableSize(sum(data_compressed_bytes) AS size) AS compressed,
      formatReadableSize(sum(data_uncompressed_bytes) AS usize) AS uncompressed,
      round(usize / size, 2) AS compr_rate,
      sum(rows) AS rows,
      count() AS part_count
  FROM system.parts
  WHERE (active = 1) AND (table = 'amazon_reviews')
  GROUP BY disk_name
  ORDER BY size DESC
  ```
</RunnableCode>

Los datos originales ocupaban unos 70G, pero en ClickHouse comprimidos ocupan unos 30G.

<div id="example-queries">
  ## Consultas de ejemplo
</div>

7. Ejecutemos algunas consultas. Estas son las 10 reseñas más útiles del conjunto de datos:

<RunnableCode>
  ```sql theme={null}
  SELECT
      product_title,
      review_headline
  FROM amazon.amazon_reviews
  ORDER BY helpful_votes DESC
  LIMIT 10
  ```
</RunnableCode>

<Note>
  Esta consulta usa una [proyección](/es/concepts/features/projections/projections) para mejorar el rendimiento.
</Note>

8. Estos son los 10 productos de Amazon con más reseñas:

<RunnableCode>
  ```sql theme={null}
  SELECT
      any(product_title),
      count()
  FROM amazon.amazon_reviews
  GROUP BY product_id
  ORDER BY 2 DESC
  LIMIT 10;
  ```
</RunnableCode>

9. Estas son las calificaciones promedio de las reseñas por mes para cada producto (¡una [pregunta real de entrevista de Amazon](https://datalemur.com/questions/sql-avg-review-ratings)!):

<RunnableCode>
  ```sql theme={null}
  SELECT
      toStartOfMonth(review_date) AS month,
      any(product_title),
      avg(star_rating) AS avg_stars
  FROM amazon.amazon_reviews
  GROUP BY
      month,
      product_id
  ORDER BY
      month DESC,
      product_id ASC
  LIMIT 20;
  ```
</RunnableCode>

10. Este es el número total de votos por categoría de producto. Esta consulta es rápida porque `product_category` está en la clave primaria:

<RunnableCode>
  ```sql theme={null}
  SELECT
      sum(total_votes),
      product_category
  FROM amazon.amazon_reviews
  GROUP BY product_category
  ORDER BY 1 DESC
  ```
</RunnableCode>

11. Busquemos los productos en cuyas reseñas aparece con mayor frecuencia la palabra **"awful"**. Esta es una tarea considerable: hay que analizar más de 151M cadenas en busca de una sola palabra:

```sql runnable settings={'enable_parallel_replicas':1} theme={null}
SELECT
    product_id,
    any(product_title),
    avg(star_rating),
    count() AS count
FROM amazon.amazon_reviews
WHERE position(review_body, 'awful') > 0
GROUP BY product_id
ORDER BY count DESC
LIMIT 50;
```

Observa el tiempo de ejecución de la consulta para un volumen de datos tan grande. ¡Además, los resultados son una lectura entretenida!

12. Podemos ejecutar la misma consulta otra vez, pero esta vez buscamos **awesome** en las reseñas:

```sql runnable settings={'enable_parallel_replicas':1} theme={null}
SELECT 
    product_id,
    any(product_title),
    avg(star_rating),
    count() AS count
FROM amazon.amazon_reviews
WHERE position(review_body, 'awesome') > 0
GROUP BY product_id
ORDER BY count DESC
LIMIT 50;
```
